├── .gitignore
├── LICENSE
├── README.md
├── notebooks
    ├── 01_mac0460_Q.ipynb
    ├── 02_mac0460_Q.ipynb
    ├── EP4_MNIST.ipynb
    ├── README.md
    ├── cnn.ipynb
    ├── download_scripts
    │   ├── efigi.sh
    │   └── pi_car.sh
    ├── images
    │   ├── arbitrary_padding_no_strides_transposed.gif
    │   ├── cnn_arch.png
    │   ├── conv.png
    │   ├── dfn.png
    │   ├── dfn_description.png
    │   ├── dwarf.png
    │   ├── elliptical.png
    │   ├── image_classification.png
    │   ├── irregular.png
    │   ├── lenticular.png
    │   ├── logistic_regression.png
    │   ├── multchain.png
    │   ├── pytorch-logo-dark.png
    │   ├── simple_example.png
    │   ├── spiral.png
    │   ├── test_track.png
    │   ├── train_track.png
    │   └── vector_graph.png
    ├── pytorch_basico1.ipynb
    ├── pytorch_basico2.ipynb
    ├── regressao_linear1.ipynb
    ├── regressao_linear2.ipynb
    ├── transfer_learning.ipynb
    └── util
    │   ├── DataHolder.py
    │   ├── __init__.py
    │   ├── plots.py
    │   └── util.py
├── requirements.txt
└── slides
    ├── LICENSE
    ├── README.md
    ├── backprop1
        ├── Makefile
        ├── TikzFiles
        │   ├── 3x3.tex
        │   ├── 5x5.tex
        │   ├── BackPropScalar.tex
        │   ├── Compgraph1.tex
        │   ├── Compgraph2.tex
        │   ├── Compgraph3.tex
        │   ├── Compgraph4.tex
        │   ├── DFNclassification.tex
        │   ├── DFNclassification2.tex
        │   ├── DeepNN.tex
        │   ├── Dropout1.tex
        │   ├── Dropout2.tex
        │   ├── Entropy1.tex
        │   ├── Entropy2.tex
        │   ├── Kernel_image_pro.tex
        │   ├── KullbackLeibler.tex
        │   ├── NN.tex
        │   ├── OldNN1.tex
        │   ├── OldNN2.tex
        │   ├── OldNN3.tex
        │   ├── ReLU.tex
        │   ├── Sigmoid.tex
        │   ├── Softmax.tex
        │   ├── b1_path1.tex
        │   ├── b1_path1_grad.tex
        │   ├── b1_path2.tex
        │   ├── b1_path2_grad.tex
        │   ├── b1_path3.tex
        │   ├── b1_path3_grad.tex
        │   ├── batch_example_values.tex
        │   ├── batch_graph.tex
        │   ├── chain_rule_nodes.tex
        │   ├── convnet_arch.tex
        │   ├── div.tex
        │   ├── examples_values.tex
        │   ├── exp.tex
        │   ├── expanded_graph_0.tex
        │   ├── expanded_graph_1.tex
        │   ├── expanded_graph_10.tex
        │   ├── expanded_graph_11.tex
        │   ├── expanded_graph_12.tex
        │   ├── expanded_graph_13.tex
        │   ├── expanded_graph_14.tex
        │   ├── expanded_graph_2.tex
        │   ├── expanded_graph_3.tex
        │   ├── expanded_graph_4.tex
        │   ├── expanded_graph_5.tex
        │   ├── expanded_graph_6.tex
        │   ├── expanded_graph_7.tex
        │   ├── expanded_graph_8.tex
        │   ├── expanded_graph_9.tex
        │   ├── fashionMNIST.tex
        │   ├── feature_engineering.tex
        │   ├── feature_map.tex
        │   ├── log.tex
        │   ├── lr_graph.tex
        │   ├── lr_graph1.tex
        │   ├── lr_graph_grad.tex
        │   ├── minus1.tex
        │   ├── mult.tex
        │   ├── multiple_paths.tex
        │   ├── perceptron.tex
        │   ├── simple_example.tex
        │   ├── simple_example0.tex
        │   ├── simple_exampleCR.tex
        │   ├── soma.tex
        │   └── squ.tex
        ├── all_imports.tex
        ├── all_new_commands.tex
        ├── definitions
        │   ├── colors.tex
        │   └── styles.tex
        ├── header.tex
        ├── images
        │   └── logo.png
        ├── main.tex
        ├── my_references.bib
        └── pdf
        │   └── BackpropLecture1.pdf
    ├── backprop2
        ├── Makefile
        ├── all_imports.tex
        ├── all_new_commands.tex
        ├── definitions
        │   ├── colors.tex
        │   └── styles.tex
        ├── header.tex
        ├── images
        │   └── logo.png
        ├── main.tex
        ├── my_references.bib
        └── pdf
        │   └── BackpropLecture2.pdf
    ├── images
        └── cc-logo.png
    └── install.sh


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 MLIME
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MAC0460 - Introdução ao aprendizado de máquina
 2 | [![License](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/MLIME/MAC0460/blob/master/LICENSE)
 3 | 
 4 | Nesse repositório estão os diferentes materiais da disciplina **MAC0460 - Introdução ao aprendizado de máquina** ministrada no Instituto de Matemática e Estatística (IME) da Universidade de São Paulo (USP). Maiores informações podem ser encontradas [aqui](https://uspdigital.usp.br/jupiterweb/obterDisciplina?sgldis=MAC0460).
 5 | 
 6 | Um pedaço da parte teórica do curso foi baseado no curso [**Learning from Data**](https://work.caltech.edu/telecourse.html), vale a pena conferir esse material.
 7 | 
 8 | ## Uso
 9 | 
10 | Na pasta **notebooks** estão os exercícios práticos dados no curso, na pasta **slides** estão os materiais didáticos de algumas aulas.
11 | 
12 | ### Instalação (Ubuntu / Debian)
13 | Para quem desejar, recomendamos a utilização de ambientes virtuais.
14 | [Instale os pacotes necessários](http://railslide.io/virtualenvwrapper-python3.html) e crie um novo ambiente, ex:
15 | 
16 | ```
17 | $ mkvirtualenv mac0460
18 | $ workon mac0460
19 | ```
20 | 
21 | Para instalar o [Jupyter Notebook](http://jupyter.org/) basta rodar:
22 | 
23 | ```
24 | $ sudo apt install python3-pip python3-tk
25 | $ pip3 install --upgrade pip
26 | $ sudo pip3 install jupyter
27 | ```
28 | 
29 | Vamos usar uma série de bibliotecas de Python ao longo da disciplina, para instalar as principais rode:
30 | 
31 | ```
32 | $ pip3 install -r requirements.txt
33 | ```
34 | 
35 | É preciso ir no site do [PyTorch](https://pytorch.org/) para instalar essa biblioteca.
36 | 
37 | Para acessar os notebooks basta rodar:
38 | 
39 | ```
40 | $ cd notebooks
41 | $ jupyter notebook
42 | ```
43 | 
44 | ### Pontos importantes
45 | 
46 | - Usamos a biblioteca de deep learning PyTorch. Essa biblioteca está mudando constantemente. Os exercícios práticos foram desenvolvidos para a versão 0.4.0. Não é garantido que os notebooks vão funcionar para as versões futuras.
47 | 
48 | - Partimos do pressuposto que o usuário está usando Ubuntu ou Debian. A compatibilidade com outros sistemas não foi testada.
49 | 
50 | - Os notebooks foram feitos para a versão 3.5 do Python
51 | 
52 | 
53 | ### Referências
54 | 
55 | Vale a pena se familiarizar com cada uma das bibliotecas que vão ser usadas:
56 | - [Jupyter](https://jupyter.readthedocs.io/en/latest/)
57 | - [NumPy](https://docs.scipy.org/doc/numpy-dev/user/quickstart.html)
58 | - [Matplotlib](https://matplotlib.org/tutorials/index.html)
59 | - [PyTorch](https://pytorch.org/tutorials/)
60 | 
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/notebooks/02_mac0460_Q.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Objetivos\n",
  8 |     "\n",
  9 |     "Implementar e testar a solução analítica para o problema de regressão linear \n",
 10 |     "\n",
 11 |     "Este notebook depende de módulos auxiliares que estão na pasta util/ mais alguns imports do Python"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "# Regressão linear: equações normais\n",
 19 |     "\n",
 20 |     "\n",
 21 |     "Dado um dataset $\\{(\\mathbf{x}_{1}, y_{1}), \\dots ,(\\mathbf{x}_{N}, y_{N})\\}$ onde $\\mathbf{x}_i \\in \\mathbb{R}^{d}$ e $y_i \\in \\mathbb{R}$, queremos aproximar a função desconhecida $f:\\mathbb{R}^{d} \\rightarrow \\mathbb{R}$ (lembrando que $y_i =f(\\mathbf{x}_i)$) por meio de um modelo linear $h$:\n",
 22 |     "$$\n",
 23 |     "h(\\mathbf{x}_{i}; \\mathbf{w}, b) = \\mathbf{w}^\\top  \\mathbf{x}_{i} + b\n",
 24 |     "$$\n",
 25 |     "\n",
 26 |     "Note que $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ é na verdade uma [transformação afim](https://en.wikipedia.org/wiki/Affine_transformation) de $\\mathbf{x}_{i}$. Como em outros lugares, vamos usar o termo \"linear\" também para caracterizar uma transformação afim.\n",
 27 |     "\n",
 28 |     "A saída de $h$ é uma transformação linear de $\\mathbf{x}_{i}$. Usamos a notação $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ para deixar claro que $h$ é um modelo parametrizado, i.e., a transformação $h$ é definida pelos parâmetros $\\mathbf{w}$ e $b$. Podemos pensar no vetor $\\mathbf{w}$ como um vetor de *pesos* controlando o efeito de cada *feature* na predição.\n",
 29 |     "\n",
 30 |     "Adicionando uma feature a mais na observação $\\mathbf{x}_{i}$ (com o valor 1) -- coordenada artificial -- podemos simplificar a notação do modelo:\n",
 31 |     "\n",
 32 |     "$$\n",
 33 |     "h(\\mathbf{x}_{i}; \\mathbf{w}) = \\hat{y}_{i} = \\mathbf{w}^\\top  \\mathbf{x}_{i}\n",
 34 |     "$$\n",
 35 |     "\n",
 36 |     "Gostaríamos de encontrar os melhores parâmetros $\\mathbf{w}$ de modo que a predição $\\hat{y}_{i}$ seja a mais próxima de $y_{i}$ de acordo com alguma métrica de erro. Usando o *erro quadrático médio* como tal métrica podemos obter a seguinte função de custo:\n",
 37 |     "\n",
 38 |     "\\begin{equation}\n",
 39 |     "J(\\mathbf{w}) = \\frac{1}{N}\\sum_{i=1}^{N}(\\hat{y}_{i} - y_{i})^{2}\n",
 40 |     "\\end{equation}\n",
 41 |     "\n",
 42 |     "Desse modo, a tarefa de achar a função $h$ mais próxima de $f$ se torna a tarefa de encontrar os valores de $\\mathbf{w}$ para minimizar $J(\\mathbf{w})$.\n",
 43 |     "\n",
 44 |     "**Aqui vamos começar a explorar esse modelo olhando para um dataset bem simples**\n"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "# all imports\n",
 54 |     "import numpy as np\n",
 55 |     "import time\n",
 56 |     "from util.util import get_housing_prices_data, r_squared\n",
 57 |     "from util.plots import plot_points_regression \n",
 58 |     "\n",
 59 |     "%matplotlib inline"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "### O dataset\n",
 67 |     "\n",
 68 |     "Os dados com que vamos trabalhar são dados artificiais. Iremos gerar 100 observações com apenas uma *feature* e um valor associado a cada uma delas. Podemos interpretar essas observações como sendo um par *(metros quadrados de um imóvel, preço desse imóvel em $)*. Nossa tarefa é construir um modelo que consiga predizer o valor dos imóveis, dadas as suas áreas."
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "X, y = get_housing_prices_data(N=100)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "### Plotando os dados\n",
 85 |     "\n",
 86 |     "Acima temos algumas informações sobre os dados. Podemos também visualizar cada ponto."
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "plot_points_regression(X,\n",
 96 |     "                       y,\n",
 97 |     "                       title='Real estate prices prediction',\n",
 98 |     "                       xlabel=\"m\\u00b2\",\n",
 99 |     "                       ylabel='$')"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "### Equações normais\n",
107 |     "\n",
108 |     "Dados $f:\\mathbb{R}^{n\\times m} \\rightarrow \\mathbb{R}$ e $\\mathbf{A} \\in \\mathbb{R}^{n\\times m}$, definimos o gradiente de $f$ com respeito a $\\mathbf{A}$ como:\n",
109 |     "\n",
110 |     "\\begin{equation*}\n",
111 |     "\\nabla_{\\mathbf{A}}f = \\frac{\\partial f}{\\partial \\mathbf{A}} =  \\begin{bmatrix}\n",
112 |     "\\frac{\\partial f}{\\partial \\mathbf{A}_{1,1}} & \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{1,m}} \\\\\n",
113 |     "\\vdots &  \\ddots & \\vdots \\\\\n",
114 |     "\\frac{\\partial f}{\\partial \\mathbf{A}_{n,1}} &  \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{n,m}}\n",
115 |     "\\end{bmatrix}\n",
116 |     "\\end{equation*}\n",
117 |     "\n",
118 |     "Seja $\\mathbf{X} \\in \\mathbb{R}^{N\\times m}$ a matriz cujas linhas são as observações do dataset (também chamada de *design matrix*) e seja $\\mathbf{y} \\in \\mathbb{R}^{N}$ o vetor contendo todos os valores de $y_{i}$ (i.e., $\\mathbf{X}_{i,:} = \\mathbf{x}_{i}$ e $\\mathbf{y}_{i} =y_{i}$). É fácil checar que: \n",
119 |     "\n",
120 |     "\n",
121 |     "\\begin{equation}\n",
122 |     "J(\\mathbf{w}) = \\frac{1}{N}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})^{T}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})\n",
123 |     "\\end{equation}\n",
124 |     "\n",
125 |     "\n",
126 |     "Usando certos conceitos básicos de derivada com matrizes podemos chegar no gradiente de $J(\\mathbf{w})$ com respeito a $\\mathbf{w}$:\n",
127 |     "\n",
128 |     "\\begin{equation}\n",
129 |     "\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = \\frac{2}{N} (\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} -\\mathbf{X}^{T}\\mathbf{y})   \n",
130 |     "\\end{equation}\n",
131 |     "\n",
132 |     "Assim, quando $\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = 0$ temos que \n",
133 |     "\n",
134 |     "\\begin{equation}\n",
135 |     "\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} = \\mathbf{X}^{T}\\mathbf{y}\n",
136 |     "\\end{equation}\n",
137 |     "\n",
138 |     "Desse modo,\n",
139 |     "\n",
140 |     "\\begin{equation}\n",
141 |     "\\mathbf{w} = (\\mathbf{X}^{T}\\mathbf{X})^{-1}\\mathbf{X}^{T}\\mathbf{y}\n",
142 |     "\\end{equation}\n",
143 |     "\n",
144 |     "As equações acima são conhecidas como **equações normais**. Note que esse tipo de solução tem um custo, pois conforme cresce o número de variáveis, o tempo da inversão da matriz fica proibitivo. Vale a pena ler [esse material](http://cs229.stanford.edu/notes/cs229-notes1.pdf) para ver o argumento com mais detalhes."
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "### Exercício\n",
152 |     "Usando apenas a biblioteca **NumPy** (uma introdução rápida a certas funcionalidades dessa biblioteca pode ser encontrada [aqui](http://cs231n.github.io/python-numpy-tutorial/)), complete as duas funções abaixo. Lembre que $\\mathbf{X} \\in \\mathbb{R}^{N\\times d}$; assim, será preciso adicionar uma componente com valor 1 a cada observação em $\\mathbf{X}$ para realizar a computação descrita acima."
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "def normal_equation_weights(X, y):\n",
162 |     "    \"\"\"\n",
163 |     "    Calculates the weights of a linear function using the normal equation method.\n",
164 |     "    You should add into X a new column with 1s.\n",
165 |     "\n",
166 |     "    :param X: design matrix\n",
167 |     "    :type X: np.ndarray(shape=(N, d))\n",
168 |     "    :param y: regression targets\n",
169 |     "    :type y: np.ndarray(shape=(N, 1))\n",
170 |     "    :return: weight vector\n",
171 |     "    :rtype: np.ndarray(shape=(d+1, 1))\n",
172 |     "    \"\"\"\n",
173 |     "    \n",
174 |     "    # START OF YOUR CODE:\n",
175 |     "    raise NotImplementedError(\"Falta implementar normal_equation_weights()\")\n",
176 |     "    # END YOUR CODE\n",
177 |     "\n",
178 |     "    return w"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {},
185 |    "outputs": [],
186 |    "source": [
187 |     "# teste da função normal_equation_weights()\n",
188 |     "\n",
189 |     "w = 0  # isto é desnecessário\n",
190 |     "w = normal_equation_weights(X, y)\n",
191 |     "print(\"Estimated w = \", w)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": null,
197 |    "metadata": {},
198 |    "outputs": [],
199 |    "source": [
200 |     "def normal_equation_prediction(X, w):\n",
201 |     "    \"\"\"\n",
202 |     "    Calculates the prediction over a set of observations X using the linear function\n",
203 |     "    characterized by the weight vector w.\n",
204 |     "    You should add into X a new column with 1s.\n",
205 |     "\n",
206 |     "    :param X: design matrix\n",
207 |     "    :type X: np.ndarray(shape=(N, d))\n",
208 |     "    :param w: weight vector\n",
209 |     "    :type w: np.ndarray(shape=(d+1, 1))\n",
210 |     "    :return: regression prediction\n",
211 |     "    :rtype: np.ndarray(shape=(N, 1))\n",
212 |     "    \"\"\"\n",
213 |     "    \n",
214 |     "    # START OF YOUR CODE:\n",
215 |     "    raise NotImplementedError(\"Falta implementar normal_equation_prediction()\")\n",
216 |     "    # END YOUR CODE\n",
217 |     "    \n",
218 |     "    return prediction"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "markdown",
223 |    "metadata": {},
224 |    "source": [
225 |     "\n",
226 |     "Você pode usar a métrica [$R^2$](https://pt.wikipedia.org/wiki/R%C2%B2) para ver o quão bem o modelo linear está se ajustando aos dados.\n",
227 |     "\n",
228 |     "**Nesse caso $𝑅^2$ tem que estar próximo de 0.5.**"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": null,
234 |    "metadata": {},
235 |    "outputs": [],
236 |    "source": [
237 |     "# teste da função normal_equation_prediction()\n",
238 |     "prediction = normal_equation_prediction(X, w)\n",
239 |     "r_2 = r_squared(y, prediction)\n",
240 |     "plot_points_regression(X,\n",
241 |     "                       y,\n",
242 |     "                       title='Real estate prices prediction',\n",
243 |     "                       xlabel=\"m\\u00b2\",\n",
244 |     "                       ylabel='$',\n",
245 |     "                       prediction=prediction,\n",
246 |     "                       legend=True,\n",
247 |     "                       r_squared=r_2)"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "markdown",
252 |    "metadata": {},
253 |    "source": [
254 |     "## Testes adicionais\n",
255 |     "\n",
256 |     "Vamos fazer a predição para $x=650$\n"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": null,
262 |    "metadata": {},
263 |    "outputs": [],
264 |    "source": [
265 |     "# usando a função de predição\n",
266 |     "x = np.asarray([650]).reshape(1,1)\n",
267 |     "prediction = normal_equation_prediction(x, w)\n",
268 |     "print(\"Area = %.2f  Predicted price = %.4f\" %(x[0], prediction))\n",
269 |     "\n",
270 |     "# de forma mais direta\n",
271 |     "y = np.dot(np.asarray((1,x)), w)\n",
272 |     "print(\"Area = %.2f  Predicted price = %.4f\" %(x, y))\n"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {},
278 |    "source": [
279 |     "## Efeito do número de amostras e dimensão dos dados\n",
280 |     "\n",
281 |     "Varie o número de amostras $N$ e veja como varia o tempo de processamento.\n",
282 |     "\n",
283 |     "Teste o seu código para dados nos quais $𝑑>1$."
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": null,
289 |    "metadata": {},
290 |    "outputs": [],
291 |    "source": [
292 |     "# Teste para diferentes valores de N\n",
293 |     "X, y = get_housing_prices_data(N=1000000)\n",
294 |     "init = time.time()\n",
295 |     "w = normal_equation_weights(X, y)\n",
296 |     "prediction = normal_equation_prediction(X,w)\n",
297 |     "init = time.time() - init\n",
298 |     "\n",
299 |     "print(\"Tempo de execução = {:.8f}(s)\".format(init))"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": null,
305 |    "metadata": {},
306 |    "outputs": [],
307 |    "source": [
308 |     "# Teste para dados de dimensão d>1"
309 |    ]
310 |   }
311 |  ],
312 |  "metadata": {
313 |   "kernelspec": {
314 |    "display_name": "Python 3",
315 |    "language": "python",
316 |    "name": "python3"
317 |   },
318 |   "language_info": {
319 |    "codemirror_mode": {
320 |     "name": "ipython",
321 |     "version": 3
322 |    },
323 |    "file_extension": ".py",
324 |    "mimetype": "text/x-python",
325 |    "name": "python",
326 |    "nbconvert_exporter": "python",
327 |    "pygments_lexer": "ipython3",
328 |    "version": "3.7.1"
329 |   }
330 |  },
331 |  "nbformat": 4,
332 |  "nbformat_minor": 2
333 | }
334 | 


--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
 1 | # Notebooks
 2 | 
 3 | ## Organização
 4 | 
 5 | Os notebooks aqui servem para fixar os conceitos, métodos e algoritmos apresentados em aula. Eles foram pensados na seguinte ordem:
 6 | 
 7 | - Ano 2019
 8 |   * 01 Perceptron
 9 |   * 02 Regressão linear (solução analítica)
10 | 
11 | - Ano 2018
12 |   * regressao_linear1
13 |   * regressao_linear2
14 |   * pytorch_basico1
15 |   * pytorch_basico2
16 |   * cnn
17 |   * transfer_learning
18 | 
19 | ## Guia de atividades
20 | Cada notebook é autoexplicativo. Os alunos devem rodar cada um para entender a questão que está sendo tratada; os exercícios estão claramente indicados no código.
21 | 
22 | 


--------------------------------------------------------------------------------
/notebooks/download_scripts/efigi.sh:
--------------------------------------------------------------------------------
1 | # Download the EFIGI archive, extract it in the current directory and
2 | # remove the archive afterwards.
3 | # Stop at the first failing command so that a failed download or extraction
4 | # is reported instead of being hidden by the clean-up step.
5 | set -e
6 | # -O pins the output name: without it, a leftover archive from a previous run
7 | # makes wget save to "<name>.1" while unzip would still read the old file.
8 | wget -O efigi_data_all_classes.zip "https://www.dropbox.com/s/i1mx1mqjhcfmkwd/efigi_data_all_classes.zip"
9 | unzip efigi_data_all_classes.zip
10 | rm efigi_data_all_classes.zip


--------------------------------------------------------------------------------
/notebooks/download_scripts/pi_car.sh:
--------------------------------------------------------------------------------
1 | # Download the data tarball, extract it, remove the tarball and rename the
2 | # extracted "data" directory to "self_driving_pi_car_data".
3 | # Stop at the first failing command so that a failed download or extraction
4 | # does not fall through to the rm/mv steps.
5 | set -e
6 | # The URL contains '?', which the shell treats as a glob character, so it is
7 | # quoted. -O saves the download under a fixed, glob-free name instead of the
8 | # default "data.tar?dl=0".
9 | wget -O data.tar "https://www.dropbox.com/s/7i0kb82ti1zia73/data.tar?dl=0"
10 | tar xvf data.tar
11 | rm data.tar
12 | mv data self_driving_pi_car_data


--------------------------------------------------------------------------------
/notebooks/images/arbitrary_padding_no_strides_transposed.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/arbitrary_padding_no_strides_transposed.gif


--------------------------------------------------------------------------------
/notebooks/images/cnn_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/cnn_arch.png


--------------------------------------------------------------------------------
/notebooks/images/conv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/conv.png


--------------------------------------------------------------------------------
/notebooks/images/dfn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/dfn.png


--------------------------------------------------------------------------------
/notebooks/images/dfn_description.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/dfn_description.png


--------------------------------------------------------------------------------
/notebooks/images/dwarf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/dwarf.png


--------------------------------------------------------------------------------
/notebooks/images/elliptical.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/elliptical.png


--------------------------------------------------------------------------------
/notebooks/images/image_classification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/image_classification.png


--------------------------------------------------------------------------------
/notebooks/images/irregular.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/irregular.png


--------------------------------------------------------------------------------
/notebooks/images/lenticular.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/lenticular.png


--------------------------------------------------------------------------------
/notebooks/images/logistic_regression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/logistic_regression.png


--------------------------------------------------------------------------------
/notebooks/images/multchain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/multchain.png


--------------------------------------------------------------------------------
/notebooks/images/pytorch-logo-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/pytorch-logo-dark.png


--------------------------------------------------------------------------------
/notebooks/images/simple_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/simple_example.png


--------------------------------------------------------------------------------
/notebooks/images/spiral.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/spiral.png


--------------------------------------------------------------------------------
/notebooks/images/test_track.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/test_track.png


--------------------------------------------------------------------------------
/notebooks/images/train_track.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/train_track.png


--------------------------------------------------------------------------------
/notebooks/images/vector_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/notebooks/images/vector_graph.png


--------------------------------------------------------------------------------
/notebooks/regressao_linear1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Regressão linear 1: equações normais\n",
  8 |     "\n",
  9 |     "\n",
 10 |     "Dado o dataset $(\\mathbf{x}_{1}, y_{1}), \\dots ,(\\mathbf{x}_{N}, y_{N})$ onde $\\mathbf{x}_i \\in \\mathbb{R}^{d}$ e $y_i \\in \\mathbb{R}$, podemos aproximar a função desconhecida $f:\\mathbb{R}^{d} \\rightarrow \\mathbb{R}$ (lembrando que $y_i =f(\\mathbf{x}_i)$) por meio de um modelo linear $h$:\n",
 11 |     "$$\n",
 12 |     "h(\\mathbf{x}_{i}; \\mathbf{w}, b) = \\mathbf{w}^\\top  \\mathbf{x}_{i} + b\n",
 13 |     "$$\n",
 14 |     "\n",
 15 |     "Note que $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ é na verdade uma [transformação afim](https://en.wikipedia.org/wiki/Affine_transformation) de $\\mathbf{x}_{i}$. Como em outros lugares, vamos usar o termo \"linear\" também para caracterizar uma transformação afim.\n",
 16 |     "\n",
 17 |     "A saída de $h$ é uma transformação linear de $\\mathbf{x}_{i}$. Usamos a notação $h(\\mathbf{x}_{i}; \\mathbf{w}, b)$ para deixar claro que $h$ é um modelo parametrizado, i.e., a transformação $h$ é definida pelos parâmetros $\\mathbf{w}$ e $b$. Podemos pensar no vetor $\\mathbf{w}$ como um vetor de *pesos* controlando o efeito de cada *feature* na predição.\n",
 18 |     "\n",
 19 |     "Adicionando uma feature a mais na observação $\\mathbf{x}_{i}$ (com o valor 1) -- coordenada artificial -- podemos simplificar a notação do modelo:\n",
 20 |     "\n",
 21 |     "$$\n",
 22 |     "h(\\mathbf{x}_{i}; \\mathbf{w}) = \\hat{y}_{i} = \\mathbf{w}^\\top  \\mathbf{x}_{i}\n",
 23 |     "$$\n",
 24 |     "\n",
 25 |     "Procuramos os melhores parâmetros $\\mathbf{w}$ de modo que a predição $\\hat{y}_{i}$ seja a mais próxima de $y_{i}$ de acordo com alguma métrica de erro. Usando o *erro quadrático médio* como tal métrica podemos obter a seguinte função de custo:\n",
 26 |     "\n",
 27 |     "\\begin{equation}\n",
 28 |     "J(\\mathbf{w}) = \\frac{1}{N}\\sum_{i=1}^{N}(\\hat{y}_{i} - y_{i})^{2}\n",
 29 |     "\\end{equation}\n",
 30 |     "\n",
 31 |     "Desse modo, a tarefa de achar a função $h$ mais próxima de $f$ se torna a tarefa de encontrar os valores de $\\mathbf{w}$ para minimizar $J(\\mathbf{w})$.\n",
 32 |     "\n",
 33 |     "**Aqui vamos começar a explorar esse modelo olhando para um dataset bem simples**\n"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# all imports\n",
 43 |     "import numpy as np\n",
 44 |     "import time\n",
 45 |     "from util.util import get_housing_prices_data, r_squared\n",
 46 |     "from util.plots import plot_points_regression \n",
 47 |     "\n",
 48 |     "%matplotlib inline"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "### O dataset\n",
 56 |     "\n",
 57 |     "Os dados que vamos trabalhar vão ser dados artificiais. Vamos pegar 100 observações com apenas uma *feature* (metros quadrados de um imóvel) e com isso vamos associar um valor (o preço desse imóvel em $). Nossa tarefa é construir um modelo que consiga predizer o valor dos imóveis."
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "X, y = get_housing_prices_data(N=100)"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "### Plotando os dados\n",
 74 |     "\n",
 75 |     "Acima temos algumas informações sobre os dados, podemos também visualizar cada ponto."
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": null,
 81 |    "metadata": {},
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "plot_points_regression(X,\n",
 85 |     "                       y,\n",
 86 |     "                       title='Real estate prices prediction',\n",
 87 |     "                       xlabel=\"m\\u00b2\",\n",
 88 |     "                       ylabel='$')"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "### Equações normais\n",
 96 |     "\n",
 97 |     "Dados $f:\\mathbb{R}^{n\\times m} \\rightarrow \\mathbb{R}$ e $\\mathbf{A} \\in \\mathbb{R}^{n\\times m}$, definimos o gradiente de $f$ com respeito a $\\mathbf{A}$ como:\n",
 98 |     "\n",
 99 |     "\\begin{equation*}\n",
100 |     "\\nabla_{\\mathbf{A}}f = \\frac{\\partial f}{\\partial \\mathbf{A}} =  \\begin{bmatrix}\n",
101 |     "\\frac{\\partial f}{\\partial \\mathbf{A}_{1,1}} & \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{1,m}} \\\\\n",
102 |     "\\vdots &  \\ddots & \\vdots \\\\\n",
103 |     "\\frac{\\partial f}{\\partial \\mathbf{A}_{n,1}} &  \\dots & \\frac{\\partial f}{\\partial \\mathbf{A}_{n,m}}\n",
104 |     "\\end{bmatrix}\n",
105 |     "\\end{equation*}\n",
106 |     "\n",
107 |     "Seja $\\mathbf{X} \\in \\mathbb{R}^{N\\times m}$ a matriz cujas linhas são as observações do dataset (também chamada de *design matrix*) e seja $\\mathbf{y} \\in \\mathbb{R}^{N}$ o vetor contendo todos os valores de $y_{i}$ (i.e., $\\mathbf{X}_{i,:} = \\mathbf{x}_{i}$ e $\\mathbf{y}_{i} =y_{i}$). É fácil checar que: \n",
108 |     "\n",
109 |     "\n",
110 |     "\\begin{equation}\n",
111 |     "J(\\mathbf{w}) = \\frac{1}{N}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})^{T}(\\mathbf{X}\\mathbf{w} - \\mathbf{y})\n",
112 |     "\\end{equation}\n",
113 |     "\n",
114 |     "\n",
115 |     "Usando certos conceitos básicos de derivada com matrizes podemos chegar no gradiente de $J(\\mathbf{w})$ com respeito a $\\mathbf{w}$:\n",
116 |     "\n",
117 |     "\\begin{equation}\n",
118 |     "\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = \\frac{2}{N} (\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} -\\mathbf{X}^{T}\\mathbf{y})   \n",
119 |     "\\end{equation}\n",
120 |     "\n",
121 |     "Assim, quando $\\nabla_{\\mathbf{w}}J(\\mathbf{w}) = 0$ temos que \n",
122 |     "\n",
123 |     "\\begin{equation}\n",
124 |     "\\mathbf{X}^{T}\\mathbf{X}\\mathbf{w} = \\mathbf{X}^{T}\\mathbf{y}\n",
125 |     "\\end{equation}\n",
126 |     "\n",
127 |     "Desse modo,\n",
128 |     "\n",
129 |     "\\begin{equation}\n",
130 |     "\\mathbf{w} = (\\mathbf{X}^{T}\\mathbf{X})^{-1}\\mathbf{X}^{T}\\mathbf{y}\n",
131 |     "\\end{equation}\n",
132 |     "\n",
133 |     "A solução dada por essas equações é conhecida como o método das **equações normais**. Note que esse tipo de solução tem um custo, pois conforme cresce o número de variáveis, o tempo da inversão da matriz fica proibitivo. Vale a pena ler [esse material](http://cs229.stanford.edu/notes/cs229-notes1.pdf) para ver o argumento com mais detalhes."
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {},
139 |    "source": [
140 |     "### Exercício\n",
141 |     "Implemente a predição usando o método de equações normais. Usando apenas a biblioteca **NumPy** você deve completar a função abaixo (uma introdução rápida a certas funcionalidades dessa biblioteca pode ser encontrada [aqui](http://cs231n.github.io/python-numpy-tutorial/)). Lembre que $\\mathbf{X} \\in \\mathbb{R}^{N\\times d}$, você precisa adicionar uma componente com apenas 1s a cada observação em $\\mathbf{X}$ e realizar a computação descrita acima."
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": null,
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": [
150 |     "def normal_equation_prediction(X, y):\n",
151 |     "    \"\"\"\n",
152 |     "    Calculates the prediction using the normal equation method.\n",
153 |     "    You should add a new column with 1s.\n",
154 |     "\n",
155 |     "    :param X: design matrix\n",
156 |     "    :type X: np.ndarray(shape=(N, d))\n",
157 |     "    :param y: regression targets\n",
158 |     "    :type y: np.ndarray(shape=(N, 1))\n",
159 |     "    :return: prediction\n",
160 |     "    :rtype: np.ndarray(shape=(N, 1))\n",
161 |     "    \"\"\"\n",
162 |     "    # YOUR CODE HERE:\n",
163 |     "    raise NotImplementedError(\"falta completar a função normal_equation_prediction\")\n",
164 |     "    # END YOUR CODE\n",
165 |     "    return prediction\n"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": null,
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "prediction = normal_equation_prediction(X, y)\n",
175 |     "r_2 = r_squared(y, prediction)\n",
176 |     "plot_points_regression(X,\n",
177 |     "                       y,\n",
178 |     "                       title='Real estate prices prediction',\n",
179 |     "                       xlabel=\"m\\u00b2\",\n",
180 |     "                       ylabel='$',\n",
181 |     "                       prediction=prediction,\n",
182 |     "                       legend=True,\n",
183 |     "                       r_squared=r_2)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "Você pode usar a métrica [$R^2$](https://pt.wikipedia.org/wiki/R%C2%B2) para ver o quão bem o modelo linear está se ajustando aos dados.\n",
191 |     "\n",
192 |     "**Nesse caso $R^2$ tem que estar próximo de $0.5$.**"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": null,
198 |    "metadata": {},
199 |    "outputs": [],
200 |    "source": [
201 |     "X, y = get_housing_prices_data(N=1000000, verbose=False)\n",
202 |     "init = time.time()\n",
203 |     "prediction = normal_equation_prediction(X, y)\n",
204 |     "init = time.time() - init\n",
205 |     "print(\"Tempo de execução da função de predição = {:.8f}(s)\".format(init))\n",
206 |     "print(\"Tem que ser em menos de 1 segundo \")"
207 |    ]
208 |   }
209 |  ],
210 |  "metadata": {
211 |   "kernelspec": {
212 |    "display_name": "Python 3",
213 |    "language": "python",
214 |    "name": "python3"
215 |   },
216 |   "language_info": {
217 |    "codemirror_mode": {
218 |     "name": "ipython",
219 |     "version": 3
220 |    },
221 |    "file_extension": ".py",
222 |    "mimetype": "text/x-python",
223 |    "name": "python",
224 |    "nbconvert_exporter": "python",
225 |    "pygments_lexer": "ipython3",
226 |    "version": "3.5.2"
227 |   }
228 |  },
229 |  "nbformat": 4,
230 |  "nbformat_minor": 2
231 | }
232 | 


--------------------------------------------------------------------------------
/notebooks/util/DataHolder.py:
--------------------------------------------------------------------------------
 1 | from torch.utils.data import DataLoader
 2 | import torch
 3 | 
 4 | 
 5 | class GentleLoader:
 6 |     def __init__(self, dataset, batch_size, shuffle):
 7 |         self.dataLoader = DataLoader(dataset=dataset,
 8 |                                      batch_size=batch_size,
 9 |                                      shuffle=True)
10 | 
11 |     def __iter__(self):
12 |         return ((batch_X.type(torch.float), batch_y.type(torch.long))
13 |                 for (batch_X, batch_y) in self.dataLoader)
14 | 
15 | 
class DataHolderGentle():
    """
    Bundles the train, validation and test data, each one wrapped
    in a GentleLoader.

    The train and validation loaders use ``config.batch_size``; the
    test loader uses ``test_batch``. Every loader is created with
    ``shuffle=True``.

    :param config: hyper params configuration (``batch_size`` is read)
    :type config: LRConfig or DFNConfig
    :param train_dataset: dataset of training data
    :type train_dataset: torch.utils.data.dataset.TensorDataset
    :param valid_dataset: dataset of valid data
    :type valid_dataset: torch.utils.data.dataset.TensorDataset
    :param test_dataset: dataset of test data
    :type test_dataset: torch.utils.data.dataset.TensorDataset
    :param test_batch: batch size for the testing data
    :type test_batch: int
    """
    def __init__(self,
                 config,
                 train_dataset,
                 valid_dataset,
                 test_dataset,
                 test_batch=1000):
        fit_batch = config.batch_size
        # (attribute name, dataset, batch size), built in this order
        loader_specs = (
            ("train_loader", train_dataset, fit_batch),
            ("valid_loader", valid_dataset, fit_batch),
            ("test_loader", test_dataset, test_batch),
        )
        for attr_name, data, size in loader_specs:
            setattr(self,
                    attr_name,
                    GentleLoader(dataset=data,
                                 batch_size=size,
                                 shuffle=True))
48 | 
49 | 
class DataHolder():
    """
    Bundles the train, validation and test data, each one wrapped
    in a torch DataLoader.

    The train and validation loaders use ``config.batch_size``; the
    test loader uses ``test_batch``. Every loader is created with
    ``shuffle=True``.

    :param config: hyper params configuration (``batch_size`` is read)
    :type config: LRConfig or DFNConfig
    :param train_dataset: dataset of training data
    :type train_dataset: torch.utils.data.dataset.TensorDataset
    :param valid_dataset: dataset of valid data
    :type valid_dataset: torch.utils.data.dataset.TensorDataset
    :param test_dataset: dataset of test data
    :type test_dataset: torch.utils.data.dataset.TensorDataset
    :param test_batch: batch size for the testing data
    :type test_batch: int
    """
    def __init__(self,
                 config,
                 train_dataset,
                 valid_dataset,
                 test_dataset,
                 test_batch=1000):
        fit_batch = config.batch_size

        def build(data, size):
            # every split is loaded with shuffling enabled
            return DataLoader(dataset=data, batch_size=size, shuffle=True)

        self.train_loader = build(train_dataset, fit_batch)
        self.valid_loader = build(valid_dataset, fit_batch)
        self.test_loader = build(test_dataset, test_batch)
82 | 


--------------------------------------------------------------------------------
/notebooks/util/__init__.py:
--------------------------------------------------------------------------------
1 | # util module
2 | 


--------------------------------------------------------------------------------
/notebooks/util/plots.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import itertools
  3 | import matplotlib.pyplot as plt
  4 | from mpl_toolkits.mplot3d import Axes3D
  5 | from sklearn.metrics import confusion_matrix
  6 | 
  7 | 
  8 | 
  9 | def plot_points_regression(x,
 10 |                            y,
 11 |                            title,
 12 |                            xlabel,
 13 |                            ylabel,
 14 |                            prediction=None,
 15 |                            legend=False,
 16 |                            r_squared=None,
 17 |                            position=(90, 100)):
 18 |     """
 19 |     Plots the data points and the prediction,
 20 |     if there is one.
 21 | 
 22 |     :param x: design matrix
 23 |     :type x: np.array
 24 |     :param y: regression targets
 25 |     :type y: np.array
 26 |     :param title: plot's title
 27 |     :type title: str
 28 |     :param xlabel: x axis label
 29 |     :type xlabel: str
 30 |     :param ylabel: y axis label
 31 |     :type ylabel: str
 32 |     :param prediction: model's prediction
 33 |     :type prediction: np.array
 34 |     :param legend: param to control print legends
 35 |     :type legend: bool
 36 |     :param r_squared: r^2 value
 37 |     :type r_squared: float
 38 |     :param position: text position
 39 |     :type position: tuple
 40 |     """
 41 |     fig, ax = plt.subplots(1, 1, figsize=(8, 8))
 42 |     line1, = ax.plot(x, y, 'bo', label='Real data')
 43 |     if prediction is not None:
 44 |         line2, = ax.plot(x, prediction, 'r', label='Predicted data')
 45 |         if legend:
 46 |             plt.legend(handles=[line1, line2], loc=2)
 47 |     ax.set_title(title,
 48 |                  fontsize=20,
 49 |                  fontweight='bold')
 50 |     if r_squared is not None:
 51 |         bbox_props = dict(boxstyle="square,pad=0.3",
 52 |                           fc="white", ec="black", lw=0.2)
 53 |         t = ax.text(position[0], position[1], "$R^2 ={:.4f}$".format(r_squared),
 54 |                     size=15, bbox=bbox_props)
 55 | 
 56 |     ax.set_xlabel(xlabel, fontsize=20)
 57 |     ax.set_ylabel(ylabel, fontsize=20)
 58 |     plt.show()
 59 | 
 60 | def plot_cost_function_curve(X,
 61 |                              y,
 62 |                              cost_function,
 63 |                              title,
 64 |                              weights_list=None,
 65 |                              cost_list=None,
 66 |                              position=(20, 40),
 67 |                              range_points=(20, 40)):
 68 |     """
 69 |     Plots a cost surfice.
 70 |     It assumes that weight.shape == (2,). 
 71 | 
 72 |     :param X: design matrix
 73 |     :type X: np.ndarray
 74 |     :param y: regression targets
 75 |     :type y: np.ndarray
 76 |     :param cost_function: function to compute regression cost
 77 |     :type cost_function: lambda: (np.ndarray, np.ndarray, np.ndarray) -> float
 78 |     :param title: plot's title
 79 |     :type title: str
 80 |     :param weights_list: list of weights
 81 |     :type weights_list: list
 82 |     :param cost_list: list of costs
 83 |     :type cost_list: list
 84 |     :param position: surfice rotation position
 85 |     :type position: tuple
 86 |     :param range_points: range of values for w
 87 |     :type range_points: tuple
 88 |     """
 89 | 
 90 |     w_0, w_1 = 0, 0
 91 |     ms = np.linspace(w_0 - range_points[0] , w_0 + range_points[0], range_points[0])
 92 |     bs = np.linspace(w_1 - range_points[1] , w_1 + range_points[1], range_points[1])
 93 |     M, B = np.meshgrid(ms, bs)
 94 |     MB = np.stack((np.ravel(M), np.ravel(B)), axis=1)
 95 |     size = MB.shape[0] 
 96 |     MB = MB.reshape((size, 2, 1))
 97 |     zs = np.array([cost_function(X, y, MB[i]) 
 98 |                    for i in range(size)])
 99 |     Z = zs.reshape(M.shape)
100 |     fig = plt.figure(figsize=(20, 10))
101 |     ax = fig.add_subplot(111, projection='3d')
102 |     ax.plot_surface(M, B, Z, rstride=1, cstride=1, color='b', alpha=0.2)
103 |     ax.set_xlabel('w[0]', labelpad=30, fontsize=24, fontweight='bold')
104 |     ax.set_ylabel('w[1]', labelpad=30, fontsize=24, fontweight='bold')
105 |     ax.set_zlabel('J(w)', labelpad=30, fontsize=24, fontweight='bold')
106 |     if weights_list is not None and cost_list is not None:
107 |         ax.plot([weights_list[0][0]],
108 |                 [weights_list[0][1]],
109 |                 [cost_list[0]],
110 |                 markerfacecolor=(1.0, 0.0, 0.0, 1.0),
111 |                 markeredgecolor=(1.0, 0.0, 0.0, 1.0),
112 |                 marker='o',
113 |                 markersize=7)
114 |         ax.plot([weights_list[-1][0]],
115 |                 [weights_list[-1][1]],
116 |                 [cost_list[-1]],
117 |                 markerfacecolor=(0.0, 0.0, 1.0, 1.0),
118 |                 markeredgecolor=(0.0, 0.0, 1.0, 1.0),
119 |                 marker='o',
120 |                 markersize=7)
121 |         temp_red = 1.0
122 |         temp_blue = 0.0
123 |         size = len(weights_list)
124 |         oldx = 0.0
125 |         oldy = 0.0
126 |         oldz = 0.0
127 |         for w, cost in zip(weights_list, cost_list):
128 |             rgba_color = (temp_red * 1.0, 0.0, temp_blue * 1.0, 1.0)
129 |             ax.plot([w[0]],
130 |                     [w[1]],
131 |                     [cost],
132 |                     markerfacecolor=rgba_color,
133 |                     markeredgecolor=rgba_color,
134 |                     marker='.',
135 |                     markersize=4)
136 |             if oldx + oldy + oldz != 0.0 :
137 |                 rgba_color_weak = list(rgba_color)
138 |                 rgba_color_weak[-1] = 0.3
139 |                 ax.plot([w[0], oldx],[w[1], oldy], [cost, oldz],color=rgba_color_weak)
140 |             temp_red += - 1 / size
141 |             temp_blue +=  1 / size
142 |             oldx = w[0]
143 |             oldy = w[1]
144 |             oldz = cost    
145 |     ax.view_init(elev=position[0], azim=position[1])
146 |     ax.set_title(title,
147 |              fontsize=20,
148 |              fontweight='bold')
149 |     plt.show()
150 |     
def simple_step_plot(ylist,
                     yname,
                     title,
                     figsize=(4, 4),
                     labels=None):
    """
    Plots one curve per list of values against the step number
    (steps start at 1 and are sized after the first list).

    :param ylist: list of values lists
    :type ylist: list
    :param yname: value name
    :type yname: str
    :param title: plot's title
    :type title: str
    :param figsize: plot's size
    :type figsize: tuple
    :param labels: label for each values list in ylist
    :type labels: list
    """
    n_steps = len(ylist[0])
    steps = np.arange(1, n_steps + 1)
    _, axis = plt.subplots(1, 1, figsize=figsize)
    for values in ylist:
        axis.plot(steps, values)

    plt.xlabel('step')
    plt.ylabel(yname)
    plt.title(title, fontsize=14, fontweight='bold')
    plt.grid(True)
    if labels is not None:
        plt.legend(labels, loc='upper right')
    plt.show()
185 | 
def plot9images(images, cls_true, img_shape, cls_pred=None, lspace=0.3):
    """
    Shows a 3x3 grid of images, each captioned with its true class.
    When cls_pred is given, the predicted class is added to the caption.

    :param images: images (exactly 9)
    :type images: np array
    :param cls_true: true classes (exactly 9)
    :type cls_true: np array
    :param img_shape: shape each image is reshaped to before display
    :type img_shape: tuple
    :param cls_pred: model's prediction
    :type cls_pred: None or np array
    :param lspace: vertical space between images
    :type lspace: float
    """
    assert len(images) == len(cls_true) == 9
    with_predictions = cls_pred is not None
    title = ("Some images with predictions and labels" if with_predictions
             else "Some images with labels")

    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(hspace=lspace, wspace=0.3)
    st = fig.suptitle(title, fontsize=24, fontweight='bold')

    for idx, axis in enumerate(axes.flat):
        picture = images[idx].reshape(img_shape)
        axis.imshow(picture, cmap=None)
        if with_predictions:
            caption = "Label: {0}\nPred: {1}".format(cls_true[idx],
                                                     cls_pred[idx])
        else:
            caption = "Label: {0}".format(cls_true[idx])
        axis.set_xlabel(caption)
        # hide the ticks, only the caption matters
        axis.set_xticks([])
        axis.set_yticks([])

    plt.tight_layout()
    # push the main title above the grid after the layout is computed
    st.set_y(1.05)
    fig.subplots_adjust(top=0.85)
    plt.show()
224 | 
225 | 
def plot_confusion_matrix(truth,
                          predictions,
                          classes,
                          normalize=False,
                          save=False,
                          cmap=plt.cm.Oranges,
                          path="confusion_matrix.png"):
    """
    This function plots the confusion matrix, with the accuracy
    reported in the title.
    Normalization (per true-label row) can be applied by setting
    `normalize=True`.
    'cmap' controls the color plot. colors:
    https://matplotlib.org/1.3.1/examples/color/colormaps_reference.html

    :param truth: true labels
    :type truth: np array
    :param predictions: model predictions
    :type predictions: np array
    :param classes: list of classes in order
    :type classes: list
    :param normalize: param to normalize cm matrix
    :type normalize: bool
    :param save: param to save cm plot
    :type save: bool
    :param cmap: plt color map
    :type cmap: plt.cm
    :param path: path to save image
    :type path: str
    """
    acc = np.array(truth) == np.array(predictions)
    size = float(acc.shape[0])
    acc = np.sum(acc.astype("int32")) / size
    title = "Confusion matrix of {0} examples\n accuracy = {1:.6f}".format(int(size),  # noqa
                                                                           acc)
    cm = confusion_matrix(truth, predictions)
    if normalize:
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        # a class that never occurs in `truth` has an all-zero row;
        # divide it by 1 so it stays at 0 instead of becoming NaN
        row_sums = np.where(row_sums == 0, 1, row_sums)
        cm = cm.astype('float') / row_sums
    fig = plt.figure(figsize=(9, 9))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=24, fontweight='bold')
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # cells darker than half of the maximum get white text
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label', fontweight='bold')
    plt.xlabel('Predicted label', fontweight='bold')
    # Save before showing: once plt.show() returns the figure may
    # already be released, and saving afterwards writes a blank image.
    if save:
        fig.savefig(path)
    plt.show()
282 | 
def plot_histogram_from_labels(labels, labels_legend, comment):
    """
    Plot dataset histogram, with one bar per class and the count
    written above each bar.

    Labels are expected to be the integers 0 .. len(labels_legend) - 1,
    since the bins are centred on those values.

    :param labels: array of labels
    :type labels: np.array
    :param labels_legend: list with the name of labels
    :type labels_legend: list
    :param comment: comment to dataset to be printed on title
    :type comment: str
    """
    n_classes = len(labels_legend)
    # bin edges at -0.5, 0.5, ... so that each bin is centred on a label
    data_hist = plt.hist(labels,
                         bins=np.arange(n_classes + 1) - 0.5,
                         edgecolor='black')
    axes = plt.gca()
    axes.set_ylim([0, len(labels)])

    plt.title("Histogram of {} data points ({})".format(len(labels), comment))
    # One tick per class. Passing n_classes + 1 positions for n_classes
    # names is rejected by recent matplotlib versions.
    plt.xticks(np.arange(n_classes), labels_legend)
    plt.xlabel("Label")
    plt.ylabel("Frequency")

    for i in range(n_classes):
        # data_hist[0] holds the counts, data_hist[1] the bin edges
        plt.text(data_hist[1][i] + 0.25,
                 data_hist[0][i] + (data_hist[0][i] * 0.01),
                 str(int(data_hist[0][i])))
    plt.show()
    plt.close()
311 | 
312 | 


--------------------------------------------------------------------------------
/notebooks/util/util.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from mpl_toolkits.mplot3d import Axes3D
 4 | 
 5 | 
 6 | def add_feature_ones(X):
 7 |     """
 8 |     Returns the ndarray 'X' with the extra
 9 |     feature column containing only 1s.
10 | 
11 |     :param X: input array
12 |     :type X: np.ndarray(shape=(N, d))
13 |     :return: output array
14 |     :rtype: np.ndarray(shape=(N, d+1))
15 |     """
16 |     return np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
17 | 
18 | 
def get_housing_prices_data(N, verbose=True):
    """
    Generates artificial linear data,
    where x = square meter, y = house price

    x is an evenly spaced grid between 90 and 1200 and
    y = 50 * x + 400 * gamma, with gamma drawn from a normal
    distribution (mean 30, standard deviation 10). The noise is
    redrawn until every price is strictly positive.

    :param N: data set size
    :type N: int
    :param verbose: param to control print
    :type verbose: bool
    :return: design matrix, regression targets
             (both float32 with shape (N, 1))
    :rtype: np.array, np.array
    """
    # the grid does not depend on the noise: build it once
    grid = np.linspace(90, 1200, N)
    while True:
        gamma = np.random.normal(30, 10, grid.size)
        y = (50 * grid + gamma * 400).astype("float32")
        # Vectorised minimum: the builtin min() iterates over the array
        # in Python, which is very slow for large N.
        if np.min(y) > 0:
            break
    x = grid.astype("float32").reshape((grid.shape[0], 1))
    y = y.reshape((y.shape[0], 1))
    if verbose:
        # summary statistics are only needed for the printout
        xmean, xsdt, xmax, xmin = np.mean(x), np.std(x), np.max(x), np.min(x)
        ymean, ysdt, ymax, ymin = np.mean(y), np.std(y), np.max(y), np.min(y)
        print("\nX shape = {}".format(x.shape))
        print("\ny shape = {}\n".format(y.shape))
        print("X:\nmean {}, sdt {:.2f}, max {}, min {}".format(xmean,
                                                               xsdt,
                                                               xmax,
                                                               xmin))
        print("\ny:\nmean {}, sdt {:.2f}, max {}, min {}".format(ymean,
                                                                 ysdt,
                                                                 ymax,
                                                                 ymin))
    return x, y
55 | 
56 | 
def r_squared(y, y_hat):
    """
    Calculate the R^2 value (coefficient of determination):

        R^2 = 1 - SS_res / SS_tot

    where SS_res is the sum of squared residuals (y - y_hat) and
    SS_tot is the sum of squared deviations of y from its mean.
    A perfect prediction gives 1; always predicting the mean of y
    gives 0.

    Both inputs are flattened first, so a (N, 1) target and a (N,)
    prediction are compared element by element instead of being
    broadcast against each other.

    :param y: regression targets
    :type y: np array
    :param y_hat: prediction
    :type y_hat: np array
    :return: r^2 value
    :rtype: float
    """
    targets = np.asarray(y, dtype=float).ravel()
    predictions = np.asarray(y_hat, dtype=float).ravel()
    ss_res = np.sum(np.square(targets - predictions))
    ss_tot = np.sum(np.square(targets - np.mean(targets)))
    if ss_tot == 0:
        # constant targets: the ratio is undefined, so report a perfect
        # score only when the prediction is exact
        return 1.0 if ss_res == 0 else 0.0
    return float(1 - (ss_res / ss_tot))
73 | 
74 |     
def randomize_in_place(list1, list2, init=0):
    """
    Shuffles two sequences in place. NumPy's global random generator
    is reseeded with the same value before each shuffle.

    :param list1: list
    :type list1: list or np.array
    :param list2: list
    :type list2: list or np.array
    :param init: seed
    :type init: int
    """
    for sequence in (list1, list2):
        # same seed before each shuffle
        np.random.seed(seed=init)
        np.random.shuffle(sequence)


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.14.1
2 | matplotlib==2.1.2
3 | scikit-learn==0.19.1
4 | 


--------------------------------------------------------------------------------
/slides/LICENSE:
--------------------------------------------------------------------------------
  1 | CC0 1.0 Universal
  2 | 
  3 | Statement of Purpose
  4 | 
  5 | The laws of most jurisdictions throughout the world automatically confer
  6 | exclusive Copyright and Related Rights (defined below) upon the creator and
  7 | subsequent owner(s) (each and all, an "owner") of an original work of
  8 | authorship and/or a database (each, a "Work").
  9 | 
 10 | Certain owners wish to permanently relinquish those rights to a Work for the
 11 | purpose of contributing to a commons of creative, cultural and scientific
 12 | works ("Commons") that the public can reliably and without fear of later
 13 | claims of infringement build upon, modify, incorporate in other works, reuse
 14 | and redistribute as freely as possible in any form whatsoever and for any
 15 | purposes, including without limitation commercial purposes. These owners may
 16 | contribute to the Commons to promote the ideal of a free culture and the
 17 | further production of creative, cultural and scientific works, or to gain
 18 | reputation or greater distribution for their Work in part through the use and
 19 | efforts of others.
 20 | 
 21 | For these and/or other purposes and motivations, and without any expectation
 22 | of additional consideration or compensation, the person associating CC0 with a
 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
 25 | and publicly distribute the Work under its terms, with knowledge of his or her
 26 | Copyright and Related Rights in the Work and the meaning and intended legal
 27 | effect of CC0 on those rights.
 28 | 
 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 30 | protected by copyright and related or neighboring rights ("Copyright and
 31 | Related Rights"). Copyright and Related Rights include, but are not limited
 32 | to, the following:
 33 | 
 34 |   i. the right to reproduce, adapt, distribute, perform, display, communicate,
 35 |   and translate a Work;
 36 | 
 37 |   ii. moral rights retained by the original author(s) and/or performer(s);
 38 | 
 39 |   iii. publicity and privacy rights pertaining to a person's image or likeness
 40 |   depicted in a Work;
 41 | 
 42 |   iv. rights protecting against unfair competition in regards to a Work,
 43 |   subject to the limitations in paragraph 4(a), below;
 44 | 
 45 |   v. rights protecting the extraction, dissemination, use and reuse of data in
 46 |   a Work;
 47 | 
 48 |   vi. database rights (such as those arising under Directive 96/9/EC of the
 49 |   European Parliament and of the Council of 11 March 1996 on the legal
 50 |   protection of databases, and under any national implementation thereof,
 51 |   including any amended or successor version of such directive); and
 52 | 
 53 |   vii. other similar, equivalent or corresponding rights throughout the world
 54 |   based on applicable law or treaty, and any national implementations thereof.
 55 | 
 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
 59 | and Related Rights and associated claims and causes of action, whether now
 60 | known or unknown (including existing as well as future claims and causes of
 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
 62 | duration provided by applicable law or treaty (including future time
 63 | extensions), (iii) in any current or future medium and for any number of
 64 | copies, and (iv) for any purpose whatsoever, including without limitation
 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
 66 | the Waiver for the benefit of each member of the public at large and to the
 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
 68 | shall not be subject to revocation, rescission, cancellation, termination, or
 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
 70 | by the public as contemplated by Affirmer's express Statement of Purpose.
 71 | 
 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
 73 | judged legally invalid or ineffective under applicable law, then the Waiver
 74 | shall be preserved to the maximum extent permitted taking into account
 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
 76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
 77 | non transferable, non sublicensable, non exclusive, irrevocable and
 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
 80 | provided by applicable law or treaty (including future time extensions), (iii)
 81 | in any current or future medium and for any number of copies, and (iv) for any
 82 | purpose whatsoever, including without limitation commercial, advertising or
 83 | promotional purposes (the "License"). The License shall be deemed effective as
 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
 85 | License for any reason be judged legally invalid or ineffective under
 86 | applicable law, such partial invalidity or ineffectiveness shall not
 87 | invalidate the remainder of the License, and in such case Affirmer hereby
 88 | affirms that he or she will not (i) exercise any of his or her remaining
 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
 90 | and causes of action with respect to the Work, in either case contrary to
 91 | Affirmer's express Statement of Purpose.
 92 | 
 93 | 4. Limitations and Disclaimers.
 94 | 
 95 |   a. No trademark or patent rights held by Affirmer are waived, abandoned,
 96 |   surrendered, licensed or otherwise affected by this document.
 97 | 
 98 |   b. Affirmer offers the Work as-is and makes no representations or warranties
 99 |   of any kind concerning the Work, express, implied, statutory or otherwise,
100 |   including without limitation warranties of title, merchantability, fitness
101 |   for a particular purpose, non infringement, or the absence of latent or
102 |   other defects, accuracy, or the present or absence of errors, whether or not
103 |   discoverable, all to the greatest extent permissible under applicable law.
104 | 
105 |   c. Affirmer disclaims responsibility for clearing rights of other persons
106 |   that may apply to the Work or any use thereof, including without limitation
107 |   any person's Copyright and Related Rights in the Work. Further, Affirmer
108 |   disclaims responsibility for obtaining any necessary consents, permissions
109 |   or other rights required for any use of the Work.
110 | 
111 |   d. Affirmer understands and acknowledges that Creative Commons is not a
112 |   party to this document and has no duty or obligation with respect to this
113 |   CC0 or use of the Work.
114 | 
115 | For more information, please see
116 | <http://creativecommons.org/publicdomain/zero/1.0/>
117 | 


--------------------------------------------------------------------------------
/slides/README.md:
--------------------------------------------------------------------------------
 1 | # Slides
 2 | 
 3 | ![alt text](images/cc-logo.png "CC")
 4 | 
 5 | 
 6 | Licensed under [Creative Commons CC0 1.0 Universal](https://github.com/MLIME/MAC0460/blob/master/2017/slides/LICENSE)
 7 | 
 8 | ## Uso
 9 | 
10 | Slides com os diferentes materiais dados em aula (diferentes aulas estão em diferentes pastas). Cada pasta possui uma subpasta chamada "pdf" em que você pode pegar o material. Se quiser compilar o código em latex da pasta "foo" basta rodar (em Ubuntu / Debian):
11 | 
12 | ```
13 | $ bash install.sh 
14 | $ cd foo/
15 | $ make
16 | ```
17 | 
18 | 


--------------------------------------------------------------------------------
/slides/backprop1/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds the lecture PDF from $(BASE_NAME).tex with pdflatex + bibtex.
 2 | # Intermediate files go to $(BUILD_DIR); the final PDF is copied to $(PDF_NAME).
 3 | BASE_NAME := main
 4 | BUILD_DIR := build
 5 | PDF_NAME := BackpropLecture1.pdf
 6 | 
 7 | # NOTE(review): -aux-directory is documented as a MiKTeX option; confirm that
 8 | # the TeX distribution used for the build accepts it.
 9 | PDFLATEX_OPTIONS = -halt-on-error -aux-directory=$(BUILD_DIR) -output-directory=$(BUILD_DIR) --shell-escape
10 | LATEX     := latex
11 | PDFLATEX  = pdflatex $(PDFLATEX_OPTIONS)
12 | BIBTEX    := bibtex
13 | 
14 | # 'pdf' and 'clean' are command names, not files; keep a stray file with the
15 | # same name from silencing them.
16 | .PHONY: pdf clean
17 | 
18 | pdf: $(BASE_NAME).pdf
19 | 
20 | # pdflatex writes its output into $(BUILD_DIR), so $(BASE_NAME).pdf is not
21 | # created in this directory and the recipe normally re-runs on every call.
22 | # Sequence: first pass, bibtex on the aux file, then three more passes.
23 | $(BASE_NAME).pdf: $(BASE_NAME).tex
24 | 	mkdir -p $(BUILD_DIR)
25 | 	$(PDFLATEX) $<
26 | 	$(BIBTEX) $(BUILD_DIR)/$(BASE_NAME)
27 | 	$(PDFLATEX) $<
28 | 	$(PDFLATEX) $<
29 | 	$(PDFLATEX) $<
30 | 	cp $(BUILD_DIR)/$(BASE_NAME).pdf $(PDF_NAME)
31 | 
32 | # Remove the build directory (via the variable, so renaming BUILD_DIR keeps
33 | # 'clean' correct) and the copied PDF.
34 | clean:
35 | 	rm -rf $(BUILD_DIR) $(PDF_NAME)
36 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/3x3.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: a 3x3 grid with one digit per cell
 2 | \centering
 3 | 
 4 | \scalebox{0.7}{ % whole picture scaled to 70%
 5 |     \begin{tikzpicture}[scale=1.4,every node/.style={minimum size=1cm}, on grid]
 6 |             \draw[fill=base02,opacity=0.4] (0,0) rectangle (3,3); % translucent background square; base02/base03 are colours defined outside this file
 7 |             \draw[draw=base03,thick] (0,0) grid (3,3); % cell borders
 8 |             \node (00) at (0.5,2.5) {\Large 0}; % node names are (row)(column); row 0 is the top row
 9 |             \node (01) at (1.5,2.5) {\Large 1};
10 |             \node (02) at (2.5,2.5) {\Large 2};
11 |             \node (10) at (0.5,1.5) {\Large 2}; % middle row
12 |             \node (11) at (1.5,1.5) {\Large 2};
13 |             \node (12) at (2.5,1.5) {\Large 0};
14 |             \node (20) at (0.5,0.5) {\Large 0}; % bottom row
15 |             \node (21) at (1.5,0.5) {\Large 1};
16 |             \node (22) at (2.5,0.5) {\Large 2};
17 |     \end{tikzpicture}
18 | } % scalebox
19 | \end{figure}
20 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/5x5.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: a 5x5 grid with one digit per cell
 2 | \centering
 3 | 
 4 | \scalebox{0.7}{ % whole picture scaled to 70%
 5 |     \begin{tikzpicture}[scale=1.5,every node/.style={minimum size=2cm}, on grid]
 6 |             \draw[fill=blue2,opacity=1] (0,0) rectangle (5,5); % opacity must lie in [0,1]; the former 1.2 was out of range. blue2 is defined outside this file
 7 |             \draw[draw=base03,thick] (0,0) grid (5,5); % cell borders
 8 |             \node (00) at (0.5,4.5) {\LARGE 3}; % column 0; node names are (row)(column), row 0 on top
 9 |             \node (10) at (0.5,3.5) {\LARGE 0};
10 |             \node (20) at (0.5,2.5) {\LARGE 3};
11 |             \node (30) at (0.5,1.5) {\LARGE 2};
12 |             \node (40) at (0.5,0.5) {\LARGE 2};
13 | 
14 |             \node (01) at (1.5,4.5) {\LARGE 3}; % column 1
15 |             \node (11) at (1.5,3.5) {\LARGE 0};
16 |             \node (21) at (1.5,2.5) {\LARGE 1};
17 |             \node (31) at (1.5,1.5) {\LARGE 0};
18 |             \node (41) at (1.5,0.5) {\LARGE 0};
19 | 
20 |             \node (02) at (2.5,4.5) {\LARGE 2}; % column 2
21 |             \node (12) at (2.5,3.5) {\LARGE 1};
22 |             \node (22) at (2.5,2.5) {\LARGE 2};
23 |             \node (32) at (2.5,1.5) {\LARGE 0};
24 |             \node (42) at (2.5,0.5) {\LARGE 0};
25 | 
26 |             \node (03) at (3.5,4.5) {\LARGE 1}; % column 3
27 |             \node (13) at (3.5,3.5) {\LARGE 3};
28 |             \node (23) at (3.5,2.5) {\LARGE 2};
29 |             \node (33) at (3.5,1.5) {\LARGE 2};
30 |             \node (43) at (3.5,0.5) {\LARGE 0};
31 | 
32 |             \node (04) at (4.5,4.5) {\LARGE 0}; % column 4
33 |             \node (14) at (4.5,3.5) {\LARGE 1};
34 |             \node (24) at (4.5,2.5) {\LARGE 3};
35 |             \node (34) at (4.5,1.5) {\LARGE 2};
36 |             \node (44) at (4.5,0.5) {\LARGE 1};
37 |     \end{tikzpicture}
38 | } % scalebox
39 | \end{figure}
40 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/BackPropScalar.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: scalar chain x -> y -> z with derivative nodes revealed step by step
 2 | \centering
 3 | 
 4 | \scalebox{1.5}{ % whole picture scaled to 150%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % variable nodes, stacked vertically: x (bottom), y (middle), z (top)
 8 | \node[op] (y) {y};
 9 | \node[op, above=30pt of y] (z) {z};
10 | \node[op, below=30pt of y] (x) {x};
11 | 
12 | % derivative nodes, each followed by a text node holding its value; \visible<k-> shows the content from overlay k onwards
13 | \visible<2->{\node[gradient, right=50pt of z] (delzdely) {$\frac{dz}{dy}$};}
14 | \visible<2->{\node[textonly, right=0.1pt of delzdely] {$=\frac{1}{x^{2}}$};}
15 | \visible<3->{\node[gradient, right=50pt of y] (delydelx) {$\frac{dy}{dx}$};}
16 | \visible<3->{\node[textonly, right=0.1pt of delydelx] {$=2x$};}
17 | \visible<4->{\node[gradient, right=80pt of x] (delzdelx) {$\frac{dz}{dx}$};}
18 | \visible<4->{\node[textonly, right=0.1pt of delzdelx] {$=\frac{2}{x}$};} % product of the two values above: (1/x^2)(2x) = 2/x
19 | 
20 | 
21 | % edges: x -> y -> z, then one edge from each variable to its derivative node
22 | \path[tedge] (x) -- (y);
23 | \path[tedge] (y) -- (z);
24 | \visible<2->{\path[tedge] (z) -- (delzdely);}
25 | \visible<3->{\path[tedge] (y) -- (delydelx);}
26 | \visible<4->{\path[tedge] (x) -- (delzdelx);}
27 | \visible<4->{\path[tedge] (delydelx) -- (delzdelx);} % dy/dx feeds dz/dx
28 | \visible<4->{\path[tedge] (delzdely) to [bend right, out=-310, in=100, distance=40pt] (delzdelx);} % dz/dy feeds dz/dx via a curved edge
29 | 
30 | 
31 | % \visible<2->{\node[gradient, left=20pt of V] (grad-V) {$\nabla_{\textbf{V}}\textbf{L}$};}
32 | 
33 | 
34 | \end{tikzpicture}
35 | } % scalebox
36 | \end{figure}
37 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Compgraph1.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: two side-by-side drawings of the graph for v = W x
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{ % whole picture scaled to 130%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes
10 | \node[op] (W1) {$\vect{W}$}; % left copy: inputs W and x
11 | \node[textonly, below=20pt of W1] (inv1) {}; % empty helper node used only for positioning
12 | \node[op, below=40pt of W1] (x1) {$\vect{x}$};
13 | \node[op, right=30.5pt of inv1] (v1) {matmul}; % left copy: the result node is labelled with the operation name
14 | \node[op, right=120pt of W1] (W2) {$\vect{W}$}; % right copy: inputs W and x
15 | \node[textonly, below=20pt of W2] (inv2) {}; % empty helper node used only for positioning
16 | \node[op, below=40pt of W2] (x2) {$\vect{x}$};
17 | \node[op, right=30.5pt of inv2] (v2) {$\vect{v}$}; % right copy: the result node is labelled with the variable v
18 | \node[textonly, left=1.5pt of v2] (matmtul) {matmul}; % operation name written next to v
19 | \node[textonly, left=40pt of inv2] (inv3) {}; % empty helper node used to place the equation
20 | \node[textonly, below=50.5pt of inv3] (result) {{\large $\vect{v} = \vect{W} \vect{x} $}}; % equation shown under the graphs
21 | 
22 | % edges
23 | \path[tedge] (W1) -- (v1);
24 | \path[tedge] (x1) -- (v1);
25 | \path[tedge] (W2) -- (v2);
26 | \path[tedge] (x2) -- (v2);
27 | 
28 | \end{tikzpicture}
29 | } % scalebox
30 | \end{figure}
31 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Compgraph2.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: graph (W1, x) -> v -> h, then (W2, h) -> z -> y-hat
 2 | \centering
 3 | 
 4 | \scalebox{1.15}{ % whole picture scaled to 115%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes
10 | \node[op] (W1) {$\vect{W}_{1}$};
11 | \node[textonly, below=40pt of W1] (inv1) {}; % empty helper node used only for positioning
12 | \node[op, below=20pt of inv1] (x) {$\vect{x}$};
13 | \node[op, right=30.5pt of inv1] (v) {$\vect{v}$};
14 | \node[textonly, left=1.5pt of v] (matmtul1) {matmul}; % operation name written next to v
15 | \node[op, right=35pt of v] (h) {$\vect{h}$};
16 | \node[op, right=110pt of W1] (W2) {$\vect{W}_{2}$};
17 | \node[op, right=35pt of h] (z) {$\vect{z}$};
18 | \node[op, right=55pt of z] (y) {$\hat{\vect{y}}$};
19 | \node[textonly, above left=1.5pt of z] (matmtul2) {matmul}; % operation name written next to z
20 | 
21 | 
22 | % edges
23 | \path[tedge] (W1) -- (v);
24 | \path[tedge] (x) -- (v);
25 | \path[tedge] (v) edge node[above=1pt] {{\Large$\sigma$}} (h); % edge labelled with sigma
26 | \path[tedge] (z) edge node[above=1pt] {{\Large softmax}}  (y); % edge labelled with softmax
27 | \path[tedge] (W2) -- (z);
28 | \path[tedge] (h) -- (z);
29 | 
30 | 
31 | \end{tikzpicture}
32 | } % scalebox
33 | \end{figure}
34 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Compgraph3.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: two sub-graphs, each enclosed in an orange circle and tagged with a large label (h, y-hat)
 2 | \centering
 3 | 
 4 | \scalebox{1.2}{ % whole picture scaled to 120%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes on the left
10 | \node[op] (W1) {$\vect{W}_{1}$};
11 | \node[textonly, below=20pt of W1] (inv1) {}; % empty helper node used only for positioning
12 | \node[op, right=30pt of inv1] (v) {$\vect{v}$};
13 | \node[op, below right=30.5pt of v] (x) {$\vect{x}$};
14 | \node[op, above=30.5pt of v] (hprime) {$\vect{h}^{\prime}$};
15 | \node[textonly, above right=2pt of hprime] (h) {{\LARGE$\vect{h}$}}; % large label for the left group
16 | 
17 | 
18 | %% circle around the left group, drawn on the background layer
19 | \begin{scope}[on background layer]
20 | \coordinate (p1) at (hprime.north); % three anchor points that the circle passes through
21 | \coordinate (p2) at (v.south east);
22 | \coordinate (p3) at (W1.west);
23 | \tkzCircumCenter(p1,p2,p3)
24 | \tkzGetPoint{O} % store the circumcentre computed above as point O
25 | \tkzDrawCircle[draw=orange, line width=1.5pt, fill=orange!60](O,p1)
26 | \end{scope}
27 | 
28 | % edges on the left
29 | \path[tedge] (W1) -- (v);
30 | \path[tedge] (x) -- (v);
31 | \path[tedge] (v) -- (hprime);
32 | 
33 | % nodes on the right
34 | \node[op, right=30pt of x] (hh) {$\vect{h}$};
35 | \node[op, above right=30.5pt of hh] (z) {$\vect{z}$};
36 | \node[op, above=30.5pt of z] (yprime) {$\vect{y}^{\prime}$};
37 | \node[textonly, right=30pt of z] (inv2) {}; % empty helper node used only for positioning
38 | \node[op, above=20pt of inv2] (W2) {$\vect{W}_{2}$};
39 | \node[textonly, above left=1pt of yprime] (yhat) {{\LARGE$\hat{\vect{y}}$}}; % large label for the right group
40 | 
41 | 
42 | %% circle around the right group; p1, p2, p3 and O are redefined for it
43 | \begin{scope}[on background layer]
44 | \coordinate (p1) at (yprime.north);
45 | \coordinate (p2) at (W2.north east);
46 | \coordinate (p3) at (z.south west);
47 | \tkzCircumCenter(p1,p2,p3)
48 | \tkzGetPoint{O}
49 | \tkzDrawCircle[draw=orange, line width=1.5pt, fill=orange!60](O,p1)
50 | \end{scope}
51 | 
52 | 
53 | 
54 | % edges on the right
55 | \path[tedge] (W2) -- (z);
56 | \path[tedge] (hh) -- (z);
57 | \path[tedge] (z) -- (yprime);
58 | 
59 | 
60 | \end{tikzpicture}
61 | } % scalebox
62 | \end{figure}
63 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Compgraph4.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: three-node vertical chain x -> h -> y-hat
 2 | \centering
 3 | 
 4 | \scalebox{1.5}{ % whole picture scaled to 150%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % nodes, stacked vertically: x (bottom), h (middle), y-hat (top)
 8 | \node[op] (h) {$\vect{h}$};
 9 | \node[op, above=30pt of h] (y) {$\hat{\vect{y}}$};
10 | \node[op, below=30pt of h] (x) {$\vect{x}$};
11 | 
12 | 
13 | % edges
14 | \path[tedge] (x) -- (h);
15 | \path[tedge] (h) -- (y);
16 | 
17 | \end{tikzpicture}
18 | } % scalebox
19 | \end{figure}
20 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/DFNclassification.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: input vector x -> model h(x; theta) -> vector of n class probabilities
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{ % whole picture scaled to 130%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes
10 | \node[textonly] (vectorx) {$\begin{bmatrix}0.34\\ \vdots \\0.06\end{bmatrix}$}; % example input column vector
11 | \node[textonly, above=1pt of vectorx] (x) {$\vect{x}$}; % name above the input vector
12 | \node[textonly, below=1pt of vectorx] (dimension1) {{\small$d\times 1$}}; % size annotation below the input vector
13 | \node[op, right=30pt of vectorx] (model) {$h(\vect{x}; \vect{\theta})$};
14 | \node[textonly, right=30pt of model] (vectoryhat) {$\begin{bmatrix}p(y=1| \vect{x};\vect{\theta})\\ \vdots \\p(y=n| \vect{x};\vect{\theta})\end{bmatrix}$}; % output column vector, one probability per class 1..n
15 | \node[textonly, above=1pt of vectoryhat] (yhat) {$\hat{\vect{y}}$}; % name above the output vector
16 | \node[textonly, below=1pt of vectoryhat] (dimension2) {{\small$n\times 1$}}; % size annotation below the output vector
17 | 
18 | 
19 | 
20 | % edges
21 | \path[tedge] (vectorx) -- (model);
22 | \path[tedge] (model) -- (vectoryhat);
23 | 
24 | 
25 | \end{tikzpicture}
26 | } % scalebox
27 | \end{figure}
28 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/DFNclassification2.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: input vector x -> model h(x; theta) -> single probability p(y=1 | x; theta)
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{ % whole picture scaled to 130%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes
10 | \node[textonly] (vectorx) {$\begin{bmatrix}0.34\\ \vdots \\0.06\end{bmatrix}$}; % example input column vector
11 | \node[textonly, above=1pt of vectorx] (x) {$\vect{x}$}; % name above the input vector
12 | \node[textonly, below=1pt of vectorx] (dimension1) {{\small$d\times 1$}}; % size annotation below the input vector
13 | \node[op, right=30pt of vectorx] (model) {$h(\vect{x}; \vect{\theta})$};
14 | \node[textonly, right=30pt of model] (vectoryhat) {$p(y=1| \vect{x};\vect{\theta})$}; % output is one probability, not a vector
15 | \node[textonly, above=1pt of vectoryhat] (yhat) {$\hat{\vect{y}}$}; % name above the output
16 | 
17 | 
18 | 
19 | 
20 | % edges
21 | \path[tedge] (vectorx) -- (model);
22 | \path[tedge] (model) -- (vectoryhat);
23 | 
24 | 
25 | \end{tikzpicture}
26 | } % scalebox
27 | \end{figure}
28 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/DeepNN.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: vertical stack x -> h(1) -> h(2) -> ... -> h(n) -> y-hat with layer captions on the right
 2 | \centering
 3 | 
 4 | \scalebox{0.7}{ % whole picture scaled to 70%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % layer nodes, bottom to top: x, h(1), h(2), dots, h(n), y-hat
 8 | \node[op] (h1) {$\vect{h}^{(1)}$};
 9 | \node[op, below=30pt of h1] (x) {$\vect{x}$};
10 | \node[op, above=30pt of h1] (h2) {$\vect{h}^{(2)}$};
11 | \node[textonly, above=20pt of h2] (hdots) {{\LARGE$\vdots$}};
12 | \node[op, above=20pt of hdots] (hn) {$\vect{h}^{(n)}$};
13 | \node[op, above=30pt of hn] (y) {$\hat{\vect{y}}$};
14 | 
15 | % empty anchor nodes (*inv1, *inv2) and caption nodes (*inv3) for each group of layers
16 | \node[textonly, above right=2pt of y] (yinv1) {};
17 | \node[textonly, below right=2pt of y] (yinv2) {};
18 | \node[textonly, right=26pt of y] (yinv3) {output layer};
19 | 
20 | \node[textonly, above right=2pt of hn] (hinv1) {}; % upper anchor sits at h(n) ...
21 | \node[textonly, below right=2pt of h1] (hinv2) {}; % ... lower anchor at h(1), so the caption spans all hidden nodes
22 | \node[textonly, right=26pt of h2] (hinv3) {hidden layers};
23 | 
24 | \node[textonly, above right=2pt of x] (xinv1) {};
25 | \node[textonly, below right=2pt of x] (xinv2) {};
26 | \node[textonly, right=26pt of x] (xinv3) {input layer};
27 | 
28 | 
29 | \visible<2->{\node[textonly, above left=0.1pt and 0.1pt of y] (d1) {};} % from overlay 2 on: anchors at the top (y-hat) and bottom (x) of the stack
30 | \visible<2->{\node[textonly, below left=0.1pt and 0.1pt of x] (d2) {};}
31 | \visible<2->{\node[textonly, left=56pt of hdots] (d3) {{\large\alert{deep model}}};} % highlighted caption on the left
32 | 
33 | 
34 | % edges
35 | \path[tedge] (x) -- (h1);
36 | \path[tedge] (h1) -- (h2);
37 | \path[tedge] (h2) -- (hdots);
38 | \path[tedge] (hdots) -- (hn);
39 | \path[tedge] (hn) -- (y);
40 | 
41 | % visual aid edges: two curves from each caption to its upper and lower anchor
42 | \draw[orange!120, line width=1mm]  (yinv3) to [out=180,in=-80] (yinv1);
43 | \draw[orange!120, line width=1mm]  (yinv3) to [out=180,in=80] (yinv2);
44 | 
45 | 
46 | \draw[orange!120, line width=1mm]  (hinv3) to [out=180,in=-80] (hinv1);
47 | \draw[orange!120, line width=1mm]  (hinv3) to [out=180,in=80] (hinv2);
48 | 
49 | 
50 | \draw[orange!120, line width=1mm]  (xinv3) to [out=180,in=-80] (xinv1);
51 | \draw[orange!120, line width=1mm]  (xinv3) to [out=180,in=80] (xinv2);
52 | 
53 | 
54 | \visible<2->{\draw[orange!180, line width=1mm]  (d3) to [out=0,in=180] (d1);} % from overlay 2 on: curves from the "deep model" caption to both ends of the stack
55 | \visible<2->{\draw[orange!180, line width=1mm]  (d3) to [out=0,in=180] (d2);}
56 | 
57 | 
58 | 
59 | \end{tikzpicture}
60 | } % scalebox
61 | \end{figure}
62 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Dropout1.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: network with 3 inputs, 4 hidden nodes and 1 output; some edges use the dashed style
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{ % whole picture scaled to 130%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % nodes: inputs x1..x3, hidden h1..h4, output y-hat
 8 | \node[op] (x2) {$x_2$};
 9 | \node[op, above=20pt of x2] (x1) {$x_1$};
10 | \node[op, below=20pt of x2] (x3) {$x_3$};
11 | \node[op, above right=10pt and 40pt of x2] (h2) {$h_2$};
12 | \node[op, above=20pt of h2] (h1) {$h_1$};
13 | \node[op, below=20pt of h2] (h3) {$h_3$};
14 | \node[op, below=20pt of h3] (h4) {$h_4$};
15 | \node[op, right=90pt of x2] (o) {$\hat{y}$};
16 | 
17 | % edges; tedge_dashed presumably marks a dropped connection (style defined outside this file)
18 | \path[tedge_dashed] (x1) edge node[pos=0.25, above=1.8pt, dashed] {\large{\alert{$0$}}} (h1); % dashed, labelled with a highlighted 0
19 | \path[tedge] (x1) edge node[above=1.2pt] {} (h2);
20 | \path[tedge] (x1) edge node[above=1.8pt] {} (h3);
21 | \path[tedge] (x1) edge node[above=1.8pt] {} (h4);
22 | 
23 | \path[tedge] (x2) edge node[above=1.8pt] {} (h1);
24 | \path[tedge] (x2) edge node[above=1.8pt] {} (h2);
25 | \path[tedge] (x2) edge node[above=1.8pt] {} (h3);
26 | \path[tedge] (x2) edge node[above=1.8pt] {} (h4);
27 | 
28 | \path[tedge] (x3) edge node[above=1.8pt] {} (h1);
29 | \path[tedge] (x3) edge node[above=1.8pt] {} (h2);
30 | \path[tedge] (x3) edge node[above=1.8pt] {} (h3);
31 | \path[tedge_dashed] (x3) edge node[above=1.0pt] {} (h4); % dashed
32 | 
33 | \path[tedge_dashed] (h1) edge node[pos=0.25, above=1.8pt, right=0.1cm] {} (o); % dashed
34 | \path[tedge] (h2) edge node[above=1.8pt] {} (o);
35 | \path[tedge] (h3) edge node[above=1.8pt] {} (o);
36 | \path[tedge] (h4) edge node[above=1.8pt] {} (o);
37 | 
38 | 
39 | % info edges
40 | 
41 | 
42 | \end{tikzpicture}
43 | } % scalebox
44 | \end{figure}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Dropout2.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: network with 3 inputs, 4 hidden nodes and 1 output; some edges use the dashed style
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{ % whole picture scaled to 130%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % nodes: inputs x1..x3, hidden h1..h4, output y-hat
 8 | \node[op] (x2) {$x_2$};
 9 | \node[op, above=20pt of x2] (x1) {$x_1$};
10 | \node[op, below=20pt of x2] (x3) {$x_3$};
11 | \node[op, above right=10pt and 40pt of x2] (h2) {$h_2$};
12 | \node[op, above=20pt of h2] (h1) {$h_1$};
13 | \node[op, below=20pt of h2] (h3) {$h_3$};
14 | \node[op, below=20pt of h3] (h4) {$h_4$};
15 | \node[op, right=90pt of x2] (o) {$\hat{y}$};
16 | % edges; tedge_dashed presumably marks a dropped connection (style defined outside this file)
17 | \path[tedge] (x1) edge node[pos=0.25, above=1.8pt] {} (h1);
18 | \path[tedge] (x1) edge node[above=1.2pt] {} (h2);
19 | \path[tedge] (x1) edge node[above=1.8pt] {} (h3);
20 | \path[tedge_dashed] (x1) edge node[above=1.8pt] {} (h4); % dashed
21 | 
22 | \path[tedge] (x2) edge node[above=1.8pt] {} (h1);
23 | \path[tedge_dashed] (x2) edge node[above=1.8pt] {} (h2); % dashed
24 | \path[tedge] (x2) edge node[above=1.8pt] {} (h3);
25 | \path[tedge] (x2) edge node[above=1.8pt] {} (h4);
26 | 
27 | \path[tedge_dashed] (x3) edge node[above=1.8pt] {} (h1); % dashed
28 | \path[tedge] (x3) edge node[above=1.8pt] {} (h2);
29 | \path[tedge] (x3) edge node[above=1.8pt] {} (h3);
30 | \path[tedge] (x3) edge node[above=1.0pt] {} (h4);
31 | 
32 | \path[tedge] (h1) edge node[pos=0.25, above=1.8pt, right=0.1cm] {} (o);
33 | \path[tedge] (h2) edge node[above=1.8pt] {} (o);
34 | \path[tedge] (h3) edge node[above=1.8pt] {} (o);
35 | \path[tedge_dashed] (h4) edge node[below=1.8pt] {\large{\alert{$0$}}}(o); % dashed, labelled with a highlighted 0
36 | 
37 | 
38 | % info edges
39 | 
40 | 
41 | \end{tikzpicture}
42 | } % scalebox
43 | \end{figure}
44 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Entropy1.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: two example distributions p and q with their entropies and the entropy formula
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{ % whole picture scaled to 130%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes
10 | \node[textonly] (pprob) {$\begin{bmatrix}0.8\\0.2\end{bmatrix}$}; % example distribution p
11 | \node[textonly, right=40pt of pprob] (qprob) {$\begin{bmatrix}0.5\\0.5\end{bmatrix}$}; % example distribution q (uniform)
12 | \node[textonly, above=1pt of pprob] (p) {$\vect{p}$};
13 | \node[textonly, above=1pt of qprob] (q) {$\vect{q}$};
14 | 
15 | 
16 | \node[textonly, below=20pt of pprob] (Hp) {$H(\vect{p}) = 0.72$}; % 0.8*log2(1/0.8) + 0.2*log2(1/0.2) ~ 0.72 bits
17 | \node[textonly, below=20pt of qprob] (Hq) {$H(\vect{q}) = 1$}; % exactly 1 bit for the uniform two-outcome case
18 | \node[textonly, below=30pt of Hp] (inv1) {}; % empty helper node used to place the formula
19 | \node[textonly,  right=-40pt of inv1] (Hquation) {{\Large$H(\vect{p}) = \sum_{i} \vect{p}_i\log_2\frac{1}{\vect{p}_i}$}}; % base 2 written explicitly: the values above only hold for log base 2
20 | 
21 | 
22 | \end{tikzpicture}
23 | } % scalebox
24 | \end{figure}
25 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Entropy2.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: plot of the entropy of the two-outcome distribution [p, 1-p] as a function of p
 2 | \centering
 3 | 
 4 | \scalebox{1.0}{
 5 | \begin{tikzpicture}
 6 |     \begin{axis}%
 7 |     [
 8 |         grid=major,     
 9 |         xmin=0,
10 |         xmax=1,
11 |         axis x line=bottom,
12 |         ytick={0,.5,1},
13 |         ymax=1.1,
14 |         axis y line=middle,
15 | 		xlabel= $p$,
16 |   		ylabel= $H(\vect{p})$,
17 |     ]
18 |         \addplot%
19 |         [	orange!180,
20 |         	ultra thick,
21 | %             blue,%
22 |             mark=none,
23 |             samples=200,
24 |             domain=0.0001:0.9999, % stops short of 0 and 1, where 1/x and 1/(1-x) would divide by zero
25 |         ]
26 |         (x,{(x*log2(1/x)) + ((1-x)*log2(1/(1-x)))}); % entropy in bits: x*log2(1/x) + (1-x)*log2(1/(1-x))
27 |     \end{axis}
28 | \node[textonly] (pprob) at (8.75,2.8) {{\Large$\begin{bmatrix}p\\1-p\end{bmatrix}$}}; % the distribution being plotted, placed at fixed picture coordinates
29 | \node[textonly, above=1pt of pprob] (p) {{\Large$\vect{p}$}};
30 | \end{tikzpicture}
31 | } % scalebox
32 | \end{figure}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Kernel_image_pro.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: one source image with arrows to three processed images and three 3x3 kernels in between
 2 | \centering
 3 | 
 4 | \scalebox{0.70}{ % whole picture scaled to 70%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % image nodes: source on the left; edge, sharpen and blur results stacked on the right
10 | \node (original)
11 |     {\includegraphics[width=.15\textwidth]{images/Vd-Orig.png}};
12 | \node[above right= 20pt and 150pt of original] (edge) {\includegraphics[width=.15\textwidth]{images/Vd-Edge3.png}};
13 | \node[below=20pt of edge] (sharpen) {\includegraphics[width=.15\textwidth]{images/Vd-Sharp.png}};
14 | \node[below=20pt of sharpen] (blur) {\includegraphics[width=.15\textwidth]{images/Vd-Blur1.png}};
15 | 
16 | % edges: one arrow from the source image to each result image
17 | \path[tedge, orange!120, line width=1mm]  (original) to [out=90,in=180, looseness=9, distance=125pt] (edge); % leaves upwards, enters from the left
18 | \path[tedge, orange!120, line width=1mm]  (original) to [out=0,in=180] (sharpen); % leaves to the right, enters from the left
19 | \path[tedge, orange!120, line width=1mm]  (original) to [out=-90,in=180, looseness=9, distance=125pt] (blur); % leaves downwards, enters from the left
20 | 
21 | % nodes for kernels: kernel1 right of the source image, kernel2 above it, kernel3 below it
22 | \node[op3, right=20pt of original] (kernel1) {$\begin{bmatrix}0 & -1 & 0\\ -1 & 5 & -1\\0 & -1 & 0\end{bmatrix}$}; % presumably the sharpen kernel (sits on the arrow to the sharpen image)
23 | \node[op3, above=10pt of kernel1] (kernel2) {$\begin{bmatrix}-1 & -1 & -1\\ -1 & 8 & -1\\-1 & -1 & -1\end{bmatrix}$}; % presumably the edge-detection kernel
24 | \node[op3, below=10pt of kernel1] (kernel3) {$\frac{1}{16}\begin{bmatrix}1 & 2 & 1\\ 2 & 4 & 2\\1 & 2 & 1\end{bmatrix}$}; % presumably the blur kernel
25 | 
26 | \end{tikzpicture}
27 | } % scalebox
28 | \vspace*{-10mm}
29 | \caption{Exemplo de aplicação de filtros em uma imagem (extraído de \url{https://en.wikipedia.org/wiki/Kernel_(image_processing)})}
30 | \end{figure}
31 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/KullbackLeibler.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: two pairs of distributions with their KL divergences and the KL formula
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{ % whole picture scaled to 130%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes
10 | \node[textonly] (p1prob) {$\begin{bmatrix}0.8\\0.2\end{bmatrix}$}; % first pair: p ...
11 | \node[textonly, right=30pt of p1prob] (q1prob) {$\begin{bmatrix}0.5\\0.5\end{bmatrix}$}; % ... and q
12 | \node[textonly, above=1pt of p1prob] (p1) {$\vect{p}$};
13 | \node[textonly, above=1pt of q1prob] (q1) {$\vect{q}$};
14 | \node[textonly, right=20pt of q1prob] (p2prob) {$\begin{bmatrix}0.8\\0.2\end{bmatrix}$}; % second pair: p' ...
15 | \node[textonly, right=30pt of p2prob] (q2prob) {$\begin{bmatrix}0.88\\0.12\end{bmatrix}$}; % ... and q'
16 | \node[textonly, above=1pt of p2prob] (p2) {$\vect{p}^{\prime}$};
17 | \node[textonly, above=1pt of q2prob] (q2) {$\vect{q}^{\prime}$};
18 | 
19 | 
20 | \node[textonly, below right=20pt and -15pt of p1prob] (Dkl1) {$D_{KL}(\vect{p}||\vect{q}) = 0.28$}; % 0.8*log2(0.8/0.5) + 0.2*log2(0.2/0.5) ~ 0.28 bits
21 | \node[textonly, below right=20pt and -15pt of p2prob] (Dkl2) {$D_{KL}(\vect{p}^{\prime}||\vect{q}^{\prime}) = 0.04$}; % 0.8*log2(0.8/0.88) + 0.2*log2(0.2/0.12) ~ 0.04 bits
22 | \node[textonly, below=20pt of Dkl1] (inv1) {}; % empty helper node used to place the formula
23 | \node[textonly,  right=-40pt of inv1] (Dklequation) {{\Large$D_{KL}(\vect{p}||\vect{q}) = \sum_{i} \vect{p}_i\log_2\frac{\vect{p}_i}{\vect{q}_i}$}}; % base 2 written explicitly: the values above only hold for log base 2
24 | 
25 | 
26 | 
27 | % edges: curves tying each divergence value to the two distributions it compares
28 | \draw[orange!120, line width=1mm]  (Dkl1) to [out=150,in=-90] (p1prob);
29 | \draw[orange!120, line width=1mm] (Dkl1) to [out=150,in=-100] (q1prob);
30 | 
31 | \draw[orange!120, line width=1mm]  (Dkl2) to [out=150,in=-90] (p2prob);
32 | \draw[orange!120, line width=1mm] (Dkl2) to [out=150,in=-100] (q2prob);
33 | 
34 | \end{tikzpicture}
35 | } % scalebox
36 | \end{figure}
37 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/NN.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!] % Figure: chain x -> h -> y-hat with weight-labelled edges and the matching equations
 2 | \centering
 3 | 
 4 | \scalebox{1.5}{ % whole picture scaled to 150%
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % nodes, stacked vertically: x (bottom), h (middle), y-hat (top)
 8 | \node[op] (h) {$\vect{h}$};
 9 | \node[op, above=30pt of h] (y) {$\hat{\vect{y}}$};
10 | \node[op, below=30pt of h] (x) {$\vect{x}$};
11 | \node[textonly, right=76pt of h] (inv) {}; % empty helper node used to place the equations
12 | \node[textonly, above=16pt of inv] (f1) {$\hat{\vect{y}} = f^{(2)}(f^{(1)}(\vect{x}; \vect{W}_1); \vect{W}_2)$}; % output written as a vector, matching the y-hat node above
13 | \node[textonly, below=10pt of f1] (f2) {$\hat{\vect{y}} = \mathrm{softmax}( \vect{W}_2 (\sigma(\vect{W}_1\vect{x})))$}; % upright function name; bare letters in math mode are typeset as a product of variables
14 | 
15 | 
16 | % edges, each labelled with the weight matrix applied along it
17 | \path[tedge] (x) edge [out=90,in=-90] node[right] {$\vect{W}_{1}$} (h);
18 | \path[tedge] (h) edge [out=90,in=-90] node[right] {$\vect{W}_{2}$} (y);
19 | 
20 | \end{tikzpicture}
21 | } % scalebox
22 | \end{figure}
23 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/OldNN1.tex:
--------------------------------------------------------------------------------
  1 | \begin{figure}[ht!]
  2 | \centering
  3 | 
  4 | \scalebox{0.7}{
  5 | \begin{tikzpicture}[auto]
  6 | 
  7 | % operations =============================
  8 | 
  9 | % input layer
 10 | \node[op] (x5) {$x_5$};
 11 | \node[op, above=2.5pt of x5] (x4) {$x_4$};
 12 | \node[op, above=2.5pt of x4] (x3) {$x_3$};
 13 | \node[op, above=2.5pt of x3] (x2) {$x_2$};
 14 | \node[op, above=2.5pt of x2] (x1) {$x_1$};
 15 | \node[op, below=2.5pt of x5] (x6) {$x_6$};
 16 | \node[op, below=2.5pt of x6] (x7) {$x_7$};
 17 | \node[op, below=2.5pt of x7] (x8) {$x_8$};
 18 | \node[op, below=2.5pt of x8] (x9) {$x_9$};
 19 | \node[op, below=2.5pt of x9] (x10) {$x_{10}$};
 20 | 
 21 | % hidden layer
 22 | \node[op,  right=130pt of x5] (v2) {$v_2$};
 23 | \node[op, below=2.5pt of v2] (v3) {$v_3$};
 24 | \node[op, below=2.5pt of v3] (v4) {$v_4$};
 25 | \node[op, above=2.5pt of v2] (v1) {$v_1$};
 26 | 
 27 | \node[op,  right=40pt of v2] (h2) {$h_2$};
 28 | \node[op, below=2.5pt of h2] (h3) {$h_3$};
 29 | \node[op, below=2.5pt of h3] (h4) {$h_4$};
 30 | \node[op, above=2.5pt of h2] (h1) {$h_1$};
 31 | 
 32 | 
 33 | % output layer
 34 | \node[op,  right=60pt of h2] (z1) {$z_1$};
 35 | \node[op,  right=60pt of h3] (z2) {$z_2$};
 36 | 
 37 | \node[op,  right=50pt of z1] (y1) {$\hat{y}_1$};
 38 | \node[op,  right=50pt of z2] (y2) {$\hat{y}_2$};
 39 | 
 40 | 
 41 | % edges input layer to hidden
 42 | \path[tedge] (x1) -- (v1);
 43 | \path[tedge] (x1) -- (v2);
 44 | \path[tedge] (x1) -- (v3);
 45 | \path[tedge] (x1) -- (v4);
 46 | 
 47 | \path[tedge] (x2) -- (v1);
 48 | \path[tedge] (x2) -- (v2);
 49 | \path[tedge] (x2) -- (v3);
 50 | \path[tedge] (x2) -- (v4);
 51 | 
 52 | \path[tedge] (x3) -- (v1);
 53 | \path[tedge] (x3) -- (v2);
 54 | \path[tedge] (x3) -- (v3);
 55 | \path[tedge] (x3) -- (v4);
 56 | 
 57 | \path[tedge] (x4) -- (v1);
 58 | \path[tedge] (x4) -- (v2);
 59 | \path[tedge] (x4) -- (v3);
 60 | \path[tedge] (x4) -- (v4);
 61 | 
 62 | \path[tedge] (x5) -- (v1);
 63 | \path[tedge] (x5) -- (v2);
 64 | \path[tedge] (x5) -- (v3);
 65 | \path[tedge] (x5) -- (v4);
 66 | 
 67 | \path[tedge] (x6) -- (v1);
 68 | \path[tedge] (x6) -- (v2);
 69 | \path[tedge] (x6) -- (v3);
 70 | \path[tedge] (x6) -- (v4);
 71 | 
 72 | \path[tedge] (x7) -- (v1);
 73 | \path[tedge] (x7) -- (v2);
 74 | \path[tedge] (x7) -- (v3);
 75 | \path[tedge] (x7) -- (v4);
 76 | 
 77 | \path[tedge] (x8) -- (v1);
 78 | \path[tedge] (x8) -- (v2);
 79 | \path[tedge] (x8) -- (v3);
 80 | \path[tedge] (x8) -- (v4);
 81 | 
 82 | \path[tedge] (x9) -- (v1);
 83 | \path[tedge] (x9) -- (v2);
 84 | \path[tedge] (x9) -- (v3);
 85 | \path[tedge] (x9) -- (v4);
 86 | 
 87 | \path[tedge] (x10) -- (v1);
 88 | \path[tedge] (x10) -- (v2);
 89 | \path[tedge] (x10) -- (v3);
 90 | \path[tedge] (x10) -- (v4);
 91 | 
 92 | % edges hidden to hidden
 93 | \path[tedge] (v1) edge node[above=1pt] {{\Large$\sigma$}}  (h1) ;
 94 | \path[tedge] (v2) edge node[above=1pt] {{\Large$\sigma$}}  (h2) ;
 95 | \path[tedge] (v3) edge node[above=1pt] {{\Large$\sigma$}}  (h3) ;
 96 | \path[tedge] (v4) edge node[above=1pt] {{\Large$\sigma$}}  (h4) ;
 97 | 
 98 | % edges hidden to output
 99 | \path[tedge] (h1) -- (z1);
100 | \path[tedge] (h1) -- (z2);
101 | 
102 | \path[tedge] (h2) -- (z1);
103 | \path[tedge] (h2) -- (z2);
104 | 
105 | \path[tedge] (h3) -- (z1);
106 | \path[tedge] (h3) -- (z2);
107 | 
108 | \path[tedge] (h4) -- (z1);
109 | \path[tedge] (h4) -- (z2);
110 | 
111 | % edges output to output
112 | \path[tedge] (z1) edge node[above=1pt] {{\Large softmax}}  (y1) ;
113 | \path[tedge] (z2) edge node[above=1pt] {{\Large softmax}}  (y2) ;
114 | 
115 | 
116 | 
117 | 
118 | \end{tikzpicture}
119 | } % scalebox
120 | \end{figure}
121 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/OldNN2.tex:
--------------------------------------------------------------------------------
  1 | \begin{figure}[ht!]
  2 | \centering
  3 | 
  4 | \scalebox{0.7}{
  5 | \begin{tikzpicture}[auto]
  6 | 
  7 | % operations =============================
  8 | 
  9 | % input layer
 10 | \node[op] (x5) {};
 11 | \node[op, above=2.5pt of x5] (x4) {};
 12 | \node[op, above=2.5pt of x4] (x3) {};
 13 | \node[op, above=2.5pt of x3] (x2) {};
 14 | \node[op, above=2.5pt of x2] (x1) {};
 15 | \node[op, below=2.5pt of x5] (x6) {};
 16 | \node[op, below=2.5pt of x6] (x7) {};
 17 | \node[op, below=2.5pt of x7] (x8) {};
 18 | \node[op, below=2.5pt of x8] (x9) {};
 19 | \node[op, below=2.5pt of x9] (x10) {};
 20 | 
 21 | \node[textonly, above=2.5pt of x1] (x) {{\LARGE$\vect{x}$}};
 22 | \node[textonly, below=4.5pt of x10] (input) {{\large Input layer}};
 23 | 
 24 | % hidden layer
 25 | \node[op,  right=130pt of x5] (h2) {};
 26 | \node[op, below=2.5pt of h2] (h3) {};
 27 | \node[op, below=2.5pt of h3] (h4) {};
 28 | \node[op, above=2.5pt of h2] (h1) {};
 29 | 
 30 | \node[textonly, above=2.5pt of h1] (h) {{\LARGE$\vect{h}$}};
 31 | \node[textonly, below=4.5pt of h4] (hidden) {{\large Hidden layer}};
 32 | 
 33 | % output layer
 34 | \node[op,  right=60pt of h2] (y1) {};
 35 | \node[op,  right=60pt of h3] (y2) {};
 36 | 
 37 | \node[textonly, above=2.5pt of y1] (y) {{\LARGE$\hat{\vect{y}}$}};
 38 | \node[textonly, below=4.5pt of y2] (output) {{\large Output layer}};
 39 | 
 40 | % edges input layer to hidden
 41 | \path[tedge] (x1) -- (h1);
 42 | \path[tedge] (x1) -- (h2);
 43 | \path[tedge] (x1) -- (h3);
 44 | \path[tedge] (x1) -- (h4);
 45 | 
 46 | \path[tedge] (x2) -- (h1);
 47 | \path[tedge] (x2) -- (h2);
 48 | \path[tedge] (x2) -- (h3);
 49 | \path[tedge] (x2) -- (h4);
 50 | 
 51 | \path[tedge] (x3) -- (h1);
 52 | \path[tedge] (x3) -- (h2);
 53 | \path[tedge] (x3) -- (h3);
 54 | \path[tedge] (x3) -- (h4);
 55 | 
 56 | \path[tedge] (x4) -- (h1);
 57 | \path[tedge] (x4) -- (h2);
 58 | \path[tedge] (x4) -- (h3);
 59 | \path[tedge] (x4) -- (h4);
 60 | 
 61 | \path[tedge] (x5) -- (h1);
 62 | \path[tedge] (x5) -- (h2);
 63 | \path[tedge] (x5) -- (h3);
 64 | \path[tedge] (x5) -- (h4);
 65 | 
 66 | \path[tedge] (x6) -- (h1);
 67 | \path[tedge] (x6) -- (h2);
 68 | \path[tedge] (x6) -- (h3);
 69 | \path[tedge] (x6) -- (h4);
 70 | 
 71 | \path[tedge] (x7) -- (h1);
 72 | \path[tedge] (x7) -- (h2);
 73 | \path[tedge] (x7) -- (h3);
 74 | \path[tedge] (x7) -- (h4);
 75 | 
 76 | \path[tedge] (x8) -- (h1);
 77 | \path[tedge] (x8) -- (h2);
 78 | \path[tedge] (x8) -- (h3);
 79 | \path[tedge] (x8) -- (h4);
 80 | 
 81 | \path[tedge] (x9) -- (h1);
 82 | \path[tedge] (x9) -- (h2);
 83 | \path[tedge] (x9) -- (h3);
 84 | \path[tedge] (x9) -- (h4);
 85 | 
 86 | \path[tedge] (x10) -- (h1);
 87 | \path[tedge] (x10) -- (h2);
 88 | \path[tedge] (x10) -- (h3);
 89 | \path[tedge] (x10) -- (h4);
 90 | 
 91 | % edges hidden to output
 92 | \path[tedge] (h1) -- (y1);
 93 | \path[tedge] (h1) -- (y2);
 94 | 
 95 | \path[tedge] (h2) -- (y1);
 96 | \path[tedge] (h2) -- (y2);
 97 | 
 98 | \path[tedge] (h3) -- (y1);
 99 | \path[tedge] (h3) -- (y2);
100 | 
101 | \path[tedge] (h4) -- (y1);
102 | \path[tedge] (h4) -- (y2);
103 | 
104 | 
105 | 
106 | \end{tikzpicture}
107 | } % scalebox
108 | \end{figure}
109 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/OldNN3.tex:
--------------------------------------------------------------------------------
 1 | % Three captioned columns of unlabeled neurons (10 / 4 / 2). Instead of one
 2 | % arrow per connection, two crossing thick lines are drawn between each pair
 3 | % of adjacent columns.
 4 | % `op`, `textonly` and \vect are supplied by the including document.
 5 | \begin{figure}[ht!]
 6 | \centering
 7 | 
 8 | \scalebox{0.7}{
 9 | \begin{tikzpicture}[auto]
10 | 
11 | % --- input column: x5 first, then stack upwards / downwards from it
12 | \node[op] (x5) {};
13 | \foreach \n/\m in {4/5,3/4,2/3,1/2}
14 |   {\node[op, above=2.5pt of x\m] (x\n) {};}
15 | \foreach \n/\m in {6/5,7/6,8/7,9/8,10/9}
16 |   {\node[op, below=2.5pt of x\m] (x\n) {};}
17 | \node[textonly, above=2.5pt of x1] (x) {{\LARGE$\vect{x}$}};
18 | \node[textonly, below=4.5pt of x10] (input) {{\large Input layer}};
19 | 
20 | % --- hidden column, anchored on h2
21 | \node[op, right=130pt of x5] (h2) {};
22 | \node[op, above=2.5pt of h2] (h1) {};
23 | \node[op, below=2.5pt of h2] (h3) {};
24 | \node[op, below=2.5pt of h3] (h4) {};
25 | \node[textonly, above=2.5pt of h1] (h) {{\LARGE$\vect{h}$}};
26 | \node[textonly, below=4.5pt of h4] (hidden) {{\large Hidden layer}};
27 | 
28 | % --- output column, level with h2 and h3
29 | \node[op, right=60pt of h2] (y1) {};
30 | \node[op, right=60pt of h3] (y2) {};
31 | \node[textonly, above=2.5pt of y1] (y) {{\LARGE$\hat{\vect{y}}$}};
32 | \node[textonly, below=4.5pt of y2] (output) {{\large Output layer}};
33 | 
34 | % --- crossing lines: top neuron of a column to the caption below the next
35 | %     column, and bottom neuron to the caption above it
36 | \foreach \src/\dst in {x1/hidden,x10/h,h1/output,h4/y}
37 |   {\draw[line width=0.5mm] (\src) -- (\dst);}
38 | 
39 | \end{tikzpicture}
40 | } % scalebox
41 | \end{figure}
42 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/ReLU.tex:
--------------------------------------------------------------------------------
 1 | % Plot of the rectifier max(x, 0) for x in [-6, 6], with its formula placed
 2 | % at fixed picture coordinates.
 3 | % Uses the pgfplots `axis` environment; the `textonly` style is supplied by
 4 | % the including document.
 5 | \begin{figure}[ht!]
 6 | \centering
 7 | 
 8 | \scalebox{1.0}{
 9 | \begin{tikzpicture}
10 |     \begin{axis}%
11 |     [
12 |         grid=major,
13 |         xmin=-6,
14 |         xmax=6,
15 |         axis x line=bottom,
16 |         ytick={0},
17 |         ymax=10,
18 |         axis y line=middle,
19 |     ]
20 |         \addplot%
21 |         [   orange!180,
22 |             ultra thick,
23 | %             blue,%
24 |             mark=none,
25 |             samples=100,
26 |             domain=-6:6,
27 |         ]
28 |         (x,{max(x,0)});
29 |     \end{axis}
30 | % \max (the operator macro) instead of the bare letters "max", which math
31 | % mode would typeset as the product of three italic variables.
32 | \node[textonly] (relu) at (8.95,2.8) {{\Large$g(x) = \max\{0,x\}$}};
33 | \end{tikzpicture}
34 | } % scalebox
35 | \end{figure}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Sigmoid.tex:
--------------------------------------------------------------------------------
 1 | % Plot of the logistic function 1/(1+exp(-x)) for x in [-6, 6], with its
 2 | % formula placed at fixed picture coordinates.
 3 | % Uses the pgfplots `axis` environment; the `textonly` style is supplied by
 4 | % the including document.
 5 | \begin{figure}[ht!]
 6 | \centering
 7 | 
 8 | \scalebox{1.0}{
 9 | \begin{tikzpicture}
10 |   \begin{axis}[
11 |     grid=major,
12 |     xmin=-6, xmax=6,
13 |     axis x line=bottom,
14 |     ytick={0,.5,1},
15 |     ymax=1,
16 |     axis y line=middle,
17 |   ]
18 |     % single curve, 100 samples over the same x range as the axis limits
19 |     \addplot[orange!180, ultra thick, mark=none, samples=100, domain=-6:6]
20 |       (x,{1/(1+exp(-x))});
21 |   \end{axis}
22 |   % formula label
23 |   \node[textonly] (sigmoid) at (8.75,2.8) {{\Large$\sigma(x) = \frac{1}{1 + e^{-x}}$}};
24 | \end{tikzpicture}
25 | } % scalebox
26 | \end{figure}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/Softmax.tex:
--------------------------------------------------------------------------------
 1 | % Worked softmax example: a column of six logits on the left, the resulting
 2 | % probabilities on the right, an arrow labelled "softmax" between them and
 3 | % the defining formula underneath.
 4 | % `textonly`, `tedge` and \vect are supplied by the including document.
 5 | \begin{figure}[ht!]
 6 | \centering
 7 | 
 8 | \scalebox{1.3}{
 9 | \begin{tikzpicture}[auto]
10 | 
11 | % operations =============================
12 | 
13 | % nodes
14 | \node[textonly] (logits) {$\begin{bmatrix}3.82\\5.35\\1.44\\-1.26\\2.71 \\1.98\end{bmatrix}$};
15 | \node[textonly, right=60pt of logits] (softmax) {$\begin{bmatrix}0.16115195\\0.74422819\\0.01491471\\0.00100235\\0.05310907 \\0.02559374\end{bmatrix}$};
16 | % empty helper node: only used as an anchor for placing the formula
17 | \node[textonly, below=15pt of logits] (inv1) {};
18 | % Upright operator name: a bare "softmax" in math mode would be typeset as a
19 | % product of italic single-letter variables.
20 | \node[textonly, right=10pt of inv1] (softmax_eq) {{\Large$\mathrm{softmax}(\vect{x})_i = \frac{e^{\vect{x}_i}}{\sum_j e^{\vect{x}_j}}$}};
21 | 
22 | 
23 | 
24 | % edges
25 | \path[tedge] (logits) edge node[above=1pt] {{\Large softmax}} (softmax);
26 | \end{tikzpicture}
27 | } % scalebox
28 | \end{figure}
29 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/b1_path1.tex:
--------------------------------------------------------------------------------
 1 | % Full forward graph of the 2-input / 2-output example: products w*x, their
 2 | % sums, the bias additions (z), then h, their combination H, the outputs
 3 | % y-hat and finally -(y_1 log y-hat_1 + y_2 log y-hat_2).
 4 | % Nodes drawn with `op2` instead of `op` pick out the chain
 5 | %   b_1 -> z_1 -> h_1 -> y-hat_1 -> log -> * -> + -> *-1.
 6 | % `op`, `op2` and `tedge` are styles supplied by the including document.
 7 | \begin{figure}[ht!]
 8 | \centering
 9 | 
10 | \scalebox{0.6}{
11 | \begin{tikzpicture}[auto]
12 | 
13 | % --- leaves, stacked top to bottom; x_1 and x_2 appear once per output row
14 | %     (the second copies are the nodes x11 and x22)
15 | \node[op] (w11) {$w_{11}$};
16 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
17 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
18 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
19 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
20 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
21 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
22 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
23 | 
24 | % --- one product node beside each weight
25 | \foreach \n/\wnode in {1/w11,2/w12,3/w21,4/w22}
26 |   {\node[op, below right=1pt and 20pt of \wnode] (mult\n) {$*$};}
27 | 
28 | % --- upper row (output 1)
29 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
30 | \node[op2, right=25pt of sum1] (sum3) {$z_1$};
31 | \node[op2, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op2, right=25pt of sum3] (exp1) {$h_1$};
33 | \node[op2, right=35pt of exp1] (div1) {$\hat{y}_1$};
34 | \node[op2, right=25pt of div1] (log1) {$\log$};
35 | \node[op2, right=25pt of log1] (mult5) {$*$};
36 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
37 | 
38 | % --- lower row (output 2)
39 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
40 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
41 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
42 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
43 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
44 | \node[op, right=25pt of div2] (log2) {$\log$};
45 | \node[op, right=25pt of log2] (mult6) {$*$};
46 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
47 | 
48 | % --- nodes shared by both rows
49 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
50 | \node[op2, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op2, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- edges, listed as source/target pairs in drawing order
54 | \foreach \src/\dst in {%
55 |     w11/mult1,x1/mult1,w12/mult2,x2/mult2,%
56 |     w21/mult3,x11/mult3,w22/mult4,x22/mult4,%
57 |     mult1/sum1,mult2/sum1,mult3/sum2,mult4/sum2,%
58 |     sum1/sum3,b1/sum3,sum2/sum4,b2/sum4,%
59 |     sum3/exp1,sum4/exp2,exp1/sum5,exp2/sum5,%
60 |     exp1/div1,exp2/div2,sum5/div1,sum5/div2,%
61 |     div1/log1,div2/log2,log1/mult5,y1/mult5,%
62 |     log2/mult6,y2/mult6,mult5/sum6,mult6/sum6,%
63 |     sum6/minus1}
64 |   {\path[tedge] (\src) -- (\dst);}
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/b1_path1_grad.tex:
--------------------------------------------------------------------------------
 1 | % Backward pass along the chain
 2 | %   b_1 -> z_1 -> h_1 -> y-hat_1 -> log -> * -> + -> *-1.
 3 | % Bottom row: forward nodes. Row of `gradient` boxes: the value written above
 4 | % each forward node. Top row of `gradient2` boxes: the running products,
 5 | % accumulated from right to left.
 6 | % `op`, `textonly`, `gradient`, `gradient2` and `tedge` are styles supplied by
 7 | % the including document.
 8 | \begin{figure}[ht!]
 9 | \centering
10 | 
11 | \scalebox{0.8}{
12 | \begin{tikzpicture}[auto]
13 | 
14 | % --- forward chain, left to right
15 | \node[op] (z1) {$z_1$};
16 | \node[op, above left=25pt and 20pt of z1] (b1) {$b_1$};
17 | \node[op, right=25pt of z1] (exp1) {$h_1$};
18 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
19 | \node[op, below left=25pt and 10pt of div1] (H) {$H$};
20 | \node[op, right=25pt of div1] (log1) {$\log$};
21 | \node[op, right=25pt of log1] (mult5) {$*$};
22 | \node[op, right=25pt of mult5] (sum6) {$+$};
23 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
24 | \node[op, below left=25pt and 10pt of mult5] (y1) {$y_1$};
25 | % placeholder for the other summand entering the final sum
26 | \node[textonly, below left=55pt and 10pt of sum6] (dots) {{\LARGE$\dots$}};
27 | 
28 | % --- gradient boxes directly above the forward nodes
29 | \node[gradient, above=10pt of b1] (db1) {$1$};
30 | \node[gradient, above=10pt of z1] (dz1) {$h_1$};
31 | \node[gradient, above=10pt of exp1] (dexp1) {$\frac{1}{H}$};
32 | \node[gradient, above=10pt of div1] (ddiv1) {$\frac{H}{h_1}$};
33 | \node[gradient, above=10pt of log1] (dlog1) {$y_1$};
34 | \node[gradient, above=10pt of mult5] (dmult5) {$1$};
35 | \node[gradient, above=10pt of sum6] (dsum6) {$-1$};
36 | 
37 | % --- accumulated-gradient boxes, built right to left
38 | \node[gradient2, above=10pt of minus1] (dLdL) {$1$};
39 | \node[gradient2, above right =30pt and 10pt of dsum6] (dLdLpp) {$-1$};
40 | \node[gradient2, above left=30pt and 10pt of dsum6] (dLdmult5) {$-1$};
41 | \node[gradient2, left=25pt of dLdmult5] (dLlog1) {$-y_1$};
42 | \node[gradient2, left=25pt of dLlog1] (dLdiv1) {$-y_1\frac{H}{h_1}$};
43 | \node[gradient2, left=25pt of dLdiv1] (dLdexp1) {$\frac{-y_1}{h_1}$};
44 | \node[gradient2, left=25pt of dLdexp1] (dLdz1) {$-y_1$};
45 | \node[gradient2, above=35pt of db1] (dLdb1) {$-y_1$};
46 | 
47 | % --- forward edges
48 | \foreach \src/\dst in {%
49 |     b1/z1,z1/exp1,exp1/div1,H/div1,div1/log1,%
50 |     log1/mult5,y1/mult5,mult5/sum6,dots/sum6,sum6/minus1}
51 |   {\path[tedge] (\src) -- (\dst);}
52 | 
53 | % --- (disabled) arrows from each forward node to the gradient box above it
54 | % \foreach \src/\dst in {b1/db1,z1/dz1,exp1/dexp1,div1/ddiv1,log1/dlog1,mult5/dmult5,sum6/dsum6}
55 | %   {\path[tedge] (\src) -- (\dst);}
56 | 
57 | % --- edges into the accumulated-gradient boxes
58 | \foreach \src/\dst in {%
59 |     dsum6/dLdLpp,dLdL/dLdLpp,%
60 |     dLdLpp/dLdmult5,dmult5/dLdmult5,%
61 |     dLdmult5/dLlog1,dlog1/dLlog1,%
62 |     ddiv1/dLdiv1,dLlog1/dLdiv1,%
63 |     dexp1/dLdexp1,dLdiv1/dLdexp1,%
64 |     dz1/dLdz1,dLdexp1/dLdz1,%
65 |     db1/dLdb1,dLdz1/dLdb1}
66 |   {\path[tedge] (\src) -- (\dst);}
67 | 
68 | \end{tikzpicture}
69 | } % scalebox
70 | \end{figure}
71 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/b1_path2.tex:
--------------------------------------------------------------------------------
 1 | % Full forward graph of the 2-input / 2-output example: products w*x, their
 2 | % sums, the bias additions (z), then h, their combination H, the outputs
 3 | % y-hat and finally -(y_1 log y-hat_1 + y_2 log y-hat_2).
 4 | % Nodes drawn with `op2` instead of `op` pick out b_1, z_1, h_1, H,
 5 | % y-hat_1, the upper log and product, the final sum and the *-1 node.
 6 | % `op`, `op2` and `tedge` are styles supplied by the including document.
 7 | \begin{figure}[ht!]
 8 | \centering
 9 | 
10 | \scalebox{0.6}{
11 | \begin{tikzpicture}[auto]
12 | 
13 | % --- leaves, stacked top to bottom; x_1 and x_2 appear once per output row
14 | %     (the second copies are the nodes x11 and x22)
15 | \node[op] (w11) {$w_{11}$};
16 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
17 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
18 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
19 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
20 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
21 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
22 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
23 | 
24 | % --- one product node beside each weight
25 | \foreach \n/\wnode in {1/w11,2/w12,3/w21,4/w22}
26 |   {\node[op, below right=1pt and 20pt of \wnode] (mult\n) {$*$};}
27 | 
28 | % --- upper row (output 1)
29 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
30 | \node[op2, right=25pt of sum1] (sum3) {$z_1$};
31 | \node[op2, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op2, right=25pt of sum3] (exp1) {$h_1$};
33 | \node[op2, right=35pt of exp1] (div1) {$\hat{y}_1$};
34 | \node[op2, right=25pt of div1] (log1) {$\log$};
35 | \node[op2, right=25pt of log1] (mult5) {$*$};
36 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
37 | 
38 | % --- lower row (output 2)
39 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
40 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
41 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
42 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
43 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
44 | \node[op, right=25pt of div2] (log2) {$\log$};
45 | \node[op, right=25pt of log2] (mult6) {$*$};
46 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
47 | 
48 | % --- nodes shared by both rows
49 | \node[op2, below right=65pt and 15pt of exp1] (sum5) {$H$};
50 | \node[op2, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op2, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- edges, listed as source/target pairs in drawing order
54 | \foreach \src/\dst in {%
55 |     w11/mult1,x1/mult1,w12/mult2,x2/mult2,%
56 |     w21/mult3,x11/mult3,w22/mult4,x22/mult4,%
57 |     mult1/sum1,mult2/sum1,mult3/sum2,mult4/sum2,%
58 |     sum1/sum3,b1/sum3,sum2/sum4,b2/sum4,%
59 |     sum3/exp1,sum4/exp2,exp1/sum5,exp2/sum5,%
60 |     exp1/div1,exp2/div2,sum5/div1,sum5/div2,%
61 |     div1/log1,div2/log2,log1/mult5,y1/mult5,%
62 |     log2/mult6,y2/mult6,mult5/sum6,mult6/sum6,%
63 |     sum6/minus1}
64 |   {\path[tedge] (\src) -- (\dst);}
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/b1_path2_grad.tex:
--------------------------------------------------------------------------------
 1 | % Backward pass along the chain
 2 | %   b_1 -> z_1 -> h_1 -> H -> y-hat_1 -> log -> * -> + -> *-1.
 3 | % Bottom row: forward nodes. Row of `gradient` boxes: the value written above
 4 | % each forward node. Top row of `gradient2` boxes: the running products,
 5 | % accumulated from right to left.
 6 | % `op`, `textonly`, `gradient`, `gradient2` and `tedge` are styles supplied by
 7 | % the including document.
 8 | \begin{figure}[ht!]
 9 | \centering
10 | 
11 | \scalebox{0.7}{
12 | \begin{tikzpicture}[auto]
13 | 
14 | % --- forward chain, left to right
15 | \node[op] (z1) {$z_1$};
16 | \node[op, above left=25pt and 20pt of z1] (b1) {$b_1$};
17 | \node[op, right=25pt of z1] (exp1) {$h_1$};
18 | \node[op, right=25pt of exp1] (H) {$H$};
19 | \node[op, right=35pt of H] (div1) {$\hat{y}_1$};
20 | \node[op, right=25pt of div1] (log1) {$\log$};
21 | \node[op, right=25pt of log1] (mult5) {$*$};
22 | \node[op, below left=25pt and 10pt of mult5] (y1) {$y_1$};
23 | \node[op, right=25pt of mult5] (sum6) {$+$};
24 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
25 | % placeholder for the other summand entering the final sum
26 | \node[textonly, below left=55pt and 10pt of sum6] (dots) {{\LARGE$\dots$}};
27 | 
28 | % --- gradient boxes directly above the forward nodes
29 | \node[gradient, above=10pt of b1] (db1) {$1$};
30 | \node[gradient, above=10pt of z1] (dz1) {$h_1$};
31 | \node[gradient, above=10pt of exp1] (dexp1) {$1$};
32 | \node[gradient, above=10pt of H] (dH) {$-\frac{h_1}{H^2}$};
33 | \node[gradient, above=10pt of div1] (ddiv1) {$\frac{H}{h_1}$};
34 | \node[gradient, above=10pt of log1] (dlog1) {$y_1$};
35 | \node[gradient, above=10pt of mult5] (dmult5) {$1$};
36 | \node[gradient, above=10pt of sum6] (dsum6) {$-1$};
37 | 
38 | % --- accumulated-gradient boxes, built right to left
39 | \node[gradient2, above=10pt of minus1] (dLdL) {$1$};
40 | \node[gradient2, above right =30pt and 10pt of dsum6] (dLdLpp) {$-1$};
41 | \node[gradient2, above left=30pt and 10pt of dsum6] (dLdmult5) {$-1$};
42 | \node[gradient2, left=25pt of dLdmult5] (dLlog1) {$-y_1$};
43 | \node[gradient2, left=25pt of dLlog1] (dLdiv1) {$-y_1\frac{H}{h_1}$};
44 | \node[gradient2, left=25pt of dLdiv1] (dLdH) {$\frac{y_1}{H}$};
45 | \node[gradient2, left=25pt of dLdH] (dLdexp1) {$\frac{y_1}{H}$};
46 | \node[gradient2, left=25pt of dLdexp1] (dLdz1) {$y_1\frac{h_1}{H}$};
47 | \node[gradient2, above=35pt of db1] (dLdb1) {$y_1\frac{h_1}{H}$};
48 | 
49 | % --- forward edges
50 | \foreach \src/\dst in {%
51 |     b1/z1,z1/exp1,exp1/H,H/div1,div1/log1,%
52 |     log1/mult5,y1/mult5,mult5/sum6,dots/sum6,sum6/minus1}
53 |   {\path[tedge] (\src) -- (\dst);}
54 | 
55 | % --- (disabled) arrows from each forward node to the gradient box above it
56 | % \foreach \src/\dst in {b1/db1,z1/dz1,exp1/dexp1,div1/ddiv1,H/dH,log1/dlog1,mult5/dmult5,sum6/dsum6}
57 | %   {\path[tedge] (\src) -- (\dst);}
58 | 
59 | % --- edges into the accumulated-gradient boxes
60 | \foreach \src/\dst in {%
61 |     dsum6/dLdLpp,dLdL/dLdLpp,%
62 |     dLdLpp/dLdmult5,dmult5/dLdmult5,%
63 |     dLdmult5/dLlog1,dlog1/dLlog1,%
64 |     ddiv1/dLdiv1,dLlog1/dLdiv1,%
65 |     dexp1/dLdexp1,dH/dLdH,%
66 |     dLdiv1/dLdH,dLdH/dLdexp1,%
67 |     dz1/dLdz1,dLdexp1/dLdz1,%
68 |     db1/dLdb1,dLdz1/dLdb1}
69 |   {\path[tedge] (\src) -- (\dst);}
70 | 
71 | \end{tikzpicture}
72 | } % scalebox
73 | \end{figure}
74 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/b1_path3.tex:
--------------------------------------------------------------------------------
 1 | % Full forward graph of the 2-input / 2-output example: products w*x, their
 2 | % sums, the bias additions (z), then h, their combination H, the outputs
 3 | % y-hat and finally -(y_1 log y-hat_1 + y_2 log y-hat_2).
 4 | % Nodes drawn with `op2` instead of `op` pick out b_1, z_1, h_1, H,
 5 | % y-hat_2, the lower log and product, the final sum and the *-1 node.
 6 | % `op`, `op2` and `tedge` are styles supplied by the including document.
 7 | \begin{figure}[ht!]
 8 | \centering
 9 | 
10 | \scalebox{0.6}{
11 | \begin{tikzpicture}[auto]
12 | 
13 | % --- leaves, stacked top to bottom; x_1 and x_2 appear once per output row
14 | %     (the second copies are the nodes x11 and x22)
15 | \node[op] (w11) {$w_{11}$};
16 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
17 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
18 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
19 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
20 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
21 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
22 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
23 | 
24 | % --- one product node beside each weight
25 | \foreach \n/\wnode in {1/w11,2/w12,3/w21,4/w22}
26 |   {\node[op, below right=1pt and 20pt of \wnode] (mult\n) {$*$};}
27 | 
28 | % --- upper row (output 1)
29 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
30 | \node[op2, right=25pt of sum1] (sum3) {$z_1$};
31 | \node[op2, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op2, right=25pt of sum3] (exp1) {$h_1$};
33 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
34 | \node[op, right=25pt of div1] (log1) {$\log$};
35 | \node[op, right=25pt of log1] (mult5) {$*$};
36 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
37 | 
38 | % --- lower row (output 2)
39 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
40 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
41 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
42 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
43 | \node[op2, right=35pt of exp2] (div2) {$\hat{y}_2$};
44 | \node[op2, right=25pt of div2] (log2) {$\log$};
45 | \node[op2, right=25pt of log2] (mult6) {$*$};
46 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
47 | 
48 | % --- nodes shared by both rows
49 | \node[op2, below right=65pt and 15pt of exp1] (sum5) {$H$};
50 | \node[op2, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op2, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- edges, listed as source/target pairs in drawing order
54 | \foreach \src/\dst in {%
55 |     w11/mult1,x1/mult1,w12/mult2,x2/mult2,%
56 |     w21/mult3,x11/mult3,w22/mult4,x22/mult4,%
57 |     mult1/sum1,mult2/sum1,mult3/sum2,mult4/sum2,%
58 |     sum1/sum3,b1/sum3,sum2/sum4,b2/sum4,%
59 |     sum3/exp1,sum4/exp2,exp1/sum5,exp2/sum5,%
60 |     exp1/div1,exp2/div2,sum5/div1,sum5/div2,%
61 |     div1/log1,div2/log2,log1/mult5,y1/mult5,%
62 |     log2/mult6,y2/mult6,mult5/sum6,mult6/sum6,%
63 |     sum6/minus1}
64 |   {\path[tedge] (\src) -- (\dst);}
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/b1_path3_grad.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.7}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % multiplication
10 | \node[op] (z1) {$z_1$};
11 | \node[op, above left=25pt and 20pt of z1] (b1) {$b_1$};
12 | 
13 | % exp
14 | \node[op, right=25pt of z1] (exp1) {$h_1$};
15 | \node[op, right=25pt of exp1] (H) {$H$};
16 | \node[op, right=35pt of H] (div1) {$\hat{y}_2$};
17 | 
18 | % log
19 | \node[op, right=25pt of div1] (log1) {$\log$};
20 | \node[op, right=25pt of log1] (mult5) {$*$};
21 | \node[op, below left=25pt and 10pt of mult5] (y2) {$y_2$};
22 | \node[op, right=25pt of mult5] (sum6) {$+$};
23 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
24 | \node[textonly, below left=55pt and 10pt of sum6] (dots) {{\LARGE$\dots$}};
25 | 
26 | %gradients 1
27 | \node[gradient, above=10pt of sum6] (dsum6) {$-1$};
28 | \node[gradient, above=10pt of mult5] (dmult5) {$1$};
29 | \node[gradient, above=10pt of log1] (dlog1) {$y_2$};
30 | \node[gradient, above=10pt of div1] (ddiv1) {$\frac{H}{h_1}$};
31 | \node[gradient, above=10pt of H] (dH) {$-\frac{h_1}{H^2}$};
32 | \node[gradient, above=10pt of exp1] (dexp1) {$1$};
33 | \node[gradient, above=10pt of z1] (dz1) {$h_1$};
34 | \node[gradient, above=10pt of b1] (db1) {$1$};
35 | 
36 | %gradients 2
37 | \node[gradient2, above=10pt of minus1] (dLdL) {$1$};
38 | \node[gradient2, above right =30pt and 10pt of dsum6] (dLdLpp) {$-1$};
39 | \node[gradient2, above left=30pt and 10pt of dsum6] (dLdmult5) {$-1$};
40 | \node[gradient2, left=25pt of dLdmult5] (dLlog1) {$-y_2$};
41 | \node[gradient2, left=25pt of dLlog1] (dLdiv1) {$-y_2\frac{H}{h_1}$};
42 | \node[gradient2, left=25pt of dLdiv1] (dLdH) {$\frac{y_2}{H}$};
43 | \node[gradient2, left=25pt of dLdH] (dLdexp1) {$\frac{y_2}{H}$};
44 | \node[gradient2, left=25pt of dLdexp1] (dLdz1) {$y_2\frac{h_1}{H}$};
45 | \node[gradient2, above=35pt of db1] (dLdb1) {$y_2\frac{h_1}{H}$};
46 | 
47 | %edges
48 | \path[tedge] (b1) -- (z1);
49 | \path[tedge] (z1) -- (exp1);
50 | \path[tedge] (exp1) -- (H);
51 | \path[tedge] (H) -- (div1);
52 | 
53 | \path[tedge] (div1) -- (log1);
54 | \path[tedge] (log1) -- (mult5);
55 | \path[tedge] (y2) -- (mult5);
56 | \path[tedge] (mult5) -- (sum6);
57 | \path[tedge] (dots) -- (sum6);
58 | \path[tedge] (sum6) -- (minus1);
59 | 
60 | % edges from each operation to its local gradient (currently disabled)
61 | % \path[tedge] (b1) -- (db1);
62 | % \path[tedge] (z1) -- (dz1);
63 | % \path[tedge] (exp1) -- (dexp1);
64 | % \path[tedge] (div1) -- (ddiv1);
65 | % \path[tedge] (H) -- (dH);
66 | % \path[tedge] (log1) -- (dlog1);
67 | % \path[tedge] (mult5) -- (dmult5);
68 | % \path[tedge] (sum6) -- (dsum6);
69 | 
70 | % edges into the accumulated gradients: each one receives the local gradient below it and the accumulated gradient to its right
71 | \path[tedge] (dsum6) -- (dLdLpp);
72 | \path[tedge] (dLdL) -- (dLdLpp);
73 | \path[tedge] (dLdLpp) -- (dLdmult5);
74 | \path[tedge] (dmult5) -- (dLdmult5);
75 | \path[tedge] (dLdmult5) -- (dLlog1);
76 | \path[tedge] (dlog1) -- (dLlog1);
77 | \path[tedge] (ddiv1) -- (dLdiv1);
78 | \path[tedge] (dLlog1) -- (dLdiv1);
79 | \path[tedge] (dexp1) -- (dLdexp1);
80 | \path[tedge] (dH) -- (dLdH);
81 | \path[tedge] (dLdiv1) -- (dLdH);
82 | \path[tedge] (dLdH) -- (dLdexp1);
83 | \path[tedge] (dz1) -- (dLdz1);
84 | \path[tedge] (dLdexp1) -- (dLdz1);
85 | \path[tedge] (db1) -- (dLdb1);
86 | \path[tedge] (dLdz1) -- (dLdb1);
87 | 
88 | 
89 | \end{tikzpicture}
90 | } % scalebox
91 | \end{figure}
92 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/batch_example_values.tex:
--------------------------------------------------------------------------------
 1 | {\large\textbf{batch\_size = 3}
 2 | }\begin{figure}[ht!]
 3 | \centering
 4 | 
 5 | \scalebox{1.3}{
 6 | \begin{tikzpicture}[auto]
 7 | 
 8 | % batch contents: three column vectors x_i in the top row, the matching y_i below ===
 9 | 
10 | % value nodes; each y_i is placed 30pt below its x_i
11 | \node[textonly] (x1) {$\begin{bmatrix}0.2\\0.7\end{bmatrix}$};
12 | \node[textonly, below=30pt of x1] (y1) {$\begin{bmatrix}1\\0\end{bmatrix}$};
13 | \node[textonly, right=10pt of x1] (x2) {$\begin{bmatrix}0.8\\0.1\end{bmatrix}$};
14 | \node[textonly, below=30pt of x2] (y2) {$\begin{bmatrix}0\\1\end{bmatrix}$};
15 | \node[textonly, right=10pt of x2] (x3) {$\begin{bmatrix}0.3\\0.5\end{bmatrix}$};
16 | \node[textonly, below=30pt of x3] (y3) {$\begin{bmatrix}1\\0\end{bmatrix}$};
17 | 
18 | 
19 | \node[textonly, above=1pt of x1] (x1name) {$\vect{x}_1$}; % caption nodes: symbol printed just above each vector (\vect is defined outside this file)
20 | \node[textonly, above=1pt of y1] (y1name) {$\vect{y}_1$};
21 | \node[textonly, above=1pt of x2] (x2name) {$\vect{x}_2$};
22 | \node[textonly, above=1pt of y2] (y2name) {$\vect{y}_2$};
23 | \node[textonly, above=1pt of x3] (x3name) {$\vect{x}_3$};
24 | \node[textonly, above=1pt of y3] (y3name) {$\vect{y}_3$};
25 | 
26 | 
27 | \end{tikzpicture}
28 | } % scalebox
29 | \end{figure}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/batch_graph.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.8}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Four stacks of three overlapping cards. Inside a stack every card sits one
 8 | % diagonal step (down and to the left, in cm) from the previous one; the
 9 | % steps are folded directly into the coordinates below. Cards are drawn
10 | % back to front, so the last card of each stack ends up on top.
11 | 
12 | % stack of x vectors, step 0.6
13 | \node[placeholder] (x1) at (1,3.5) {$\begin{bmatrix}0.3\\0.5\end{bmatrix}$};
14 | \node[placeholder] (x2) at (0.4,2.9) {$\begin{bmatrix}0.8\\0.1\end{bmatrix}$};
15 | \node[placeholder] (x3) at (-0.2,2.3) {$\begin{bmatrix}0.2\\0.7\end{bmatrix}$};
16 | 
17 | % stack of y vectors, step 0.6
18 | \node[placeholder] (y1) at (1,1) {$\begin{bmatrix}1\\0\end{bmatrix}$};
19 | \node[placeholder] (y2) at (0.4,0.4) {$\begin{bmatrix}0\\1\end{bmatrix}$};
20 | \node[placeholder] (y3) at (-0.2,-0.2) {$\begin{bmatrix}1\\0\end{bmatrix}$};
21 | 
22 | % stack of L nodes, step 0.4
23 | \node[op] (df1) at (7,1) {\large{L}};
24 | \node[op] (df2) at (6.6,0.6) {\large{L}};
25 | \node[op] (df3) at (6.2,0.2) {\large{L}};
26 | 
27 | % stack of gradient values, step 0.6
28 | \node[gradient2] (grad1) at (12,1) {$-0.26$};
29 | \node[gradient2] (grad2) at (11.4,0.4) {$0.54$};
30 | \node[gradient2] (grad3) at (10.8,-0.2) {$-0.19$};
31 | 
32 | % empty helper nodes beside the middle card of each stack; the thick arrows attach to these
33 | \node[textonly, right=10pt of x2] (inv1) {};
34 | \node[textonly, right=10pt of y2] (inv2) {};
35 | \node[textonly, left=10pt of df2] (inv3) {};
36 | \node[textonly, right=10pt of df2] (inv4) {};
37 | \node[textonly, left=10pt of grad2] (inv5) {};
38 | 
39 | % thick arrows: x and y stacks into the L stack, L stack into the gradient stack
40 | \path[tedge, orange!120, line width=1.5mm]  (inv1) -- (inv3);
41 | \path[tedge, orange!120, line width=1.5mm]  (inv2) -- (inv3);
42 | \path[tedge, green2!120, line width=1.5mm]  (inv4) -- (inv5);
43 | 
44 | \end{tikzpicture}
45 | } % scalebox
46 | \end{figure}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/chain_rule_nodes.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.0}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % nodes: a chain u_{j-2}, u_{j-1} -> u_j -> u_{j+1} -> ... -> u_n with gradient labels above ===
 8 | \node[op] (nt) {$u_j$};
 9 | \node[op, above left=50pt of nt] (a) {$u_{j-1}$}; % first input of u_j
10 | \node[op, below left=40pt of nt] (b) {$u_{j-2}$}; % second input of u_j
11 | \node[op, right=40pt of nt] (ntp) {$u_{j+1}$};
12 | \node[textonly, right=20pt of ntp] (ntpp) {$\dots$}; % elided intermediate nodes
13 | \node[op, right=40pt of ntpp] (nT) {$u_{n}$};
14 | \node[gradient, above=15pt of nt] (dnt) {$\frac{\partial u_{j+1}}{\partial u_{j}}$}; % local derivative at u_j
15 | \node[gradient2, above=15pt of ntp] (dntp) {$\frac{\partial u_{n}}{\partial u_{j+1}}$}; % derivative of the last node w.r.t. u_{j+1}
16 | \node[gradient2, above=15pt of dnt] (ddnt) {$\frac{\partial u_{n}}{\partial u_{j}}$}; % receives edges from dnt and dntp below
17 | \node[gradient2, above=20pt of nT] (dLdL) {$\frac{\partial u_{n}}{\partial u_{n}}$};
18 | \node[textonly, right=0.1pt of dLdL] {$=1$}; % unnamed caption glued to the right of dLdL
19 | 
20 | % edges: forward chain first, then the links into the gradient labels ===
21 | \path[tedge] (a) -- (nt);
22 | \path[tedge] (b) -- (nt);
23 | \path[tedge] (nt) -- (ntp);
24 | \path[tedge] (nt) -- (dnt);
25 | \path[tedge] (ntp) -- (ntpp);
26 | \path[tedge] (ntpp) -- (nT);
27 | \path[tedge] (ntp) -- (dntp);
28 | \path[tedge] (dnt) -- (ddnt);
29 | \path[tedge] (dntp) -- (ddnt);
30 | 
31 | \end{tikzpicture}
32 | } % scalebox
33 | \end{figure}
34 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/convnet_arch.tex:
--------------------------------------------------------------------------------
 1 | \begin{tikzpicture}
 2 | % Layer labels, placed left to right on the x axis in steps of 1.5.
 3 | \path (0, 0)   node (convlayer) {CONV}
 4 |       (1.5, 0) node (poollayer) {POOL}
 5 |       (3, 0)   node (actvlayer) {ReLU}
 6 |       (4.5, 0) node (otherlayer) {(...)}
 7 |       (6, 0)   node (fullyconnected) {FC};
 8 | % One thin arrow from the east side of each label to the west side of the next.
 9 | \foreach \src/\dst in {convlayer/poollayer, poollayer/actvlayer, actvlayer/otherlayer, otherlayer/fullyconnected}
10 |   \draw [->, thin] (\src.east) -- (\dst.west);
11 | \end{tikzpicture}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/div.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.2}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % nodes: division f(a,b) = a/b with its two partial derivatives ===
 8 | \node[op] (divop) {$\mathrm{div}$}; % upright name; italic $div$ would be typeset as the product d*i*v
 9 | \node[op, above left=20pt of divop] (a) {$a$};
10 | \node[op, below left=20pt of divop] (b) {$b$};
11 | \node[gradient, above left=15pt and 20pt of a] (da) {$\frac{\partial f}{\partial a}$};
12 | \node[gradient, below left=15pt and 20pt of b] (db) {$\frac{\partial f}{\partial b}$};
13 | \node[textonly, right=0.1pt of da] {$=\frac{1}{b}$};
14 | \node[textonly, right=0.1pt of db] {$=\frac{-a}{b^{2}}$};
15 | \node[textonly, right=0.1pt of divop] {$=f(a,b) =\frac{a}{b}$};
16 | 
17 | % edges: operands into the division, then each operand into the derivatives that depend on it
18 | \path[tedge] (a) -- (divop);
19 | \path[tedge] (b) -- (divop);
20 | \path[tedge] (b) -- (da); % df/da = 1/b uses only b
21 | \path[tedge] (a) -- (db); % df/db = -a/b^2 uses both a and b
22 | \path[tedge] (b) -- (db);
23 | 
24 | 
25 | \end{tikzpicture}
26 | } % scalebox
27 | \end{figure}
28 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/examples_values.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.3}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % example values for W, x, b and y, arranged on a 2x2 grid ===
 8 | 
 9 | % value nodes: W and x in the top row, b and y 30pt below them
10 | \node[textonly] (W) {$\begin{bmatrix}0.65  & 1.19\\0.69  & -0.92\end{bmatrix}$};
11 | \node[textonly, right=40pt of W] (x) {$\begin{bmatrix}0.2\\0.7\end{bmatrix}$};
12 | \node[textonly, below=30pt of W] (b) {$\begin{bmatrix}0\\0\end{bmatrix}$};
13 | \node[textonly, below=30pt of x] (y) {$\begin{bmatrix}1\\0\end{bmatrix}$};
14 | \node[textonly, left=1pt of W] (Wname) {$\vect{W}=$}; % "symbol =" captions glued to the left of each value (\vect is defined outside this file)
15 | \node[textonly, left=1pt of x] (xname) {$\vect{x}=$};
16 | \node[textonly, left=1pt of b] (bname) {$\vect{b}=$};
17 | \node[textonly, left=1pt of y] (yname) {$\vect{y}=$};
18 | 
19 | 
20 | \end{tikzpicture}
21 | } % scalebox
22 | \end{figure}
23 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/exp.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.5}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % nodes: exponential f(a) = e^a with its derivative ===
 8 | \node[op] (expop) {$\exp$}; % operator macro gives an upright name, matching the $\log$ nodes of the other graphs
 9 | \node[op, left=20pt of expop] (a) {$a$};
10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$};
11 | \node[textonly, right=0.1pt of da] {$=e^{a}$};
12 | \node[textonly, right=0.1pt of expop] {$=f(a) = e^{a}$};
13 | 
14 | % edges: the operand feeds both the operation and its derivative
15 | \path[tedge] (a) -- (expop);
16 | \path[tedge] (a) -- (da);
17 | 
18 | \end{tikzpicture}
19 | } % scalebox
20 | \end{figure}
21 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_0.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % leftmost column: weights and inputs, one (w, x) pair per product; x_1 and x_2 appear twice (x1/x11, x2/x22)
10 | \node[op] (w11) {$w_{11}$};
11 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
13 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
14 | 
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % products: one per (w, x) pair, placed to the right of the pair's weight
21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$};
22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$};
23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$};
24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$};
25 | 
26 | % sums: sum1/sum2 add two products each; z_1/z_2 then receive that sum together with the bias b_1/b_2
27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % h_1/h_2 follow z_1/z_2; H receives both h nodes; each \hat{y}_i receives h_i and H
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
40 | 
41 | % log of each output, product with the matching y_i, sum of both products, final multiplication by -1
42 | \node[op, right=25pt of div1] (log1) {$\log$};
43 | \node[op, right=25pt of div2] (log2) {$\log$};
44 | \node[op, right=25pt of log1] (mult5) {$*$};
45 | \node[op, right=25pt of log2] (mult6) {$*$};
46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
49 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
50 | 
51 | 
52 | 
53 | %edges
54 | \path[tedge] (w11) -- (mult1);
55 | \path[tedge] (x1) -- (mult1);
56 | \path[tedge] (w12) -- (mult2);
57 | \path[tedge] (x2) -- (mult2);
58 | \path[tedge] (w21) -- (mult3);
59 | \path[tedge] (x11) -- (mult3);
60 | \path[tedge] (w22) -- (mult4);
61 | \path[tedge] (x22) -- (mult4);
62 | 
63 | \path[tedge] (mult1) -- (sum1);
64 | \path[tedge] (mult2) -- (sum1);
65 | \path[tedge] (mult3) -- (sum2);
66 | \path[tedge] (mult4) -- (sum2);
67 | \path[tedge] (sum1) -- (sum3);
68 | \path[tedge] (b1) -- (sum3);
69 | \path[tedge] (sum2) -- (sum4);
70 | \path[tedge] (b2) -- (sum4);
71 | 
72 | \path[tedge] (sum3) -- (exp1);
73 | \path[tedge] (sum4) -- (exp2);
74 | \path[tedge] (exp1) -- (sum5);
75 | \path[tedge] (exp2) -- (sum5);
76 | \path[tedge] (exp1) -- (div1);
77 | \path[tedge] (exp2) -- (div2);
78 | \path[tedge] (sum5) -- (div1);
79 | \path[tedge] (sum5) -- (div2);
80 | 
81 | 
82 | \path[tedge] (div1) -- (log1);
83 | \path[tedge] (div2) -- (log2);
84 | \path[tedge] (log1) -- (mult5);
85 | \path[tedge] (y1) -- (mult5);
86 | \path[tedge] (log2) -- (mult6);
87 | \path[tedge] (y2) -- (mult6);
88 | \path[tedge] (mult5) -- (sum6);
89 | \path[tedge] (mult6) -- (sum6);
90 | \path[tedge] (sum6) -- (minus1);
91 | 
92 | \end{tikzpicture}
93 | } % scalebox
94 | \end{figure}
95 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_1.tex:
--------------------------------------------------------------------------------
  1 | \begin{figure}[ht!]
  2 | \centering
  3 | 
  4 | \scalebox{0.6}{
  5 | \begin{tikzpicture}[auto]
  6 | 
  7 | % operations =============================
  8 | 
  9 | % inputs drawn with the `state` style and numeric labels: W = [0.65 1.19; 0.69 -0.92], x = [0.2, 0.7]
 10 | \node[state] (w11) {$0.65$};
 11 | \node[state, below=10pt of w11] (x1) {$0.2$};
 12 | \node[state, below=20pt of x1] (w12) {$1.19$};
 13 | \node[state, below=10pt of w12] (x2) {$0.7$};
 14 | 
 15 | \node[state, below=20pt of x2] (w21) {$0.69$};
 16 | \node[state, below=10pt of w21] (x11) {$0.2$}; % second copy of x_1, paired with w_21
 17 | \node[state, below=20pt of x11] (w22) {$-0.92$};
 18 | \node[state, below=10pt of w22] (x22) {$0.7$}; % second copy of x_2, paired with w_22
 19 | 
 20 | % multiplication
 21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$};
 22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$};
 23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$};
 24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$};
 25 | 
 26 | % sum
 27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
 28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
 29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
 30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
 31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
 32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
 33 | 
 34 | % exp
 35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
 36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
 37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
 38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
 39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
 40 | 
 41 | % log
 42 | \node[op, right=25pt of div1] (log1) {$\log$};
 43 | \node[op, right=25pt of div2] (log2) {$\log$};
 44 | \node[op, right=25pt of log1] (mult5) {$*$};
 45 | \node[op, right=25pt of log2] (mult6) {$*$};
 46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
 47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
 48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
 49 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
 50 | 
 51 | 
 52 | 
 53 | %edges
 54 | \path[tedge] (w11) -- (mult1);
 55 | \path[tedge] (x1) -- (mult1);
 56 | \path[tedge] (w12) -- (mult2);
 57 | \path[tedge] (x2) -- (mult2);
 58 | \path[tedge] (w21) -- (mult3);
 59 | \path[tedge] (x11) -- (mult3);
 60 | \path[tedge] (w22) -- (mult4);
 61 | \path[tedge] (x22) -- (mult4);
 62 | 
 63 | \path[tedge] (mult1) -- (sum1);
 64 | \path[tedge] (mult2) -- (sum1);
 65 | \path[tedge] (mult3) -- (sum2);
 66 | \path[tedge] (mult4) -- (sum2);
 67 | \path[tedge] (sum1) -- (sum3);
 68 | \path[tedge] (b1) -- (sum3);
 69 | \path[tedge] (sum2) -- (sum4);
 70 | \path[tedge] (b2) -- (sum4);
 71 | 
 72 | \path[tedge] (sum3) -- (exp1);
 73 | \path[tedge] (sum4) -- (exp2);
 74 | \path[tedge] (exp1) -- (sum5);
 75 | \path[tedge] (exp2) -- (sum5);
 76 | \path[tedge] (exp1) -- (div1);
 77 | \path[tedge] (exp2) -- (div2);
 78 | \path[tedge] (sum5) -- (div1);
 79 | \path[tedge] (sum5) -- (div2);
 80 | 
 81 | 
 82 | \path[tedge] (div1) -- (log1);
 83 | \path[tedge] (div2) -- (log2);
 84 | \path[tedge] (log1) -- (mult5);
 85 | \path[tedge] (y1) -- (mult5);
 86 | \path[tedge] (log2) -- (mult6);
 87 | \path[tedge] (y2) -- (mult6);
 88 | \path[tedge] (mult5) -- (sum6);
 89 | \path[tedge] (mult6) -- (sum6);
 90 | \path[tedge] (sum6) -- (minus1);
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | \end{tikzpicture}
106 | } % scalebox
107 | \end{figure}
108 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_10.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % input x and W
10 | \node[op] (w11) {$w_{11}$};
11 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
13 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
14 | 
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % multiplication
21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$};
22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$};
23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$};
24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$};
25 | 
26 | % sum
27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % exp
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
40 | 
41 | % log stage: the two log nodes and the y inputs use the `state` style and carry numbers instead of symbols
42 | \node[state, right=25pt of div1] (log1) {$-0.207$}; % NOTE(review): presumably log(\hat{y}_1) for the running example; confirm against the slides
43 | \node[state, right=25pt of div2] (log2) {$-1.67$}; % NOTE(review): presumably log(\hat{y}_2); confirm
44 | \node[op, right=25pt of log1] (mult5) {$*$};
45 | \node[op, right=25pt of log2] (mult6) {$*$};
46 | \node[state, above left=25pt and 10pt of mult5] (y1) {$1$};
47 | \node[state, below left=25pt and 10pt of mult6] (y2) {$0$};
48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
49 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
50 | 
51 | 
52 | 
53 | %edges
54 | \path[tedge] (w11) -- (mult1);
55 | \path[tedge] (x1) -- (mult1);
56 | \path[tedge] (w12) -- (mult2);
57 | \path[tedge] (x2) -- (mult2);
58 | \path[tedge] (w21) -- (mult3);
59 | \path[tedge] (x11) -- (mult3);
60 | \path[tedge] (w22) -- (mult4);
61 | \path[tedge] (x22) -- (mult4);
62 | 
63 | \path[tedge] (mult1) -- (sum1);
64 | \path[tedge] (mult2) -- (sum1);
65 | \path[tedge] (mult3) -- (sum2);
66 | \path[tedge] (mult4) -- (sum2);
67 | \path[tedge] (sum1) -- (sum3);
68 | \path[tedge] (b1) -- (sum3);
69 | \path[tedge] (sum2) -- (sum4);
70 | \path[tedge] (b2) -- (sum4);
71 | 
72 | \path[tedge] (sum3) -- (exp1);
73 | \path[tedge] (sum4) -- (exp2);
74 | \path[tedge] (exp1) -- (sum5);
75 | \path[tedge] (exp2) -- (sum5);
76 | \path[tedge] (exp1) -- (div1);
77 | \path[tedge] (exp2) -- (div2);
78 | \path[tedge] (sum5) -- (div1);
79 | \path[tedge] (sum5) -- (div2);
80 | 
81 | 
82 | \path[tedge] (div1) -- (log1);
83 | \path[tedge] (div2) -- (log2);
84 | \path[tedge] (log1) -- (mult5);
85 | \path[tedge] (y1) -- (mult5);
86 | \path[tedge] (log2) -- (mult6);
87 | \path[tedge] (y2) -- (mult6);
88 | \path[tedge] (mult5) -- (sum6);
89 | \path[tedge] (mult6) -- (sum6);
90 | \path[tedge] (sum6) -- (minus1);
91 | 
92 | \end{tikzpicture}
93 | } % scalebox
94 | \end{figure}
95 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_11.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % input x and W
10 | \node[op] (w11) {$w_{11}$};
11 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
13 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
14 | 
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % multiplication
21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$};
22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$};
23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$};
24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$};
25 | 
26 | % sum
27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % exp
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
40 | 
41 | % log stage: the two product nodes use the `state` style and carry numbers; everything else stays symbolic
42 | \node[op, right=25pt of div1] (log1) {$\log$};
43 | \node[op, right=25pt of div2] (log2) {$\log$};
44 | \node[state, right=25pt of log1] (mult5) {$-0.207$}; % NOTE(review): presumably y_1 * log(\hat{y}_1) for the running example; confirm
45 | \node[state, right=25pt of log2] (mult6) {$0$}; % NOTE(review): presumably y_2 * log(\hat{y}_2); confirm
46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
49 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
50 | 
51 | 
52 | 
53 | %edges
54 | \path[tedge] (w11) -- (mult1);
55 | \path[tedge] (x1) -- (mult1);
56 | \path[tedge] (w12) -- (mult2);
57 | \path[tedge] (x2) -- (mult2);
58 | \path[tedge] (w21) -- (mult3);
59 | \path[tedge] (x11) -- (mult3);
60 | \path[tedge] (w22) -- (mult4);
61 | \path[tedge] (x22) -- (mult4);
62 | 
63 | \path[tedge] (mult1) -- (sum1);
64 | \path[tedge] (mult2) -- (sum1);
65 | \path[tedge] (mult3) -- (sum2);
66 | \path[tedge] (mult4) -- (sum2);
67 | \path[tedge] (sum1) -- (sum3);
68 | \path[tedge] (b1) -- (sum3);
69 | \path[tedge] (sum2) -- (sum4);
70 | \path[tedge] (b2) -- (sum4);
71 | 
72 | \path[tedge] (sum3) -- (exp1);
73 | \path[tedge] (sum4) -- (exp2);
74 | \path[tedge] (exp1) -- (sum5);
75 | \path[tedge] (exp2) -- (sum5);
76 | \path[tedge] (exp1) -- (div1);
77 | \path[tedge] (exp2) -- (div2);
78 | \path[tedge] (sum5) -- (div1);
79 | \path[tedge] (sum5) -- (div2);
80 | 
81 | 
82 | \path[tedge] (div1) -- (log1);
83 | \path[tedge] (div2) -- (log2);
84 | \path[tedge] (log1) -- (mult5);
85 | \path[tedge] (y1) -- (mult5);
86 | \path[tedge] (log2) -- (mult6);
87 | \path[tedge] (y2) -- (mult6);
88 | \path[tedge] (mult5) -- (sum6);
89 | \path[tedge] (mult6) -- (sum6);
90 | \path[tedge] (sum6) -- (minus1);
91 | 
92 | \end{tikzpicture}
93 | } % scalebox
94 | \end{figure}
95 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_12.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % input x and W
10 | \node[op] (w11) {$w_{11}$};
11 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
13 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
14 | 
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % multiplication
21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$};
22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$};
23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$};
24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$};
25 | 
26 | % sum
27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % exp
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
40 | 
41 | % log stage: only the sum node uses the `state` style and carries a number
42 | \node[op, right=25pt of div1] (log1) {$\log$};
43 | \node[op, right=25pt of div2] (log2) {$\log$};
44 | \node[op, right=25pt of log1] (mult5) {$*$};
45 | \node[op, right=25pt of log2] (mult6) {$*$};
46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
48 | \node[state, below right=65pt and 15pt of mult5] (sum6) {$-0.207$}; % NOTE(review): presumably the sum of the two products for the running example; confirm
49 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
50 | 
51 | 
52 | 
53 | %edges
54 | \path[tedge] (w11) -- (mult1);
55 | \path[tedge] (x1) -- (mult1);
56 | \path[tedge] (w12) -- (mult2);
57 | \path[tedge] (x2) -- (mult2);
58 | \path[tedge] (w21) -- (mult3);
59 | \path[tedge] (x11) -- (mult3);
60 | \path[tedge] (w22) -- (mult4);
61 | \path[tedge] (x22) -- (mult4);
62 | 
63 | \path[tedge] (mult1) -- (sum1);
64 | \path[tedge] (mult2) -- (sum1);
65 | \path[tedge] (mult3) -- (sum2);
66 | \path[tedge] (mult4) -- (sum2);
67 | \path[tedge] (sum1) -- (sum3);
68 | \path[tedge] (b1) -- (sum3);
69 | \path[tedge] (sum2) -- (sum4);
70 | \path[tedge] (b2) -- (sum4);
71 | 
72 | \path[tedge] (sum3) -- (exp1);
73 | \path[tedge] (sum4) -- (exp2);
74 | \path[tedge] (exp1) -- (sum5);
75 | \path[tedge] (exp2) -- (sum5);
76 | \path[tedge] (exp1) -- (div1);
77 | \path[tedge] (exp2) -- (div2);
78 | \path[tedge] (sum5) -- (div1);
79 | \path[tedge] (sum5) -- (div2);
80 | 
81 | 
82 | \path[tedge] (div1) -- (log1);
83 | \path[tedge] (div2) -- (log2);
84 | \path[tedge] (log1) -- (mult5);
85 | \path[tedge] (y1) -- (mult5);
86 | \path[tedge] (log2) -- (mult6);
87 | \path[tedge] (y2) -- (mult6);
88 | \path[tedge] (mult5) -- (sum6);
89 | \path[tedge] (mult6) -- (sum6);
90 | \path[tedge] (sum6) -- (minus1);
91 | 
92 | \end{tikzpicture}
93 | } % scalebox
94 | \end{figure}
95 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_13.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % input x and W
10 | \node[op] (w11) {$w_{11}$};
11 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
13 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
14 | 
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % multiplication
21 | \node[op, below right=1pt and 20pt of w11] (mult1) {$*$};
22 | \node[op, below right=1pt and 20pt of w12] (mult2) {$*$};
23 | \node[op, below right=1pt and 20pt of w21] (mult3) {$*$};
24 | \node[op, below right=1pt and 20pt of w22] (mult4) {$*$};
25 | 
26 | % sum
27 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
28 | \node[op, below right=25pt and 20pt of mult3] (sum2) {$+$};
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % exp
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \node[op, right=35pt of exp1] (div1) {$\hat{y}_1$};
39 | \node[op, right=35pt of exp2] (div2) {$\hat{y}_2$};
40 | 
41 | % log stage: only the final node uses the `state` style and carries a number
42 | \node[op, right=25pt of div1] (log1) {$\log$};
43 | \node[op, right=25pt of div2] (log2) {$\log$};
44 | \node[op, right=25pt of log1] (mult5) {$*$};
45 | \node[op, right=25pt of log2] (mult6) {$*$};
46 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
47 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
48 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
49 | \node[state, right=25pt of sum6] (minus1) {$0.207$}; % NOTE(review): presumably the sum multiplied by -1 for the running example; confirm
50 | 
51 | % gradient label next to b_1, revealed from the second overlay step on
52 | \visible<2->{\node[gradient, right=25pt of b1] (dLdb1) {$\frac{\partial L}{\partial b_1}$};} % \visible<2-> is an overlay command (beamer); the node keeps its space on step 1
53 | 
54 | 
55 | %edges
56 | \path[tedge] (w11) -- (mult1);
57 | \path[tedge] (x1) -- (mult1);
58 | \path[tedge] (w12) -- (mult2);
59 | \path[tedge] (x2) -- (mult2);
60 | \path[tedge] (w21) -- (mult3);
61 | \path[tedge] (x11) -- (mult3);
62 | \path[tedge] (w22) -- (mult4);
63 | \path[tedge] (x22) -- (mult4);
64 | 
65 | \path[tedge] (mult1) -- (sum1);
66 | \path[tedge] (mult2) -- (sum1);
67 | \path[tedge] (mult3) -- (sum2);
68 | \path[tedge] (mult4) -- (sum2);
69 | \path[tedge] (sum1) -- (sum3);
70 | \path[tedge] (b1) -- (sum3);
71 | \path[tedge] (sum2) -- (sum4);
72 | \path[tedge] (b2) -- (sum4);
73 | 
74 | \path[tedge] (sum3) -- (exp1);
75 | \path[tedge] (sum4) -- (exp2);
76 | \path[tedge] (exp1) -- (sum5);
77 | \path[tedge] (exp2) -- (sum5);
78 | \path[tedge] (exp1) -- (div1);
79 | \path[tedge] (exp2) -- (div2);
80 | \path[tedge] (sum5) -- (div1);
81 | \path[tedge] (sum5) -- (div2);
82 | 
83 | 
84 | \path[tedge] (div1) -- (log1);
85 | \path[tedge] (div2) -- (log2);
86 | \path[tedge] (log1) -- (mult5);
87 | \path[tedge] (y1) -- (mult5);
88 | \path[tedge] (log2) -- (mult6);
89 | \path[tedge] (y2) -- (mult6);
90 | \path[tedge] (mult5) -- (sum6);
91 | \path[tedge] (mult6) -- (sum6);
92 | \path[tedge] (sum6) -- (minus1);
93 | 
94 | \visible<2->{\path[tedge] (b1) -- (dLdb1);}
95 | 
96 | \end{tikzpicture}
97 | } % scalebox
98 | \end{figure}
99 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_14.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % a value in the last node (minus1) and adds a gradient node next to b_1.
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes, followed by the z_i nodes and the b_i nodes
26 | \foreach \nid/\base in {1/mult1, 2/mult3} {
27 |   \node[op, below right=25pt and 20pt of \base] (sum\nid) {$+$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: h_i, the shared node H, and the \hat{y}_i nodes
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[state, right=25pt of sum6] (minus1) {$0.207$};
52 | 
53 | % --- gradient node to the right of b_1
54 | \node[gradient, right=25pt of b1] (dLdb1) {$-0.19$};
55 | 
56 | % --- arrows as source/target pairs, kept in the original drawing order
57 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
58 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
59 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
60 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
61 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
62 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
63 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
64 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
65 |     sum6/minus1, b1/dLdb1} {
66 |   \path[tedge] (\src) -- (\dst);
67 | }
68 | 
69 | \end{tikzpicture}
70 | } % scalebox
71 | \end{figure}
72 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_2.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in the four product nodes (mult1..mult4) using the state style.
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node per weight, each labelled with its value
21 | \foreach \nid/\base/\val in {1/w11/0.13, 2/w12/0.83, 3/w21/0.14, 4/w22/-0.64} {
22 |   \node[state, below right=1pt and 20pt of \base] (mult\nid) {$\val$};
23 | }
24 | 
25 | % --- sum nodes, followed by the z_i nodes and the b_i nodes
26 | \foreach \nid/\base in {1/mult1, 2/mult3} {
27 |   \node[op, below right=25pt and 20pt of \base] (sum\nid) {$+$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: h_i, the shared node H, and the \hat{y}_i nodes
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_3.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in the two sum nodes (sum1, sum2) using the state style.
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes labelled with values, then the z_i and b_i nodes
26 | \foreach \nid/\base/\val in {1/mult1/0.96, 2/mult3/-0.5} {
27 |   \node[state, below right=25pt and 20pt of \base] (sum\nid) {$\val$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: h_i, the shared node H, and the \hat{y}_i nodes
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_4.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in the sum nodes (sum1, sum2) and in the b nodes (b1, b2).
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes labelled with values, z_i nodes, b nodes labelled with 0
26 | \foreach \nid/\base/\val in {1/mult1/0.96, 2/mult3/-0.5} {
27 |   \node[state, below right=25pt and 20pt of \base] (sum\nid) {$\val$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[state, above left=25pt and 10pt of sum3] (b1) {$0$};
32 | \node[state, below left=25pt and 10pt of sum4] (b2) {$0$};
33 | 
34 | % --- exp stage: h_i, the shared node H, and the \hat{y}_i nodes
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_5.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in the two nodes right of the sums (sum3, sum4) using the state style.
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes, then sum3/sum4 labelled with values, then the b_i nodes
26 | \foreach \nid/\base in {1/mult1, 2/mult3} {
27 |   \node[op, below right=25pt and 20pt of \base] (sum\nid) {$+$};
28 | }
29 | \node[state, right=25pt of sum1] (sum3) {$0.96$};
30 | \node[state, right=25pt of sum2] (sum4) {$-0.5$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: h_i, the shared node H, and the \hat{y}_i nodes
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_6.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in the two exp-stage nodes (exp1, exp2) using the state style.
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes, followed by the z_i nodes and the b_i nodes
26 | \foreach \nid/\base in {1/mult1, 2/mult3} {
27 |   \node[op, below right=25pt and 20pt of \base] (sum\nid) {$+$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: exp1/exp2 labelled with values, node H, \hat{y}_i nodes
35 | \node[state, right=25pt of sum3] (exp1) {$2.61$};
36 | \node[state, right=25pt of sum4] (exp2) {$0.6$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_7.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in exp1, exp2 and in the shared node sum5 using the state style.
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes, followed by the z_i nodes and the b_i nodes
26 | \foreach \nid/\base in {1/mult1, 2/mult3} {
27 |   \node[op, below right=25pt and 20pt of \base] (sum\nid) {$+$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: exp1/exp2/sum5 labelled with values, \hat{y}_i nodes
35 | \node[state, right=25pt of sum3] (exp1) {$2.61$};
36 | \node[state, right=25pt of sum4] (exp2) {$0.6$};
37 | \node[state, below right=65pt and 15pt of exp1] (sum5) {$3.22$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_8.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in the two nodes right of the exp stage (div1, div2).
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes, followed by the z_i nodes and the b_i nodes
26 | \foreach \nid/\base in {1/mult1, 2/mult3} {
27 |   \node[op, below right=25pt and 20pt of \base] (sum\nid) {$+$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: h_i, the shared node H, div1/div2 labelled with values
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid/\val in {1/0.81, 2/0.19} {
39 |   \node[state, right=35pt of exp\nid] (div\nid) {$\val$};
40 | }
41 | 
42 | % --- log stage: log nodes, products with y_i, final sum and last node
43 | \foreach \nid in {1, 2} {
44 |   \node[op, right=25pt of div\nid] (log\nid) {$\log$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/expanded_graph_9.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.6}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % Expanded computational graph, laid out left to right. This variant prints
 8 | % values in the two log-stage nodes (log1, log2) using the state style.
 9 | 
10 | % --- inputs: w/x pairs stacked in one column
11 | \node[op] (w11) {$w_{11}$};
12 | \node[op, below=10pt of w11] (x1) {$x_{1}$};
13 | \node[op, below=20pt of x1] (w12) {$w_{12}$};
14 | \node[op, below=10pt of w12] (x2) {$x_{2}$};
15 | \node[op, below=20pt of x2] (w21) {$w_{21}$};
16 | \node[op, below=10pt of w21] (x11) {$x_{1}$};
17 | \node[op, below=20pt of x11] (w22) {$w_{22}$};
18 | \node[op, below=10pt of w22] (x22) {$x_{2}$};
19 | 
20 | % --- one product node placed below right of each weight
21 | \foreach \nid/\base in {1/w11, 2/w12, 3/w21, 4/w22} {
22 |   \node[op, below right=1pt and 20pt of \base] (mult\nid) {$*$};
23 | }
24 | 
25 | % --- sum nodes, followed by the z_i nodes and the b_i nodes
26 | \foreach \nid/\base in {1/mult1, 2/mult3} {
27 |   \node[op, below right=25pt and 20pt of \base] (sum\nid) {$+$};
28 | }
29 | \node[op, right=25pt of sum1] (sum3) {$z_1$};
30 | \node[op, right=25pt of sum2] (sum4) {$z_2$};
31 | \node[op, above left=25pt and 10pt of sum3] (b1) {$b_1$};
32 | \node[op, below left=25pt and 10pt of sum4] (b2) {$b_2$};
33 | 
34 | % --- exp stage: h_i, the shared node H, and the \hat{y}_i nodes
35 | \node[op, right=25pt of sum3] (exp1) {$h_1$};
36 | \node[op, right=25pt of sum4] (exp2) {$h_2$};
37 | \node[op, below right=65pt and 15pt of exp1] (sum5) {$H$};
38 | \foreach \nid in {1, 2} {
39 |   \node[op, right=35pt of exp\nid] (div\nid) {$\hat{y}_\nid$};
40 | }
41 | 
42 | % --- log stage: log1/log2 labelled with values, products, final sum
43 | \foreach \nid/\val in {1/-0.207, 2/-1.67} {
44 |   \node[state, right=25pt of div\nid] (log\nid) {$\val$};
45 | }
46 | \node[op, right=25pt of log1] (mult5) {$*$};
47 | \node[op, right=25pt of log2] (mult6) {$*$};
48 | \node[op, above left=25pt and 10pt of mult5] (y1) {$y_1$};
49 | \node[op, below left=25pt and 10pt of mult6] (y2) {$y_2$};
50 | \node[op, below right=65pt and 15pt of mult5] (sum6) {$+$};
51 | \node[op, right=25pt of sum6] (minus1) {$*-1$};
52 | 
53 | % --- arrows as source/target pairs, kept in the original drawing order
54 | \foreach \src/\dst in {w11/mult1, x1/mult1, w12/mult2, x2/mult2,
55 |     w21/mult3, x11/mult3, w22/mult4, x22/mult4,
56 |     mult1/sum1, mult2/sum1, mult3/sum2, mult4/sum2,
57 |     sum1/sum3, b1/sum3, sum2/sum4, b2/sum4,
58 |     sum3/exp1, sum4/exp2, exp1/sum5, exp2/sum5,
59 |     exp1/div1, exp2/div2, sum5/div1, sum5/div2,
60 |     div1/log1, div2/log2, log1/mult5, y1/mult5,
61 |     log2/mult6, y2/mult6, mult5/sum6, mult6/sum6,
62 |     sum6/minus1} {
63 |   \path[tedge] (\src) -- (\dst);
64 | }
65 | 
66 | \end{tikzpicture}
67 | } % scalebox
68 | \end{figure}
69 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/fashionMNIST.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.05}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % nodes
10 | \node (shirt)
11 |     {\includegraphics[width=.15\textwidth]{images/shirt.png}};
12 | \node[textonly, below=1pt of shirt] (dimension0) {{\small$28\times 28$}};
13 | \node[textonly, above right= 10pt and 90pt of shirt] (vector) {$\begin{bmatrix}0.34\\ \vdots \\0.06\end{bmatrix}$};
14 | \node[textonly, above=1pt of vector] (x) {$\vect{x}$};
15 | \node[textonly, below=1pt of vector] (dimension1) {{\small$784\times 1$}};
16 | 
17 | \node[textonly, below=30pt of vector] (y) {$y=1$};
18 | 
19 | \node[textonly, below=30pt of y] (one-hot) {$\begin{bmatrix}1\\ \vdots \\0\end{bmatrix}$};
20 | 
21 | \node[textonly, above=1pt of one-hot] (yvector) {$\vect{y}$};
22 | \node[textonly, below=1pt of one-hot] (dimension2) {{\small$10\times 1$}};
23 | 
24 | 
25 | 
26 | % edges
27 | \path[tedge, orange!120, line width=1mm]  (shirt) -- (vector);
28 | \path[tedge, orange!120, line width=1mm] (shirt) -- (y);
29 | \path[tedge, orange!120, line width=1mm] (shirt) -- (one-hot);
30 | 
31 | 
32 | 
33 | \end{tikzpicture}
34 | } % scalebox
35 | \end{figure}
36 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/feature_engineering.tex:
--------------------------------------------------------------------------------
 1 | \begin{center}
 2 | \begin{tikzpicture}
 3 | \node (original) at (1, 6) 
 4 | 	{\includegraphics[width=.15\textwidth]{images/original.png}};
 5 | \node (segmented) at (5, 6)
 6 | 	{\includegraphics[width=.15\textwidth]{images/segmented.png}};
 7 | 	
 8 | \node (circled) at (0, 3)
 9 | 	{\includegraphics[width=.15\textwidth]{images/circled.png}};
10 | \node (rected) at (3, 3)
11 | 	{\includegraphics[width=.15\textwidth]{images/rected.png}};
12 | \node (ellipsed) at (6, 3)
13 | 	{\includegraphics[width=.15\textwidth]{images/ellipsed.png}};
14 | 	
15 | \draw[->, thick] (original.east) -- (segmented.west);
16 | \draw[->, thick] (segmented.south) -- (circled.north);
17 | \draw[->, thick] (segmented.south) -- (rected.north);
18 | \draw[->, thick] (segmented.south) -- (ellipsed.north);
19 | \draw[->, thick] (circled.south) -- (0.5, 0.7);
20 | \draw[->, thick] (rected.south) -- (3, 0.7);
21 | \draw[->, thick] (ellipsed.south) -- (5.5, 0.7);
22 | 
23 | \foreach \i in {0,...,12}
24 | {
25 | 	\pgfkeys{/pgf/number format/.cd,fixed,precision=0}
26 | 	\pgfmathsetmacro\myvalue{abs(rand) * 10}
27 | 	\draw[fill=blue!45!white] (0.5 * \i, 0) rectangle (0.5 + 0.5 * \i, 0.5) node[pos=.5]{\pgfmathprintnumber\myvalue};
28 | }
29 | \end{tikzpicture}
30 | \end{center}


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/feature_map.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.7}{
 5 |     \begin{tikzpicture}[scale=1.4,every node/.style={minimum size=1cm}, on grid] % 3x3 grid of cells, one numeric label per cell
 6 |             \draw[fill=cyan,opacity=1] (0,0) rectangle (3,3); % solid background; opacity must lie in [0,1]
 7 |             \draw[draw=base03,thick] (0,0) grid (3,3); % cell borders drawn over the fill
 8 |             \node (00) at (0.5,2.5) {\large 12.0}; % nodes are named (row col), row 0 at the top
 9 |             \node (01) at (1.5,2.5) {\large 12.0};
10 |             \node (02) at (2.5,2.5) {\large 17.0};
11 |             \node (10) at (0.5,1.5) {\large 10.0};
12 |             \node (11) at (1.5,1.5) {\large 17.0};
13 |             \node (12) at (2.5,1.5) {\large 19.0};
14 |             \node (20) at (0.5,0.5) {\large 9.0};
15 |             \node (21) at (1.5,0.5) {\large 6.0};
16 |             \node (22) at (2.5,0.5) {\large 14.0};
17 |     \end{tikzpicture}
18 | } % scalebox
19 | \end{figure}
20 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/log.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.5}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (times) {$\log$};
 9 | \node[op, left=20pt of times] (a) {$a$};
10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$};
11 | \node[textonly, right=0.1pt of da] {$=\frac{1}{a}$};
12 | \node[textonly, right=0.1pt of times] {$=f(a)=\log(a)$};
13 | 
14 | % edges
15 | \path[tedge] (a) -- (times);
16 | \path[tedge] (a) -- (da);
17 | 
18 | \end{tikzpicture}
19 | } % scalebox
20 | \end{figure}
21 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/lr_graph.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.8}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % input x and W
10 | \node[op] (w1) {$w_{1}$};
11 | \node[op, below=10pt of w1] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w2) {$w_{2}$};
13 | \node[op, below=10pt of w2] (x2) {$x_{2}$};
14 | 
15 | % multiplication
16 | \node[op, below right=1pt and 40pt of w1] (mult1) {$*$};
17 | \node[op, below right=1pt and 40pt of w2] (mult2) {$*$};
18 | % sum
19 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
20 | \node[op, right=65pt of sum1] (sum2) {$+$};
21 | \node[op, below=35pt of sum2] (minus) {$-1$};
22 | \node[op, left=25pt of minus] (y) {$y$};
23 | \node[op, right=45pt of sum2] (squ) {$squ$};
24 | 
25 | 
26 | %edges
27 | \path[tedge] (w1) -- (mult1);
28 | \path[tedge] (x1) -- (mult1);
29 | \path[tedge] (w2) -- (mult2);
30 | \path[tedge] (x2) -- (mult2);
31 | 
32 | \path[tedge] (mult1) -- (sum1);
33 | \path[tedge] (mult2) -- (sum1);
34 | \path[tedge] (sum1) -- (sum2);
35 | \path[tedge] (y) -- (minus);
36 | \path[tedge] (minus) -- (sum2);
37 | \path[tedge] (sum2) -- (squ);
38 | \end{tikzpicture}
39 | } % scalebox
40 | \end{figure}
41 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/lr_graph1.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.8}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % input x and W
10 | \node[op2] (w1) {$w_{1}$};
11 | \node[op, below=10pt of w1] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w2) {$w_{2}$};
13 | \node[op, below=10pt of w2] (x2) {$x_{2}$};
14 | 
15 | % multiplication
16 | \node[op2, below right=1pt and 40pt of w1] (mult1) {$*$};
17 | \node[op, below right=1pt and 40pt of w2] (mult2) {$*$};
18 | % sum
19 | \node[op2, below right=25pt and 20pt of mult1] (sum1) {$+$};
20 | \node[op2, right=65pt of sum1] (sum2) {$+$};
21 | \node[op, below=35pt of sum2] (minus) {$-1$};
22 | \node[op, left=25pt of minus] (y) {$y$};
23 | \node[op2, right=45pt of sum2] (squ) {$squ$};
24 | 
25 | 
26 | %edges
27 | \path[tedge] (w1) -- (mult1);
28 | \path[tedge] (x1) -- (mult1);
29 | \path[tedge] (w2) -- (mult2);
30 | \path[tedge] (x2) -- (mult2);
31 | 
32 | \path[tedge] (mult1) -- (sum1);
33 | \path[tedge] (mult2) -- (sum1);
34 | \path[tedge] (sum1) -- (sum2);
35 | \path[tedge] (y) -- (minus);
36 | \path[tedge] (minus) -- (sum2);
37 | \path[tedge] (sum2) -- (squ);
38 | \end{tikzpicture}
39 | } % scalebox
40 | \end{figure}
41 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/lr_graph_grad.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.7}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | 
 9 | % input x and W
10 | \node[op] (w1) {$w_{1}$};
11 | \node[op, below=10pt of w1] (x1) {$x_{1}$};
12 | \node[op, below=20pt of x1] (w2) {$w_{2}$};
13 | \node[op, below=10pt of w2] (x2) {$x_{2}$};
14 | 
15 | % multiplication
16 | \node[op, below right=1pt and 40pt of w1] (mult1) {$*$};
17 | \node[op, below right=1pt and 40pt of w2] (mult2) {$*$};
18 | % sum
19 | \node[op, below right=25pt and 20pt of mult1] (sum1) {$+$};
20 | \node[op, right=65pt of sum1] (sum2) {$+$};
21 | \node[op, below=35pt of sum2] (minus) {$-1$};
22 | \node[op, left=25pt of minus] (y) {$y$};
23 | \node[op, right=45pt of sum2] (squ) {$squ$};
24 | 
25 | 
26 | 
27 | %gradients 1
28 | \visible<3->{\node[gradient, above=10pt of sum2] (dsum2) {$2(\hat{y} - y)$};}
29 | \visible<5->{\node[gradient, above=10pt of sum1] (dsum1) {$1$};}
30 | \visible<7->{\node[gradient, above=10pt of mult1] (dmult1) {$1$};}
31 | \visible<9->{\node[gradient, above=10pt of w1] (dw1) {$x_1$};}
32 | 
33 | %gradients 2
34 | \visible<2->{\node[gradient2, above=35pt of squ] (dLdL) {$1$};}
35 | \visible<4->{\node[gradient2, above right=35pt and 15pt of dsum2] (dLdLpp) {$2(\hat{y} - y)$};}
36 | \visible<6->{\node[gradient2, above left=35pt and 15pt of dsum2] (dLdsum2) {$2(\hat{y} - y)$};}
37 | \visible<8->{\node[gradient2, above left=10pt and 25pt of dLdsum2] (dLdmult1) {$2(\hat{y} - y)$};}
38 | \visible<10->{\node[gradient2, left=25pt of dLdmult1] (dLdw1) {$2(\hat{y} - y)x_1$};}
39 | 
40 | 
41 | 
42 | 
43 | %edges
44 | \path[tedge] (w1) -- (mult1);
45 | \path[tedge] (x1) -- (mult1);
46 | \path[tedge] (w2) -- (mult2);
47 | \path[tedge] (x2) -- (mult2);
48 | 
49 | \path[tedge] (mult1) -- (sum1);
50 | \path[tedge] (mult2) -- (sum1);
51 | \path[tedge] (sum1) -- (sum2);
52 | \path[tedge] (y) -- (minus);
53 | \path[tedge] (minus) -- (sum2);
54 | \path[tedge] (sum2) -- (squ);
55 | 
56 | \visible<4->{\path[tedge] (dLdL) -- (dLdLpp);}
57 | \visible<4->{\path[tedge] (dsum2) -- (dLdLpp);}
58 | \visible<3->{\path[tedge] (sum2) -- (dsum2);}
59 | \visible<6->{\path[tedge] (dLdLpp) -- (dLdsum2);}
60 | 
61 | % \visible<4->{\path[tedge] (dsum2) -- (dLdsum2);}
62 | \visible<6->{\path[tedge] (dsum1) -- (dLdsum2);}
63 | \visible<8->{\path[tedge] (dLdsum2) -- (dLdmult1);}
64 | \visible<8->{\path[tedge] (dmult1) -- (dLdmult1);}
65 | \visible<10->{\path[tedge] (dLdmult1) -- (dLdw1);}
66 | \visible<10->{\path[tedge] (dw1) -- (dLdw1);}
67 | 
68 | 
69 | \end{tikzpicture}
70 | } % scalebox
71 | \end{figure}
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/minus1.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.5}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (times) {$*-1$};
 9 | \node[op, left=20pt of times] (a) {$a$};
10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$};
11 | \node[textonly, right=0.1pt of da] {$=-1$};
12 | \node[textonly, right=0.1pt of times] {$=f(a) =-a$};
13 | 
14 | % edges
15 | \path[tedge] (a) -- (times);
16 | 
17 | \end{tikzpicture}
18 | } % scalebox
19 | \end{figure}
20 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/mult.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.2}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (times) {$\times$};
 9 | \node[op, above left=20pt of times] (a) {$a$};
10 | \node[op, below left=20pt of times] (b) {$b$};
11 | \node[gradient, above left=15pt and 20pt of a] (da) {$\frac{\partial f}{\partial a}$};
12 | \node[gradient, below left=15pt and 20pt of b] (db) {$\frac{\partial f}{\partial b}$};
13 | \node[textonly, right=0.1pt of da] {$=b$};
14 | \node[textonly, right=0.1pt of db] {$=a$};
15 | \node[textonly, right=0.1pt of times] {$=f(a,b) =a*b$};
16 | 
17 | % edges
18 | \path[tedge] (a) -- (times);
19 | \path[tedge] (b) -- (times);
20 | \path[tedge] (a) -- (db);
21 | \path[tedge] (b) -- (da);
22 | 
23 | 
24 | \end{tikzpicture}
25 | } % scalebox
26 | \end{figure}
27 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/multiple_paths.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.2}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (div) {$div$};
 9 | \node[textonly, right=0.1pt of div] {$=f(x,y)=\frac{x}{y}=\frac{a*c}{a+b}$};
10 | \node[op, above left=20pt of div] (mult) {$*$};
11 | \node[op, below left=20pt of div] (plus) {$+$};
12 | \node[op, below=20pt of plus] (b2) {$b$};
13 | \node[op, above=20pt of mult] (b1) {$c$};
14 | \node[op, left=55pt of div] (a) {$a$};
15 | \node[textonly, above right=5pt and 5pt of div] (inv1) {$\frac{\partial f}{\partial x} \frac{\partial x}{\partial a}$};
16 | \node[textonly, above left=2pt and 2pt of a] (inv2) {};
17 | \node[textonly, below right=5pt and 5pt of div] (inv3) {$\frac{\partial f}{\partial y} \frac{\partial y}{\partial a}$};
18 | \node[textonly, below left=2pt and 2pt of a] (inv4) {};
19 | 
20 | 
21 | % edges
22 | \path[tedge] (a) -- (mult);
23 | \path[tedge] (b1) -- (mult);
24 | \path[tedge] (a) -- (plus);
25 | \path[tedge] (b2) -- (plus);
26 | \path[tedge] (plus) -- (div);
27 | \path[tedge] (mult) -- (div);
28 | \path[tedge, nephritis!60, line width=1mm] (inv1) to [out=120,in=80] (inv2);
29 | \path[tedge, nephritis!60, line width=1mm] (inv3) to [out=-120,in=-80] (inv4);
30 | 
31 | 
32 | 
33 | \end{tikzpicture}
34 | } % scalebox
35 | \end{figure}
36 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/perceptron.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.15}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (x2) {$x_2$};
 9 | \node[op, above=40pt of x2] (x1) {$x_1$};
10 | \node[op, below=40pt of x2] (x3) {$x_3$};
11 | \node[op, right=60pt of x2] (vk) {$v$};
12 | \node[op, right=40pt of vk] (yk) {$\hat{y}$};
13 | \node[textonly, below right=20pt of x3] (Synaptic) {Synaptic link};
14 | \node[textonly, below right=50pt of vk] (Activation) {Activation link};
15 | \node[textonly, above right=28pt and -46pt of yk] (f) {$\hat{y} = f(\vect{x};\vect{\theta})= g\left(\sum_{i=1}^{3} \theta_ix_i\right)$};
16 | 
17 | 
18 | % edges
19 | \path[tedge] (x1) edge node[above=1.8pt] {$\theta_{1}$} (vk);
20 | \path[tedge] (x2) edge node[above=0.2pt] {$\theta_{2}$} (vk);
21 | \path[tedge] (x3) edge node[above=3.8pt] {$\theta_{3}$} (vk);
22 | \path[tedge] (vk) edge node[above=1pt] {{\Large$g$}}  (yk) ;
23 | 
24 | % info edges
25 | \draw[orange!120, line width=1mm]  (Synaptic) to [out=150,in=0] (x3);
26 | \draw[orange!120, line width=1mm] (Synaptic) to [out=150,in=-100] (vk);
27 | 
28 | \draw[orange!120, line width=1mm]  (Activation) to [out=170,in=-40] (vk);
29 | \draw[orange!120, line width=1mm] (Activation) to [out=170,in=-100] (yk);
30 | 
31 | 
32 | 
33 | \end{tikzpicture}
34 | } % scalebox
35 | \end{figure}
36 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/simple_example.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.0}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (z) {$z$};
 9 | \node[op, above left=50pt of z] (x) {$x$};
10 | \node[op, below left=40pt of z] (y) {$y$};
11 | \node[op, right=40pt of z] (u) {$u$};
12 | 
13 | 
14 | % edges =============================
15 | \path[tedge] (x) -- (z);
16 | \path[tedge] (y) -- (z);
17 | \path[tedge] (z) -- (u);
18 | \end{tikzpicture}
19 | } % scalebox
20 | \end{figure}
21 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/simple_example0.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.0}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (x) {$x$};
 9 | \node[op, right=40pt of x] (y) {$y$};
10 | \node[op, right=40pt of y] (u) {$u$};
11 | 
12 | 
13 | % edges =============================
14 | \path[tedge] (x) -- (y);
15 | \path[tedge] (y) -- (u);
16 | \end{tikzpicture}
17 | } % scalebox
18 | \end{figure}
19 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/simple_exampleCR.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{0.7}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (x) {$x$};
 9 | \node[op, right=40pt of x] (y) {$y$};
10 | \node[op, right=40pt of y] (u) {$u$};
11 | 
12 | 
13 | % edges =============================
14 | \path[tedge] (x) -- (y);
15 | \path[tedge] (y) -- (u);
16 | \end{tikzpicture}
17 | } % scalebox
18 | \end{figure}
19 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/soma.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.2}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (times) {$+$};
 9 | \node[op, above left=20pt of times] (a) {$a$};
10 | \node[op, below left=20pt of times] (b) {$b$};
11 | \node[gradient, above=15pt of a] (da) {$\frac{\partial f}{\partial a}$};
12 | \node[gradient, below=15pt of b] (db) {$\frac{\partial f}{\partial b}$};
13 | \node[textonly, right=0.1pt of da] {$=1$};
14 | \node[textonly, right=0.1pt of db] {$=1$};
15 | \node[textonly, right=0.1pt of times] {$=f(a,b) = a+b$};
16 | 
17 | % edges
18 | \path[tedge] (a) -- (times);
19 | \path[tedge] (b) -- (times);
20 | 
21 | 
22 | \end{tikzpicture}
23 | } % scalebox
24 | \end{figure}
25 | 


--------------------------------------------------------------------------------
/slides/backprop1/TikzFiles/squ.tex:
--------------------------------------------------------------------------------
 1 | \begin{figure}[ht!]
 2 | \centering
 3 | 
 4 | \scalebox{1.5}{
 5 | \begin{tikzpicture}[auto]
 6 | 
 7 | % operations =============================
 8 | \node[op] (times) {$squ$};
 9 | \node[op, left=20pt of times] (a) {$a$};
10 | \node[gradient, above=30pt of a] (da) {$\frac{\partial f}{\partial a}$};
11 | \node[textonly, right=0.1pt of da] {$=2a$};
12 | \node[textonly, right=0.1pt of times] {$=f(a)=a^2$};
13 | 
14 | % edges
15 | \path[tedge] (a) -- (times);
16 | \path[tedge] (a) -- (da);
17 | 
18 | \end{tikzpicture}
19 | } % scalebox
20 | \end{figure}
21 | 


--------------------------------------------------------------------------------
/slides/backprop1/all_imports.tex:
--------------------------------------------------------------------------------
 1 | \usepackage[utf8]{inputenc}
 2 | \usepackage[T1]{fontenc}
 3 | \usepackage{lmodern}
 4 | \usepackage{appendixnumberbeamer}
 5 | \usepackage{hyperref}
 6 | \usepackage{booktabs}
 7 | \usepackage{bm}
 8 | \usepackage[scale=2]{ccicons}
 9 | \usepackage[outputdir=build]{minted}
10 | \usepackage{pgfplots}
11 | \usepackage{array,colortbl,xcolor}
12 | \usepgfplotslibrary{dateplot}
13 | \usepackage{setspace}
14 | \usepackage{etoolbox}
15 | \usepackage{xspace}
16 | \usepackage{tikz}
17 | \usetikzlibrary{shapes,arrows,positioning,fit,backgrounds}
18 | \usepackage{tkz-euclide}
19 | \usepackage{soul}
20 | \usepackage{ragged2e}
21 | \usepackage{algorithm,algorithmic}
22 | 


--------------------------------------------------------------------------------
/slides/backprop1/all_new_commands.tex:
--------------------------------------------------------------------------------
 1 | \newcommand{\themename}{\textbf{\textsc{metropolis}}\xspace} % theme name in bold small caps, with automatic trailing space
 2 | \newcommand{\vect}[1]{\bm{#1}} % #1 typeset as a bold math symbol
 3 | \newcommand{\myprime}[1]{{#1}^{\prime}} % #1 followed by a prime superscript
 4 | \newcommand{\grad}[2]{\nabla_{#1} {#2}} % nabla subscripted by #1, applied to #2
 5 | \newcommand{\dotp}[2]{{#1}^{\top}{#2}} % #1 transposed times #2
 6 | \newcommand{\dotpPright}[2]{{#1}^{\top}\left({#2}\right)} % like \dotp, with #2 in auto-sized parentheses
 7 | \newcommand{\outerp}[2]{\left({#1}\right){#2}^{\top}} % parenthesized #1 times #2 transposed
 8 | \newcommand{\Jacobian}[2]{\frac{\partial #1}{\partial #2}} % partial derivative of #1 with respect to #2
 9 | \newcommand{\Vocab}{\mathbb{V}} % blackboard-bold V
10 | \DeclareMathOperator*{\argmin}{arg\,min} % starred form: subscripts are set as limits below the operator in display style
11 | \DeclareMathOperator*{\argmax}{arg\,max} % starred form: subscripts are set as limits below the operator in display style
12 | \DeclareMathOperator{\E}{\mathbb{E}} % blackboard-bold E as an operator (used for expectations in main.tex)
13 | 
14 | 
15 | % Quote with author reference at the end
16 | \let\oldquote\quote
17 | \let\endoldquote\endquote
18 | \renewenvironment{quote}[2][]
19 |   {\if\relax\detokenize{#1}\relax
20 |      \def\quoteauthor{#2}%
21 |    \else
22 |      \def\quoteauthor{#2~---~#1}%
23 |    \fi
24 |    \oldquote}
25 |   {\par\nobreak\smallskip\hfill(\quoteauthor)%
26 |    \endoldquote\addvspace{\bigskipamount}}
27 | %-----------------------------------------   
28 | 


--------------------------------------------------------------------------------
/slides/backprop1/definitions/colors.tex:
--------------------------------------------------------------------------------
 1 | \definecolor{blue}{RGB}{159, 192, 176}
 2 | \definecolor{blue2}{RGB}{38,139,210}
 3 | \definecolor{green}{RGB}{160, 227, 127}
 4 | \definecolor{green2}{RGB}{132, 164, 76}
 5 | \definecolor{orange}{RGB}{243, 188, 125}
 6 | \definecolor{red}{RGB}{253, 123, 84}
 7 | \definecolor{nephritis}{RGB}{39, 174, 96}
 8 | \definecolor{emerald}{RGB}{46, 204, 113}
 9 | \definecolor{turquoise}{RGB}{26, 188, 156} % Flat UI turquoise; previous value duplicated nephritis (39, 174, 96)
10 | \definecolor{green-sea}{RGB}{22, 160, 133}
11 | \definecolor{base02}{RGB}{7,54,66}
12 | \definecolor{base03}{RGB}{0,43,54}
13 | \definecolor{cyan}{RGB}{42,161,152}


--------------------------------------------------------------------------------
/slides/backprop1/definitions/styles.tex:
--------------------------------------------------------------------------------
 1 | % Tikzstyles for Computation Graphs
 2 | 
 3 | % nodes
 4 | \tikzstyle{noop} = [circle, draw=none, fill=red, minimum size = 10pt]
 5 | \tikzstyle{op} = [circle, draw=red, line width=1.5pt, fill=red!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
 6 | \tikzstyle{op2} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
 7 | \tikzstyle{op3} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \scriptsize, minimum size = 7pt]
 8 | \tikzstyle{placeholder} = [circle, draw=red, line width=1.5pt, fill=red!30, text=black, text centered, font=\bf  \normalsize, minimum size = 25pt]
 9 | \tikzstyle{state} = [circle, draw=blue, line width=1.5pt, fill=blue!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
10 | \tikzstyle{gradient} = [circle, draw=nephritis, line width=1.5pt, fill=nephritis!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
11 | \tikzstyle{gradient2} = [circle, draw=green2, line width=1.5pt, fill=green2!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
12 | \tikzstyle{textonly} = [draw=none, fill=none, text centered, font=\bf \normalsize]
13 | 
14 | % edges
15 | % \tikzstyle{tedge}  = [draw, thick, >=stealth, ->]
16 | \tikzstyle{tedge}  = [draw, thick, >=latex, ->]
17 | \tikzstyle{tedge_dashed}  = [draw, thick, >=latex, ->, dashed]
18 | 
19 | % namedscope
20 | \tikzstyle{namedscope} = [circle, draw=orange, line width=1.5pt, fill=orange!60, align=center, inner sep=0pt]
21 | 
22 | % \tikzstyle{container} = [draw=none, rectangle, dotted, inner ysep=1.5em]
23 | % \tikzstyle{novertex} = [draw=none, fill=none, text centered]
24 | % \tikzstyle{predicate} = [ellipse, draw, thick, text centered, rounded corners, minimum size=30pt]
25 | % \tikzstyle{aux} = [rectangle, draw, thick, text centered, rounded corners, minimum size=30pt]
26 | % \tikzstyle{ledge}  = [draw, dashed, thick, >=stealth, ->]
27 | % \tikzstyle{pedge}  = [draw, thick, >=stealth, ->]
28 | 


--------------------------------------------------------------------------------
/slides/backprop1/header.tex:
--------------------------------------------------------------------------------
 1 | \title{MAC0460 - Introdução ao aprendizado de máquina \\ \vspace{0.2 cm} Back-propagation 1} 
 2 | \date{\today}
 3 | 
 4 | \author{
 5 |   Felipe Salvatore\\
 6 |   \url{https://felipessalvatore.github.io/}\vspace{0.4 cm}
 7 |   \and\\ 
 8 |   Nina S. T. Hirata\\
 9 |   \url{https://www.ime.usp.br/~nina/}
10 |   \vspace{0.4 cm}
11 | }
12 | 
13 | \institute{\textbf{IME-USP}: Institute of Mathematics and Statistics, University of São Paulo}
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/slides/backprop1/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop1/images/logo.png


--------------------------------------------------------------------------------
/slides/backprop1/main.tex:
--------------------------------------------------------------------------------
  1 | \documentclass[10pt]{beamer}
  2 | \usetheme{metropolis}
  3 | % all imports
  4 | \input{all_imports}
  5 | 
  6 | \AtBeginEnvironment{quote}{\singlespacing}
  7 | 
  8 | % new commands
  9 | \input{all_new_commands}
 10 | 
 11 | % definitions
 12 | \input{definitions/colors}
 13 | \input{definitions/styles}
 14 | 
 15 | \input{header}
 16 | 
 17 | \begin{document}
 18 | \nocite{DeepLearningbook}
 19 | 
 20 | \maketitle
 21 | 
 22 | \section{Revisão: regressão logística}
 23 | 
 24 | \begin{frame}{O problema de classificação}
 25 | 
 26 | \begin{itemize}
 27 | \item [] Em vários casos a função desconhecida $f:\mathbb{R}^{d} \rightarrow \mathbb{R}$ que queremos aproximar é uma \textbf{distribuição de probabilidade}.
 28 | \vspace{0.3cm}
 29 | \item[] Temos um vetor $\vect{x}$ e queremos saber a qual das classes $k_1, \dots, k_n$ ele pertence. Um modo de formular esse problema como um problema de aprendizado supervisionado é coletar um conjunto de dados $(\vect{x}_{1}, y_{1}), \dots ,(\vect{x}_{N}, y_{N})$ onde $y_i \in \{k_1, \dots, k_n\}$ e tentar estimar $p(y  | \vect{x})$ por meio de uma família de modelos $p(y | \vect{x}; \vect{\theta})$.
 30 | \end{itemize}
 31 | \end{frame}
 32 | 
 33 | 
 34 | \begin{frame}{Classificação com duas classes}
 35 | Quando $y$ é uma variável binária definimos o modelo $p(y | \vect{x}; \vect{\theta})$ do seguinte modo:
 36 | \Large{
 37 | \begin{align*}
 38 | \hat{y} &= p(y=1| \vect{x}; \vect{\theta})\\
 39 | &= h(\vect{x}; \vect{\theta}) \\
 40 | &= \sigma(z)\\
 41 | \end{align*}
 42 | }
 43 | em que 
 44 | \begin{equation*}
 45 | z = \dotp{\vect{w}}{\vect{x}} + b
 46 | \end{equation*}
 47 | 
 48 | \end{frame}
 49 | 
 50 | \begin{frame}[fragile]{Revisão: função sigmoide}
 51 | \input{TikzFiles/Sigmoid}
 52 | \end{frame}
 53 | 
 54 | \begin{frame}{Classificação}
 55 | \input{TikzFiles/DFNclassification2}
 56 | \end{frame}
 57 | 
 58 | \begin{frame}{Classificação para várias classes}
 59 | E quando $y$ é uma variável com $n$ valores definimos $p(y | \vect{x}; \vect{\theta})$ do seguinte modo:
 60 | \Large{
 61 | \begin{align*}
 62 | \hat{\vect{y}} &= p(y| \vect{x}; \vect{\theta})\\
 63 | &= h(\vect{x}; \vect{\theta}) \\
 64 | &= \mathrm{softmax}(\vect{z})\\
 65 | \end{align*}
 66 | }
 67 | em que 
 68 | \begin{equation*}
 69 | \vect{z} = \vect{W}\vect{x} + \vect{b}
 70 | \end{equation*}
 71 | 
 72 | \end{frame}
 73 | 
 74 | \begin{frame}[fragile]{Revisão: função softmax}
 75 | \input{TikzFiles/Softmax}
 76 | \end{frame}
 77 | 
 78 | \begin{frame}{Classificação}
 79 | \input{TikzFiles/DFNclassification}
 80 | \end{frame}
 81 | 
 82 | 
 83 | \begin{frame}{Princípio da máxima verossimilhança}
 84 | Os parâmetros $\vect{\theta}$ vão ser adaptados de modo que  $p(y| \vect{x};\vect{\theta})$ seja a distribuição mais adequada para os dados
 85 | \begin{equation*}
 86 | (\vect{x}^{(1)},y^{(1)}), \dots, (\vect{x}^{(N)},y^{(N)})
 87 | \end{equation*}
 88 | \end{frame}
 89 | 
 90 | \begin{frame}{Classificação}
 91 | A função que queremos maximizar é
 92 | \Large{
 93 | \begin{align*}
 94 | \mathcal{L}(\vect{\theta}) &= \E_{\vect{x},y \sim p_{data}} \log p(y| \vect{x}; \vect{\theta})\\
 95 | &= \frac{1}{N}\sum_{i=1}^{N}\log p(y^{(i)}| \vect{x}^{(i)}; \vect{\theta})\\
 96 | \end{align*}
 97 | }
 98 | \end{frame}
 99 | 
100 | \begin{frame}{Revisão: entropia}
101 | \input{TikzFiles/Entropy1}
102 | \end{frame}
103 | 
104 | \begin{frame}{Revisão: entropia}
105 | \input{TikzFiles/Entropy2}
106 | \end{frame}
107 | 
108 | \begin{frame}{Revisão: divergência Kullback-Leibler}
109 | \input{TikzFiles/KullbackLeibler}
110 | \end{frame}
111 | 
112 | \begin{frame}{Revisão: entropia cruzada}
113 | \Large{
114 | \begin{align*}
115 | CE(\vect{p},\vect{q}) &= H(\vect{p}) + D_{KL}(\vect{p}||\vect{q})\\
116 | \vspace{0.2cm}
117 | &= -\sum_{i}\vect{p}_{i}\log(\vect{q}_{i})
118 | \end{align*}
119 | }
120 | \vspace{0.2cm}
121 | \begin{equation*}
122 | \argmin_{\vect{q}} CE(\vect{p},\vect{q}) =  \argmin_{\vect{q}} D_{KL}(\vect{p}||\vect{q})
123 | \end{equation*}
124 | \end{frame}
125 | 
126 | \begin{frame}[fragile]{Entropia cruzada e verossimilhança}
127 | 
128 | Assumindo que $\vect{y}$ é one-hot temos que: 
129 | 
130 | \Large{
131 | \begin{align*}
132 | L(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)}) &= CE(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)})\\
133 | &= -\sum_{k=1}^{n} \vect{y}^{(i)}_{k}\log p(y=k| \vect{x}^{(i)}; \vect{\theta})\\
134 | &= - \log p(y^{(i)}| \vect{x}^{(i)}; \vect{\theta})\\
135 | \end{align*}
136 | }
137 | \end{frame}
138 | 
139 | \begin{frame}{Entropia cruzada e verossimilhança}
140 | E a função que queremos minimizar é
141 | \Large{
142 | \begin{align*}
143 | J(\vect{\theta}) &= \frac{1}{N}\sum_{i=1}^{N} L(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)})\\
144 | &= - \frac{1}{N}\sum_{i=1}^{N}\log p(y^{(i)}| \vect{x}^{(i)}; \vect{\theta})\\
145 | &= - \mathcal{L}(\vect{\theta})
146 | \end{align*}
147 | 
148 | \vspace{0.2cm}
149 | \begin{equation*}
150 | \argmax_{\vect{\theta}} \mathcal{L}(\vect{\theta}) =  \argmin_{\vect{\theta}} J(\vect{\theta})
151 | \end{equation*}
152 | }
153 | \end{frame}
154 | 
155 | \begin{frame}{Treinando um modelo}
156 | \Large{
157 | \begin{itemize}
158 | \item $\hat{\vect{y}} = f(\vect{x}; \vect{\theta})$ 
159 | \item $J(\vect{\theta}) =  \frac{1}{m}\sum_{i=1}^{m} L(\vect{y}^{(i)}, {\hat{\vect{y}}}^{(i)})$ 
160 | \item \text{algum algoritmo de otimização (e.g., \textbf{SGD})}:
161 | \begin{equation*}
162 | \vect{\theta}^{novo}  \leftarrow \vect{\theta}^{velho} - \eta \grad{\vect{\theta}}{J(\vect{\theta})}
163 | \end{equation*}
164 | \vspace{0.3cm}
165 | \end{itemize}
166 | }
167 | 
168 | Vamos ver como computar $\grad{\vect{\theta}}{J(\vect{\theta})}$ de modo eficiente para uma função arbitrária $J$.
169 | 
170 | \end{frame}
171 | 
172 | \section{Grafo de computação (caso escalar)}
173 | 
174 | \begin{frame}{Grafo de computação}
175 | 
176 | Considere os seguintes  conjuntos de funções:
177 | \Large{
178 | \begin{itemize}
179 | \item $OP_1 = \{ \lambda x. -x, \lambda x. x^2,  \lambda x. e^x, \lambda x. \log(x), \lambda x. x \}$
180 | \item $OP_2 = \{ \lambda xy. x + y, \lambda  xy. x * y, \lambda xy. \frac{x}{y} \}$
181 | \item $OP = OP_1 \cup OP_2$
182 | \end{itemize}
183 | }
184 | \end{frame}
185 | 
186 | \begin{frame}{Grafo de computação}
187 | Um grafo de computação definido em $OP$ $\mathcal{G} = (\mathcal{V}, \mathcal{E}_1, \mathcal{E}_2)$ é um grafo acíclico dirigido (DAG) tal que cada elemento $u \in \mathcal{V}$ indica uma variável, se $(x,y) \in \mathcal{E}_1$ então $f(x)=y$ onde $f \in OP_1 \cup \{g(x,\alpha) | \alpha \in \mathbb{R} , g \in OP_2\}$, e se $(x,y) \in \mathcal{E}_2$ então $f(x)=y$ onde $f \in \{g(\alpha, x) | \alpha \in \mathbb{R} , g \in OP_2\}$.
188 | 
189 | \vspace{0.3cm}
190 | \begin{itemize}
191 | \item $Pa(x) = \{y \in \mathcal{V} | (y,x) \in \mathcal{E}_1 \cup \mathcal{E}_2 \}$.
192 | \item $S(x) = \{y \in \mathcal{V} | (x,y) \in \mathcal{E}_1 \cup \mathcal{E}_2 \}$.
193 | \end{itemize} 
194 | 
195 | \end{frame}
196 | 
197 | 
198 | \begin{frame}{Grafo de computação}
199 | 
200 | \input{TikzFiles/simple_example0}
201 | \Large{
202 | \begin{itemize}
203 | \item $y  = x^2$
204 | \item $u = e^{y}$
205 | \end{itemize}
206 | }
207 | \end{frame}
208 | 
209 | \begin{frame}{Grafo de computação}
210 | 
211 | \input{TikzFiles/simple_example}
212 | \Large{
213 | \begin{itemize}
214 | \item $z  = x + y$
215 | \item $u = \log(z)$
216 | \end{itemize}
217 | }
218 | 
219 | \end{frame}
220 | 
221 | \begin{frame}{Grafo de computação}
222 | \Large{
223 | Queremos representar uma função $L$ por um grafo definido em $OP$ pois as derivadas parciais das funções de $OP$ são simples de calcular. E com \alert{a regra da cadeia} podemos combinar as derivadas das funções locais para obter a derivada parcial de $L$ com respeito a quaisquer parâmetros.
224 | }
225 | \end{frame}
226 | 
227 | \begin{frame}{Grafo de computação}
228 | \Large{
229 | Como todas as funções em $OP$ são diferenciáveis, podemos estender $\mathcal{G}$ em $\mathcal{G}^{\prime}$ adicionando todas as derivadas parciais dos filhos em relação aos pais junto com as respectivas dependências. 
230 | }
231 | \end{frame}
232 | 
233 | 
234 | \begin{frame}{Estendendo o grafo de operações básicas: soma}
235 | \input{TikzFiles/soma}
236 | \end{frame}
237 | 
238 | \begin{frame}{Estendendo o grafo de operações básicas: multiplicação}
239 | \input{TikzFiles/mult}
240 | \end{frame}
241 | 
242 | \begin{frame}{Estendendo o grafo de operações básicas: divisão}
243 | \input{TikzFiles/div}
244 | \end{frame}
245 | 
246 | \begin{frame}{Estendendo o grafo de operações básicas: negativo}
247 | \input{TikzFiles/minus1}
248 | \end{frame}
249 | 
250 | \begin{frame}{Estendendo o grafo de operações básicas: exponenciação}
251 | \input{TikzFiles/exp}
252 | \end{frame}
253 | 
254 | \begin{frame}{Estendendo o grafo de operações básicas: logaritmo}
255 | \input{TikzFiles/log}
256 | \end{frame}
257 | 
258 | \begin{frame}{Estendendo o grafo de operações básicas: ao quadrado}
259 | \input{TikzFiles/squ}
260 | \end{frame}
261 | 
262 | 
263 | \begin{frame}{Regra da cadeia}
264 | \Large{
265 | \begin{itemize}
266 | \item $f:\mathbb{R} \rightarrow\mathbb{R}$, $g:\mathbb{R} \rightarrow\mathbb{R}$. 
267 | \item $y = g(x)$
268 | \item $u = f(g(x)) = f(y)$
269 | 
270 | \end{itemize}
271 | 
272 | \input{TikzFiles/simple_exampleCR}
273 | 
274 | \[
275 | \frac{\partial u}{\partial x} = \frac{\partial u}{\partial y} \frac{\partial y}{\partial x} 
276 | \]
277 | }
278 | \end{frame}
279 | 
280 | \begin{frame}{Aplicando a regra da cadeia}
281 | \input{TikzFiles/chain_rule_nodes}
282 | \Large{
283 | \begin{itemize}
284 | \item $\frac{\partial u_{n}}{\partial u_{j}} = \frac{\partial u_{n}}{\partial u_{j+1}} \frac{\partial u_{j+1}}{\partial u_{j}}$
285 | \end{itemize}
286 | }
287 | 
288 | \end{frame}
289 | 
290 | \begin{frame}{Exemplo 1: regressão linear}
291 | \Large{
292 | \begin{align*}
293 | J(\vect{w}) & = \frac{1}{N}\sum_{i=1}^{N}L(y_{i}, \hat{y}_{i})\\
294 |             & = \frac{1}{N}\sum_{i=1}^{N}(\hat{y}_{i} - y_{i})^{2}\\
295 |             & = \frac{1}{N}\sum_{i=1}^{N}(\vect{w}^\top\vect{x}_{i} - y_{i})^{2}\\
296 | \end{align*}
297 | }
298 | \end{frame}
299 | 
300 | \begin{frame}{Simplificação}
301 | \Large{
302 | \begin{itemize}
303 | \item $\vect{w} = \begin{bmatrix}w_{1}  \\ w_{2}\end{bmatrix}$
304 | 
305 | \vspace{0.8cm}
306 | 
307 | \item $\vect{x} = \begin{bmatrix}x_1 \\ x_2\end{bmatrix}$ 
308 | 
309 | \end{itemize}
310 | }
311 | \end{frame}
312 | 
313 | 
314 | 
315 | \begin{frame}{Grafo de $L(\hat{y}, y)$}
316 | \input{TikzFiles/lr_graph}
317 | \end{frame}
318 | 
319 | 
320 | \begin{frame}{Caminho de $w_1$}
321 | \input{TikzFiles/lr_graph1}
322 | \end{frame}
323 | 
324 | \begin{frame}{Derivada de $L$ em relação a $w_1$}
325 | \input{TikzFiles/lr_graph_grad}
326 | \end{frame}
327 | 
328 | 
329 | \begin{frame}{Regra da cadeia para várias variáveis}
330 | \Large{
331 | \begin{itemize}
332 | \item $z = f(x,y)$
333 | \item $x = f_{1}(a)$. 
334 | \item $y = f_{2}(a)$
335 | \end{itemize}
336 | \[
337 | \frac{\partial z}{ \partial a} = \frac{\partial z}{\partial x} \frac{\partial x}{\partial a} + \frac{\partial z}{\partial y} \frac{\partial y}{\partial a} 
338 | \]
339 | }
340 | \end{frame}
341 | 
342 | 
343 | \begin{frame}{Exemplo}
344 | \input{TikzFiles/multiple_paths}
345 | \end{frame}
346 | 
347 | \begin{frame}{Exemplo 2: regressão logística}
348 | \Large{
349 | \begin{equation*}
350 | \hat{\vect{y}} = \mathrm{softmax}(\vect{W}\vect{x} + \vect{b})
351 | \end{equation*}
352 | \begin{equation*}
353 | L(\vect{y},\hat{\vect{y}}) = CE(\vect{y},\hat{\vect{y}})
354 | \end{equation*}
355 | 
356 | 
357 | \begin{equation*}
358 | L(\vect{y},\hat{\vect{y}}) = - \sum_{i}\vect{y}_{i} \log \left(\frac{\exp(\sum_{k} \vect{W}_{i,k}\vect{x}_{k} + \vect{b}_{i})}{\sum_{j}\exp(\sum_{k}\vect{W}_{j,k}\vect{x}_{k} + \vect{b}_{j})} \right)
359 | \end{equation*}
360 | }
361 | \end{frame}
362 | 
363 | \begin{frame}{Simplificação}
364 | \Large{
365 | \begin{itemize}
366 | \item $\begin{bmatrix}z_1\\z_2\end{bmatrix} = \begin{bmatrix}w_{11}  & w_{12}\\w_{21}  & w_{22}\end{bmatrix}* \begin{bmatrix}x_1\\x_2\end{bmatrix} + \begin{bmatrix}b_1\\b_2\end{bmatrix}$
367 | 
368 | \vspace{0.4cm}
369 | 
370 | \item $\begin{bmatrix}h_1\\h_2\end{bmatrix} = \begin{bmatrix}\exp(z_1)\\\exp(z_2)\end{bmatrix}$ 
371 | 
372 | \vspace{0.4cm}
373 | 
374 | \item $H = h_1 + h_2$ 
375 | 
376 | \vspace{0.4cm}
377 | 
378 | \item $\begin{bmatrix}\hat{y}_1\\\hat{y}_2\end{bmatrix} = \begin{bmatrix}\frac{h_1}{H}\\\frac{h_2}{H}\end{bmatrix}$ 
379 | \end{itemize}
380 | }
381 | \end{frame}
382 | 
383 | 
384 | 
385 | \begin{frame}{Grafo de $L(\hat{\vect{y}}, \vect{y})$}
386 | \input{TikzFiles/expanded_graph_0}
387 | \end{frame}
388 | 
389 | \begin{frame}{Caminho de $b_1$: 1}
390 | \input{TikzFiles/b1_path1}
391 | \end{frame}
392 | 
393 | \begin{frame}{Caminho de $b_1$: 2}
394 | \input{TikzFiles/b1_path2}
395 | \end{frame}
396 | 
397 | \begin{frame}{Caminho de $b_1$: 3}
398 | \input{TikzFiles/b1_path3}
399 | \end{frame}
400 | 
401 | \begin{frame}{Derivada parcial de L com respeito a $b_1$: 1}
402 | \input{TikzFiles/b1_path1_grad}
403 | \end{frame}
404 | 
405 | \begin{frame}{Derivada parcial de L com respeito a $b_1$: 2}
406 | \input{TikzFiles/b1_path2_grad}
407 | \end{frame}
408 | 
409 | \begin{frame}{Derivada parcial de L com respeito a $b_1$: 3}
410 | \input{TikzFiles/b1_path3_grad}
411 | \end{frame}
412 | 
413 | \begin{frame}{Derivada parcial de L com respeito a $b_1$}
414 | \Large{
415 | \begin{align*}
416 | \frac{\partial L}{\partial b_1} &= -y_1 + y_1\frac{h_1}{H} + y_2\frac{h_1}{H}\\
417 | \vspace{0.2cm}
418 | \visible<2->{&= y_1\left(\frac{h_1}{H} -1\right) + y_2\left(\frac{h_1}{H} -0\right)\\}
419 | \visible<3->{&= y_1\left(\hat{y}_1 -1\right) + y_2\left(\hat{y}_1 -0\right)\\}
420 | \visible<4->{&= \hat{y}_1 - y_1 \;\;\;\; \text{(quando} \;\; \vect{y} \;\; \text{é um vetor one-hot)}}
421 | \end{align*}
422 | }
423 | \end{frame}
424 | 
425 | \begin{frame}{Exemplo}
426 | \input{TikzFiles/examples_values}
427 | \end{frame}
428 | 
429 | \begin{frame}{Forward}
430 | \input{TikzFiles/expanded_graph_0}
431 | \end{frame}
432 | 
433 | \begin{frame}{Forward}
434 | \input{TikzFiles/expanded_graph_1}
435 | \end{frame}
436 | 
437 | \begin{frame}{Forward}
438 | \input{TikzFiles/expanded_graph_2}
439 | \end{frame}
440 | 
441 | \begin{frame}{Forward}
442 | \input{TikzFiles/expanded_graph_3}
443 | \end{frame}
444 | 
445 | \begin{frame}{Forward}
446 | \input{TikzFiles/expanded_graph_4}
447 | \end{frame}
448 | 
449 | \begin{frame}{Forward}
450 | \input{TikzFiles/expanded_graph_5}
451 | \end{frame}
452 | 
453 | \begin{frame}{Forward}
454 | \input{TikzFiles/expanded_graph_6}
455 | \end{frame}
456 | 
457 | \begin{frame}{Forward}
458 | \input{TikzFiles/expanded_graph_7}
459 | \end{frame}
460 | 
461 | \begin{frame}{Forward}
462 | \input{TikzFiles/expanded_graph_8}
463 | \end{frame}
464 | 
465 | \begin{frame}{Forward}
466 | \input{TikzFiles/expanded_graph_9}
467 | \end{frame}
468 | 
469 | \begin{frame}{Forward}
470 | \input{TikzFiles/expanded_graph_10}
471 | \end{frame}
472 | 
473 | \begin{frame}{Forward}
474 | \input{TikzFiles/expanded_graph_11}
475 | \end{frame}
476 | 
477 | \begin{frame}{Forward}
478 | \input{TikzFiles/expanded_graph_12}
479 | \end{frame}
480 | 
481 | \begin{frame}{Forward}
482 | \input{TikzFiles/expanded_graph_13}
483 | \end{frame}
484 | 
485 | \begin{frame}{Backward}
486 | \input{TikzFiles/expanded_graph_14}
487 | \end{frame}
488 | 
489 | 
490 | \begin{frame}{Algoritmo de back-propagation (caso escalar)}
491 | \begin{algorithm}[H]
492 | \begin{algorithmic}[1]
493 | \STATE \textbf{Require:} Computational graph $\mathcal{G} = (\{ u_1, \dots, u_n \}, \mathcal{E}_1, \mathcal{E}_2)$, where $u_n$ is a leaf node.
494 | \STATE Initialize $grad\_table$, a data structure that will store the derivatives that have been computed (at the end $grad\_table[u_i] = \frac{\partial u_n}{\partial u_i}$).
495 | \STATE $grad\_table[u_n] \leftarrow 1$
496 | \FOR{$j=n-1$ down to $1$}
497 | \STATE $grad\_table[u_j] \leftarrow \sum_{u_{i} \in S(u_{j})}grad\_table[u_i]\frac{\partial u_i}{\partial u_j}$
498 | \ENDFOR
499 | \RETURN $grad\_table$
500 | \end{algorithmic}
501 | \caption{Back-propagation (scalar case)}
502 | \label{alg:seq}
503 | \end{algorithm}
504 | \end{frame}
505 | 
506 | 
507 | \begin{frame}[allowframebreaks]{Referências}
508 | 
509 |   \bibliography{my_references}
510 |   \bibliographystyle{abbrv}
511 | 
512 | \end{frame}
513 | 
514 | 
515 | 
516 | 
517 | \end{document}


--------------------------------------------------------------------------------
/slides/backprop1/my_references.bib:
--------------------------------------------------------------------------------
 1 | % RL
 2 | 
 3 | @Book{Sutton98a,
 4 |   Title                    = {Reinforcement Learning : An Introduction},
 5 |   Author                   = {Richard S. Sutton and Andrew G. Barto},
 6 |   Publisher                = {MIT Press},
 7 |   Year                     = {1998},
 8 | }
 9 | 
10 | % machine learning
11 | 
12 | @book{DeepLearningbook,
13 |     author    = "Ian Goodfellow and Yoshua Bengio and Aaron Courville",
14 |     title     = "Deep Learning",
15 |     year      = "2017",
16 |     publisher = "MIT Press",
17 | }
18 | 
19 | @book{learningfromdata,
20 |    title =     {Learning From Data: A short course},
21 |    author =    {Yaser S. Abu-Mostafa and Malik Magdon-Ismail and Hsuan-Tien Lin},
22 |    publisher = {AMLBook.com},
23 |    isbn =      {1600490069, 978-1600490064},
24 |    year =      {2012},
25 |    series =    {},
26 |    edition =   {1},
27 |    volume =    {},
28 |    url =       {http://gen.lib.rus.ec/book/index.php?md5=BCF7C1FF782654437CA474770AB041D5}
29 | }
30 | 


--------------------------------------------------------------------------------
/slides/backprop1/pdf/BackpropLecture1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop1/pdf/BackpropLecture1.pdf


--------------------------------------------------------------------------------
/slides/backprop2/Makefile:
--------------------------------------------------------------------------------
 1 | # Build the Back-propagation 2 slides: compile $(BASE_NAME).tex inside
 2 | # $(BUILD_DIR) and copy the resulting PDF to $(PDF_NAME).
 3 | BASE_NAME := main
 4 | BUILD_DIR := build
 5 | PDF_NAME := BackpropLecture2.pdf
 6 | 
 7 | # NOTE(review): -aux-directory is a MiKTeX-specific option; confirm that the
 8 | # TeX distribution used for the build accepts it.
 9 | PDFLATEX_OPTIONS = -halt-on-error -aux-directory=$(BUILD_DIR) -output-directory=$(BUILD_DIR) --shell-escape
10 | LATEX     := latex
11 | PDFLATEX  = pdflatex $(PDFLATEX_OPTIONS)
12 | BIBTEX    := bibtex
13 | 
14 | # pdf and clean are command names, not files to be built. Declaring them
15 | # phony keeps the pdf/ directory next to this Makefile from shadowing "pdf".
16 | .PHONY: pdf clean
17 | 
18 | pdf: $(BASE_NAME).pdf
19 | 
20 | # One pdflatex pass to write the .aux file, bibtex to resolve the citations,
21 | # then three more pdflatex passes before the PDF is copied out of $(BUILD_DIR).
22 | $(BASE_NAME).pdf: $(BASE_NAME).tex
23 | 	mkdir -p $(BUILD_DIR)
24 | 	$(PDFLATEX) $<
25 | 	$(BIBTEX) $(BUILD_DIR)/$(BASE_NAME)
26 | 	$(PDFLATEX) $<
27 | 	$(PDFLATEX) $<
28 | 	$(PDFLATEX) $<
29 | 	cp $(BUILD_DIR)/$(BASE_NAME).pdf $(PDF_NAME)
30 | 
31 | # Remove the build directory and the copied PDF.
32 | clean:
33 | 	rm -rf $(BUILD_DIR) $(PDF_NAME)
34 | 


--------------------------------------------------------------------------------
/slides/backprop2/all_imports.tex:
--------------------------------------------------------------------------------
 1 | \usepackage[utf8]{inputenc}
 2 | \usepackage[T1]{fontenc}
 3 | \usepackage{lmodern}
 4 | \usepackage{appendixnumberbeamer}
 5 | \usepackage{hyperref}
 6 | \usepackage{booktabs}
 7 | \usepackage{amsmath}
 8 | \usepackage{bm}
 9 | \usepackage[scale=2]{ccicons}
10 | \usepackage[outputdir=build]{minted}
11 | \usepackage{pgfplots}
12 | \usepackage{array,colortbl,xcolor}
13 | \usepgfplotslibrary{dateplot}
14 | \usepackage{setspace}
15 | \usepackage{etoolbox}
16 | \usepackage{xspace}
17 | \usepackage{tikz}
18 | \usetikzlibrary{shapes,arrows,positioning,fit,backgrounds}
19 | \usepackage{tkz-euclide}
20 | \usepackage{soul}
21 | \usepackage{ragged2e}
22 | \usepackage{algorithm,algorithmic}
23 | 


--------------------------------------------------------------------------------
/slides/backprop2/all_new_commands.tex:
--------------------------------------------------------------------------------
 1 | \newcommand{\themename}{\textbf{\textsc{metropolis}}\xspace}
 2 | \newcommand{\vect}[1]{\bm{#1}}
 3 | \newcommand{\myprime}[1]{{#1}^{\prime}}
 4 | \newcommand{\grad}[2]{\nabla_{#1} {#2}}
 5 | \newcommand{\dotp}[2]{{#1}^{\top}{#2}}
 6 | \newcommand{\dotpPright}[2]{{#1}^{\top}\left({#2}\right)}
 7 | \newcommand{\outerp}[2]{\left({#1}\right){#2}^{\top}}
 8 | \newcommand{\Jacobian}[2]{\frac{\partial #1}{\partial #2}}
 9 | \newcommand{\Vocab}{\mathbb{V}}
10 | \DeclareMathOperator*{\argmin}{arg\,min}
11 | \DeclareMathOperator*{\argmax}{arg\,max}
12 | \DeclareMathOperator{\E}{\mathbb{E}}
13 | 
14 | 
15 | % Quote with author reference at the end
16 | \let\oldquote\quote
17 | \let\endoldquote\endquote
18 | \renewenvironment{quote}[2][]
19 |   {\if\relax\detokenize{#1}\relax
20 |      \def\quoteauthor{#2}%
21 |    \else
22 |      \def\quoteauthor{#2~---~#1}%
23 |    \fi
24 |    \oldquote}
25 |   {\par\nobreak\smallskip\hfill(\quoteauthor)%
26 |    \endoldquote\addvspace{\bigskipamount}}
27 | %-----------------------------------------   
28 | 


--------------------------------------------------------------------------------
/slides/backprop2/definitions/colors.tex:
--------------------------------------------------------------------------------
 1 | \definecolor{blue}{RGB}{159, 192, 176}
 2 | \definecolor{blue2}{RGB}{38,139,210}
 3 | \definecolor{green}{RGB}{160, 227, 127}
 4 | \definecolor{green2}{RGB}{132, 164, 76}
 5 | \definecolor{orange}{RGB}{243, 188, 125}
 6 | \definecolor{red}{RGB}{253, 123, 84}
 7 | \definecolor{nephritis}{RGB}{39, 174, 96}
 8 | \definecolor{emerald}{RGB}{46, 204, 113}
 9 | \definecolor{turquoise}{RGB}{39, 174, 96}
10 | \definecolor{green-sea}{RGB}{22, 160, 133}
11 | \definecolor{base02}{RGB}{7,54,66}
12 | \definecolor{base03}{RGB}{0,43,54}
13 | \definecolor{cyan}{RGB}{42,161,152}


--------------------------------------------------------------------------------
/slides/backprop2/definitions/styles.tex:
--------------------------------------------------------------------------------
 1 | % Tikzstyles for Computation Graphs
 2 | 
 3 | % nodes
 4 | \tikzstyle{noop} = [circle, draw=none, fill=red, minimum size = 10pt]
 5 | \tikzstyle{op} = [circle, draw=red, line width=1.5pt, fill=red!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
 6 | \tikzstyle{op2} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
 7 | \tikzstyle{op3} = [circle, draw=orange, line width=1.5pt, fill=orange!70, text=black, text centered, font=\bf \scriptsize, minimum size = 7pt]
 8 | \tikzstyle{placeholder} = [circle, draw=red, line width=1.5pt, fill=red!30, text=black, text centered, font=\bf  \normalsize, minimum size = 25pt]
 9 | \tikzstyle{state} = [circle, draw=blue, line width=1.5pt, fill=blue!70, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
10 | \tikzstyle{gradient} = [circle, draw=nephritis, line width=1.5pt, fill=nephritis!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
11 | \tikzstyle{gradient2} = [circle, draw=green2, line width=1.5pt, fill=green2!60, text=black, text centered, font=\bf \normalsize, minimum size = 25pt]
12 | \tikzstyle{textonly} = [draw=none, fill=none, text centered, font=\bf \normalsize]
13 | 
14 | % edges
15 | % \tikzstyle{tedge}  = [draw, thick, >=stealth, ->]
16 | \tikzstyle{tedge}  = [draw, thick, >=latex, ->]
17 | \tikzstyle{tedge_dashed}  = [draw, thick, >=latex, ->, dashed]
18 | 
19 | % namedscope
20 | \tikzstyle{namedscope} = [circle, draw=orange, line width=1.5pt, fill=orange!60, align=center, inner sep=0pt]
21 | 
22 | % \tikzstyle{container} = [draw=none, rectangle, dotted, inner ysep=1.5em]
23 | % \tikzstyle{novertex} = [draw=none, fill=none, text centered]
24 | % \tikzstyle{predicate} = [ellipse, draw, thick, text centered, rounded corners, minimum size=30pt]
25 | % \tikzstyle{aux} = [rectangle, draw, thick, text centered, rounded corners, minimum size=30pt]
26 | % \tikzstyle{ledge}  = [draw, dashed, thick, >=stealth, ->]
27 | % \tikzstyle{pedge}  = [draw, thick, >=stealth, ->]
28 | 


--------------------------------------------------------------------------------
/slides/backprop2/header.tex:
--------------------------------------------------------------------------------
 1 | \title{MAC0460 - Introdução ao aprendizado de máquina \\ \vspace{0.2 cm} Back-propagation 2} 
 2 | \date{\today}
 3 | 
 4 | \author{
 5 |   Felipe Salvatore\\
 6 |   \url{https://felipessalvatore.github.io/}\vspace{0.4 cm}
 7 |   \and\\ 
 8 |   Nina S. T. Hirata\\
 9 |   \url{https://www.ime.usp.br/~nina/}
10 |   \vspace{0.4 cm}
11 | }
12 | 
13 | \institute{\textbf{IME-USP}: Institute of Mathematics and Statistics, University of São Paulo}
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/slides/backprop2/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop2/images/logo.png


--------------------------------------------------------------------------------
/slides/backprop2/my_references.bib:
--------------------------------------------------------------------------------
 1 | % machine learning
 2 | 
 3 | @book{DeepLearningbook,
 4 |     author    = "Ian Goodfellow and Yoshua Bengio and Aaron Courville",
 5 |     title     = "Deep Learning",
 6 |     year      = "2017",
 7 |     publisher = "MIT Press",
 8 | }
 9 | 
10 | @book{learningfromdata,
11 |    title =     {Learning From Data: A short course},
12 |    author =    {Yaser S. Abu-Mostafa and Malik Magdon-Ismail and Hsuan-Tien Lin},
13 |    publisher = {AMLBook.com},
14 |    isbn =      {1600490069, 978-1600490064},
15 |    year =      {2012},
16 |    series =    {},
17 |    edition =   {1},
18 |    volume =    {},
19 |    url =       {http://gen.lib.rus.ec/book/index.php?md5=BCF7C1FF782654437CA474770AB041D5}
20 | }
21 | 
22 | @article{DBLP:journals/corr/abs-1802-01528,
23 |   author    = {Terence Parr and
24 |                Jeremy Howard},
25 |   title     = {The Matrix Calculus You Need For Deep Learning},
26 |   journal   = {CoRR},
27 |   volume    = {abs/1802.01528},
28 |   year      = {2018},
29 |   url       = {http://arxiv.org/abs/1802.01528},
30 |   archivePrefix = {arXiv},
31 |   eprint    = {1802.01528},
32 |   timestamp = {Thu, 01 Mar 2018 15:00:45 +0100},
33 |   biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1802-01528},
34 |   bibsource = {dblp computer science bibliography, https://dblp.org}
35 | }
36 | 
37 | @misc{VectorCalculus,
38 |   title = {{Vector Calculus} (in Mathematics for Machine Learning)},
39 |   howpublished = {\url{https://mml-book.github.io/book/chapter05.pdf}}
40 | }
41 | 
42 | @misc{graphsbackprop,
43 |   title = {{Computational Graphs, and Backpropagation} (course notes for NLP by Michael Collins)},
44 |   howpublished = {\url{http://www.cs.columbia.edu/~mcollins/ff2.pdf}}
45 | }


--------------------------------------------------------------------------------
/slides/backprop2/pdf/BackpropLecture2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/backprop2/pdf/BackpropLecture2.pdf


--------------------------------------------------------------------------------
/slides/images/cc-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MLIME/MAC0460/cc4733f6522b6b7492dfec12fbeede8a904d72e7/slides/images/cc-logo.png


--------------------------------------------------------------------------------
/slides/install.sh:
--------------------------------------------------------------------------------
1 | # Installs TeX Live / Beamer packages through apt-get (each call needs sudo).
2 | # NOTE(review): presumably the toolchain required to build the slides in this
3 | # directory -- confirm the package list against the current distribution.
4 | sudo apt-get install texlive-latex-base
5 | sudo apt-get install latex-beamer
6 | sudo apt-get install texlive-latex-extra
7 | sudo apt-get install texlive-fonts-extra
8 | sudo apt-get install texlive-science
9 | 


--------------------------------------------------------------------------------