├── .gitignore
├── Chapter00.ipynb
├── Chapter01.ipynb
├── Chapter02.1.ipynb
├── Chapter02.ipynb
├── Chapter03.ipynb
├── Chapter04.ipynb
├── Chapter05.ipynb
├── Chapter06.ipynb
├── Chapter07.ipynb
├── Chapter08.ipynb
├── Chapter09.ipynb
├── Chapter10.ipynb
├── Chapter11.ipynb
├── ChapterExtra.ipynb
├── LICENSE
├── README.md
├── apt.txt
├── config.py
├── data_generation
│   ├── ball.py
│   ├── image_classification.py
│   ├── nlp.py
│   ├── rps.py
│   ├── simple_linear_regression.py
│   └── square_sequences.py
├── data_preparation
│   ├── v0.py
│   ├── v1.py
│   └── v2.py
├── environment.yml
├── helpers.py
├── images
│   ├── 1conv1.png
│   ├── 1conv2.png
│   ├── 3channel1.png
│   ├── 3channel2.png
│   ├── 3channel_filters1.png
│   ├── aiayn_dot.png
│   ├── aiayn_multihead.png
│   ├── alexnet.png
│   ├── alice_dorothy.png
│   ├── architecture_lenet.png
│   ├── attention.png
│   ├── attn_narrow_2heads.png
│   ├── attn_narrow_first_head.png
│   ├── attn_narrow_transf.png
│   ├── basic_attention.png
│   ├── bert_embeddings.png
│   ├── bert_input_embed.png
│   ├── bert_mlm.png
│   ├── bert_nsp.png
│   ├── bidirect_rnn.png
│   ├── blank1.png
│   ├── blank2.png
│   ├── blank_center.png
│   ├── blank_end.png
│   ├── block_tokens.png
│   ├── book10.png
│   ├── book9.png
│   ├── bow.png
│   ├── cbow.png
│   ├── classification.png
│   ├── classification_equiv.png
│   ├── classification_relu2.png
│   ├── classification_softmax.png
│   ├── cls_hidden_state.png
│   ├── context_translate.png
│   ├── context_vector.png
│   ├── conv1.png
│   ├── conv1_ma.png
│   ├── conv1d.png
│   ├── conv1d_dilated.png
│   ├── conv1d_edges.png
│   ├── conv2.png
│   ├── conv3.png
│   ├── conv5.png
│   ├── conv6.png
│   ├── conv7.png
│   ├── conv8.png
│   ├── cross_attn.png
│   ├── dec_both.png
│   ├── decoder.png
│   ├── decoder_self.png
│   ├── decoder_self_simplified.png
│   ├── dropout_paper.png
│   ├── elmo_embed.png
│   ├── elmo_lstm.png
│   ├── embed_arithmetic.png
│   ├── enc_both.png
│   ├── enc_dec_attn_translate.png
│   ├── encdec_attn.png
│   ├── encdec_self_simplified.png
│   ├── encoded_distances.png
│   ├── encoder.png
│   ├── encoder_decoder.png
│   ├── encoder_lost_seq.png
│   ├── encoder_self.png
│   ├── encoder_self_detail.png
│   ├── encoder_self_simplified.png
│   ├── fill1.png
│   ├── fill2.png
│   ├── full_transformer.png
│   ├── full_transformer_and_class.png
│   ├── gru_cell.png
│   ├── inception_model.png
│   ├── inception_modules.png
│   ├── kq_matches.png
│   ├── layer_vs_batch_norm.png
│   ├── logistic_model.png
│   ├── lstm_cell.png
│   ├── multiattn.png
│   ├── multihead_chunking.png
│   ├── new_books.png
│   ├── ngrams.png
│   ├── norm_first.png
│   ├── ohe1.png
│   ├── ohe2.png
│   ├── ohe3.png
│   ├── packed_seq_data.png
│   ├── packed_seq_inddata.png
│   ├── padding1.png
│   ├── padding2.png
│   ├── padding3.png
│   ├── paddings.png
│   ├── paths.png
│   ├── pooling1.png
│   ├── posenc_mod4mod8.png
│   ├── posenc_modnorm4.png
│   ├── posenc_modnorm_deg.png
│   ├── posenc_modnorm_mult.png
│   ├── posenc_modnorm_sincos.png
│   ├── posenc_norm1k.png
│   ├── posenc_norm4.png
│   ├── posenc_norm4_long.png
│   ├── residual.png
│   ├── rest_continuous.png
│   ├── rest_discrete.png
│   ├── rnn1.png
│   ├── rnn2.png
│   ├── rnn_cell_diagram.png
│   ├── rnn_cell_diagram_seq.png
│   ├── score_alignment.png
│   ├── score_alignment_translate.png
│   ├── shifted_target.png
│   ├── sincos_distance.png
│   ├── stacked_encdec.png
│   ├── stacked_layers.png
│   ├── stacked_rnn.png
│   ├── stride1.png
│   ├── strider2.png
│   ├── strider3.png
│   ├── sublayer.png
│   ├── transf_classes.png
│   ├── transf_decself.png
│   ├── transf_encdecself.png
│   ├── transf_encself.png
│   ├── translation_att.png
│   ├── vit_model.png
│   ├── w2v_cbow.png
│   ├── w2v_embed.png
│   └── w2v_logits.png
├── model_configuration
│   ├── v0.py
│   ├── v1.py
│   ├── v2.py
│   ├── v3.py
│   └── v4.py
├── model_training
│   ├── v0.py
│   ├── v1.py
│   ├── v2.py
│   ├── v3.py
│   ├── v4.py
│   └── v5.py
├── plots
│   ├── chapter0.py
│   ├── chapter1.py
│   ├── chapter10.py
│   ├── chapter11.py
│   ├── chapter2.py
│   ├── chapter2_1.py
│   ├── chapter3.py
│   ├── chapter4.py
│   ├── chapter5.py
│   ├── chapter6.py
│   ├── chapter7.py
│   ├── chapter8.py
│   ├── chapter9.py
│   ├── chapterextra.py
│   └── replay.py
├── postBuild
├── revision
│   └── v1.2
│       ├── Revision_Volume1_v1.2.pdf
│       ├── Revision_Volume2_v1.2.pdf
│       └── Revision_Volume3_v1.2.pdf
├── runs
│   └── .gitkeep
├── seq2seq.py
├── stepbystep
│   ├── v0.py
│   ├── v1.py
│   ├── v2.py
│   ├── v3.py
│   └── v4.py
└── tensorboardserverextension.py

-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |

-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Daniel Voigt Godoy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Deep Learning with PyTorch Step-by-Step
2 |
3 | ## New book: "A Hands-On Guide to Fine-Tuning LLMs"
4 |
5 |
6 |
7 |
8 | Kindle | Paperback | PDF [Leanpub] | PDF [Gumroad]
9 |
10 |
11 | ## Revised for PyTorch 2.x!
12 | The revised version addresses changes in PyTorch, Torchvision, HuggingFace, and other libraries. The chapters most affected were Chapter 4 (in Volume II) and Chapter 11 (in Volume III).
13 |
14 | Please check the PDFs below containing the changes (check the paragraphs highlighted in red):
15 | - [Changes to Volume I](https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/master/revision/v1.2/Revision_Volume1_v1.2.pdf)
16 | - [Changes to Volume II](https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/master/revision/v1.2/Revision_Volume2_v1.2.pdf)
17 | - [Changes to Volume III](https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/master/revision/v1.2/Revision_Volume3_v1.2.pdf)
18 |
19 | [![](images/new_books.png)](https://pytorchstepbystep.com)
20 |
21 | This is the official repository of my book "**Deep Learning with PyTorch Step-by-Step**". Here you will find **one Jupyter notebook** for every **chapter** in the book.
22 |
23 | Each notebook contains **all the code shown** in its corresponding chapter, and you should be able to
24 | **run its cells in sequence** to get the **same outputs as shown in the book**. I strongly believe that being able to **reproduce the results** brings **confidence** to the reader.
25 |
26 | There are **three** options for you to run the Jupyter notebooks:
27 |
28 | ### Google Colab
29 |
30 | You can easily **load the notebooks directly from GitHub** using Colab and run them using a **GPU** provided by Google. You need to be logged in to a Google Account of your own.
31 |
32 | You can go through the chapters right away using the links below:
33 |
34 | #### Part I - Fundamentals
35 |
36 | - [Chapter 0 - Visualizing Gradient Descent](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter00.ipynb)
37 | - [Chapter 1 - A Simple Regression Problem](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter01.ipynb)
38 | - [Chapter 2 - Rethinking the Training Loop](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter02.ipynb)
39 | - [Chapter 2.1 - Going Classy](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter02.1.ipynb)
40 | - [Chapter 3 - A Simple Classification Problem](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter03.ipynb)
41 |
42 | #### Part II - Computer Vision
43 |
44 | - [Chapter 4 - Classifying Images](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter04.ipynb)
45 | - [Chapter 5 - Convolutions](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter05.ipynb)
46 | - [Chapter 6 - Rock, Paper, Scissors](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter06.ipynb)
47 | - [Chapter 7 - Transfer Learning](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter07.ipynb)
48 | - [Extra Chapter - Vanishing and Exploding Gradients](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/ChapterExtra.ipynb)
49 |
50 | #### Part III - Sequences
51 |
52 | - [Chapter 8 - Sequences](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter08.ipynb)
53 | - [Chapter 9 - Sequence-to-Sequence](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter09.ipynb)
54 | - [Chapter 10 - Transform and Roll Out](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter10.ipynb)
55 |
56 | #### Part IV - Natural Language Processing
57 |
58 | - [Chapter 11 - Down the Yellow Brick Rabbit Hole](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter11.ipynb)
59 |
60 | ### Binder
61 |
62 | You can also **load the notebooks directly from GitHub** using Binder, but the process is slightly different. It will create an environment on the cloud and allow you to access **Jupyter's Home Page** in your browser, listing all available notebooks, just like on your own computer.
63 |
64 | If you **make changes** to the notebooks, **make sure to download** them, since Binder **does not keep the changes** once you close it.
65 |
66 | You can start your environment on the cloud right now using the button below:
67 |
68 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/dvgodoy/PyTorchStepByStep/master)
69 |
70 | ### Local Installation
71 |
72 | This option will give you more **flexibility**, but it will require **more effort to set up**. I encourage you to try setting up your own environment. It may seem daunting at first, but you can surely accomplish it by following **seven easy steps**:
73 |
74 | **1 - Anaconda**
75 |
76 | If you don't have [**Anaconda's Individual Edition**](https://www.anaconda.com/products/individual) installed yet, now would be a good time to do it - it is a very handy way to start - since it contains most of the Python libraries a data scientist will ever need to develop and train models.
77 |
78 | Please follow **the installation instructions** for your OS:
79 |
80 | - [Windows](https://docs.anaconda.com/anaconda/install/windows/)
81 | - [macOS](https://docs.anaconda.com/anaconda/install/mac-os/)
82 | - [Linux](https://docs.anaconda.com/anaconda/install/linux/)
83 |
84 | Make sure you choose the **Python 3.X** version, since Python 2 was discontinued in January 2020.
85 |
86 | **2 - Conda (Virtual) Environments**
87 |
88 | Virtual environments are a convenient way to isolate Python installations associated with different projects.
89 |
90 | First, you need to choose a **name** for your environment :-) Let's call ours `pytorchbook` (or anything else you find easier to remember). Then, you need to open a **terminal** (in Ubuntu) or **Anaconda Prompt** (in Windows or macOS) and type the following command:
91 |
92 | `conda create -n pytorchbook anaconda`
93 |
94 | The command above creates a conda environment named `pytorchbook` and includes **all anaconda packages** in it (time to get a coffee, it will take a while...). If you want to learn more about creating and using conda environments, please check Anaconda's [**Managing Environments**](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) user guide.
95 |
96 | Did it finish creating the environment? Good! It is time to **activate it**, that is, to make that Python installation the one to be used now. In the same terminal (or Anaconda Prompt), just type:
97 |
98 | `conda activate pytorchbook`
99 |
100 | Your prompt should look like this (if you're using Linux)...
101 |
102 | `(pytorchbook)$`
103 |
104 | or like this (if you're using Windows):
105 |
106 | `(pytorchbook)C:\>`
107 |
108 | Done! You are using a **brand new conda environment** now. You'll need to **activate it** every time you open a new terminal, or, if you're a Windows or macOS user, you can open the corresponding Anaconda Prompt (it will show up as **Anaconda Prompt (pytorchbook)**, in our case), which will have it activated from the start.
109 |
110 | **IMPORTANT**: From now on, I am assuming you'll activate the `pytorchbook` environment every time you open a terminal / Anaconda Prompt. Further installation steps **must** be executed inside the environment.
111 |
112 | **3 - PyTorch**
113 |
114 | It is time to install the star of the show :-) We can go straight to the [**Start Locally**](https://pytorch.org/get-started/locally/) section of its website, and it will automatically select the options that best suit your local environment and show you the command to run.
115 |
116 | Your choices should look like this:
117 |
118 | - PyTorch Build: "Stable"
119 | - Your OS: your operating system
120 | - Package: "Conda"
121 | - Language: "Python"
122 | - CUDA: "None" if you **don't** have a **GPU**, or the latest version (e.g. "10.1") if you **have** a **GPU**.
123 |
124 | The installation command will be shown right below your choices, so you can copy it. If you have a **Windows** computer and **no GPU**, you'd have to run the following command in your **Anaconda Prompt (pytorchbook)**:
125 |
126 | `(pytorchbook) C:\> conda install pytorch torchvision cpuonly -c pytorch`
127 |
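Once the installation finishes, it is worth double-checking it before moving on. Here is a minimal sanity check you can run in a Python session inside the activated environment (the version number below is just an example; yours will likely differ):

```python
import torch

print(torch.__version__)         # e.g., 2.2.1
print(torch.cuda.is_available()) # True only for a CUDA build with a GPU available
```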
128 | **4 - TensorBoard**
129 |
130 | TensorBoard is a powerful tool, and we can use it even if we are developing models in PyTorch. Luckily, you don't need to install the whole of TensorFlow to get it: you can easily **install TensorBoard alone** using **conda**. You just need to run this command in your **terminal** or **Anaconda Prompt** (again, after activating the environment):
131 |
132 | `(pytorchbook)C:\> conda install -c conda-forge tensorboard`
133 |
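To confirm that TensorBoard can consume logs written from PyTorch, you can try a minimal sketch like the one below (the `runs/test` folder name is just an illustration; the book's notebooks write to the `runs` folder):

```python
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/test')  # creates the log folder if needed
writer.add_scalar('loss', 0.5, 1)    # tag, scalar value, global step
writer.close()
```

Then launch TensorBoard pointing at that folder and open the address it prints (usually `http://localhost:6006`):

`(pytorchbook)C:\> tensorboard --logdir runs`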
134 | **5 - GraphViz and TorchViz (optional)**
135 |
136 | > This step is optional, mostly because the installation of GraphViz can sometimes be challenging (especially on Windows). If, for any reason, you do not succeed in installing it correctly, or if you
137 | > decide to skip this installation step, you will still be **able to execute the code in this book** (except for a couple of cells that generate images of a model's structure in the Dynamic Computation Graph section of Chapter 1).
138 |
139 | We need to install GraphViz to be able to use **TorchViz**, a neat package that allows us to visualize a model's structure. Please check the [**installation instructions**](https://www.graphviz.org/download/) for your OS.
140 |
141 | > If you are using **Windows**, please use the installer at [GraphViz's Windows Package](https://graphviz.gitlab.io/_pages/Download/windows/graphviz-2.38.msi). You also need to add GraphViz to the PATH (environment variable) in Windows. Most likely, you can find the GraphViz executable file at `C:\Program Files (x86)\Graphviz2.38\bin`. Once you find it, you need to set or change the PATH accordingly, adding GraphViz's location to it. For more details on how to do that, please refer to [How to Add to Windows PATH Environment Variable](https://bit.ly/3fIwYA5).
142 |
143 | For additional information, you can also check the [How to Install Graphviz Software](https://bit.ly/30Ayct3) guide.
144 |
145 | If you installed GraphViz successfully, you can install the [torchviz](https://github.com/szagoruyko/pytorchviz) package. This package is not part of the Anaconda Distribution Repository and is only available at PyPI, the Python Package Index, so we need to install it using pip.
146 |
147 | Once again, open a **terminal** or **Anaconda Prompt** and run this command (just once
148 | more: after activating the environment):
149 |
150 | `(pytorchbook)C:\> pip install torchviz`
151 |
152 | **6 - Git**
153 |
154 | It is way beyond the scope of this guide to introduce you to version control and its most popular tool: `git`. If you are familiar with it already, great, you can skip this section altogether!
155 |
156 | Otherwise, I'd recommend you learn more about it; it will **definitely** be useful for you later down the line. In the meantime, I will show you the bare minimum, so you can use `git` to **clone this repository** containing all the code used in this book - so you have your own, local copy of it and can modify and experiment with it as you please.
157 |
158 | First, you need to install it. So, head to its [downloads](https://git-scm.com/downloads) page and follow the instructions for your OS. Once the installation is complete, please open a **new terminal** or **Anaconda Prompt** (it's OK to close the previous one). In the new terminal or Anaconda Prompt, you should be able to **run `git` commands**. To clone this repository, you only need to run:
159 |
160 | `(pytorchbook)C:\> git clone https://github.com/dvgodoy/PyTorchStepByStep.git`
161 |
162 | The command above will create a `PyTorchStepByStep` folder which contains a **local copy** of everything available in this GitHub repository.
163 |
164 | **7 - Jupyter**
165 |
166 | After cloning the repository, navigate to the `PyTorchStepByStep` folder and, **once inside it**, you only need to **start Jupyter** on your terminal or Anaconda Prompt:
167 |
168 | `(pytorchbook)C:\> jupyter notebook`
169 |
170 | This will open up your browser, and you will see **Jupyter's Home Page** containing this repository's notebooks and code.
171 |
172 | Congratulations! You are ready to go through the chapters' notebooks!
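By the way, if you completed the optional Step 5, you can verify GraphViz and TorchViz with a minimal sketch like this one (the tiny model is just a placeholder):

```python
import torch
import torch.nn as nn
from torchviz import make_dot

model = nn.Sequential(nn.Linear(1, 1))
yhat = model(torch.randn(8, 1))
# writes the computation graph to graph.png
make_dot(yhat, params=dict(model.named_parameters())).render('graph', format='png')
```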
173 |
174 |

-------------------------------------------------------------------------------- /apt.txt: --------------------------------------------------------------------------------
1 | graphviz
2 |

-------------------------------------------------------------------------------- /config.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import errno
4 | import requests
5 | import subprocess
6 | import shutil
7 | from IPython.display import HTML, display
8 | from tensorboard import manager
9 |
10 | def tensorboard_cleanup():
11 |     info_dir = manager._get_info_dir()
12 |     shutil.rmtree(info_dir)
13 |
14 | FOLDERS = {
15 |     0: ['plots'],
16 |     1: ['plots'],
17 |     2: ['plots', 'data_generation', 'data_preparation', 'model_configuration', 'model_training'],
18 |     21: ['plots', 'data_generation', 'data_preparation', 'model_configuration', 'stepbystep'],
19 |     3: ['plots', 'stepbystep'],
20 |     4: ['plots', 'stepbystep', 'data_generation'],
21 |     5: ['plots', 'stepbystep', 'data_generation', ''],
22 |     6: ['plots', 'stepbystep', 'stepbystep', 'data_generation', 'data_generation', 'data_preparation'],
23 |     7: ['plots', 'stepbystep', 'data_generation'],
24 |     71: ['plots', 'stepbystep', 'data_generation'],
25 |     8: ['plots', 'plots', 'stepbystep', 'data_generation'],
26 |     9: ['plots', 'plots', 'plots', 'stepbystep', 'data_generation'],
27 |     10: ['plots', 'plots', 'plots', 'plots', 'stepbystep', 'data_generation', 'data_generation', '', ''],
28 |     11: ['plots', 'stepbystep', 'data_generation', ''],
29 | }
30 | FILENAMES = {
31 |     0: ['chapter0.py'],
32 |     1: ['chapter1.py'],
33 |     2: ['chapter2.py', 'simple_linear_regression.py', 'v0.py', 'v0.py', 'v0.py'],
34 |     21: ['chapter2_1.py', 'simple_linear_regression.py', 'v2.py', '', 'v0.py'],
35 |     3: ['chapter3.py', 'v0.py'],
36 |     4: ['chapter4.py', 'v0.py', 'image_classification.py'],
37 |     5: ['chapter5.py', 'v1.py', 'image_classification.py', 'helpers.py'],
38 |     6: ['chapter6.py', 'v2.py', 'v3.py', 'rps.py', 'simple_linear_regression.py', 'v2.py'],
39 |     7: ['chapter7.py', 'v3.py', 'rps.py'],
40 |     71: ['chapterextra.py', 'v3.py', 'ball.py'],
41 |     8: ['chapter8.py', 'replay.py', 'v4.py', 'square_sequences.py'],
42 |     9: ['chapter8.py', 'chapter9.py', 'replay.py', 'v4.py', 'square_sequences.py'],
43 |     10: ['chapter8.py', 'chapter9.py', 'chapter10.py', 'replay.py', 'v4.py', 'square_sequences.py', 'image_classification.py', 'helpers.py', 'seq2seq.py'],
44 |     11: ['chapter11.py', 'v4.py', 'nlp.py', 'seq2seq.py'],
45 | }
46 |
47 | try:
48 |     host = os.environ['BINDER_SERVICE_HOST']
49 |     IS_BINDER = True
50 | except KeyError:
51 |     IS_BINDER = False
52 |
53 | try:
54 |     import google.colab
55 |     IS_COLAB = True
56 | except ModuleNotFoundError:
57 |     IS_COLAB = False
58 |
59 | IS_LOCAL = (not IS_BINDER) and (not IS_COLAB)
60 |
61 | def download_to_colab(chapter, branch='master'):
62 |     base_url = 'https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/{}/'.format(branch)
63 |
64 |     folders = FOLDERS[chapter]
65 |     filenames = FILENAMES[chapter]
66 |     for folder, filename in zip(folders, filenames):
67 |         if len(folder):
68 |             try:
69 |                 os.mkdir(folder)
70 |             except OSError as e:
71 |                 if e.errno != errno.EEXIST:
72 |                     raise
73 |
74 |         if len(filename):
75 |             path = os.path.join(folder, filename)
76 |             url = '{}{}'.format(base_url, path)
77 |             r = requests.get(url, allow_redirects=True)
78 |             open(path, 'wb').write(r.content)
79 |
80 |     try:
81 |         os.mkdir('runs')
82 |     except OSError as e:
83 |         if e.errno != errno.EEXIST:
84 |             raise
85 |
86 | TB_LINK = ''
87 | if IS_BINDER:
88 |     TB_LINK = HTML('''
89 |     Click here to open TensorBoard
90 |
95 |     ''')
96 |
97 | def config_chapter0(branch='master'):
98 |     if IS_COLAB:
99 |         print('Downloading files from GitHub repo to Colab...')
100 |         download_to_colab(0, branch)
101 |         print('Finished!')
102 |
103 | def config_chapter1(branch='master'):
104 |     if IS_COLAB:
105 |         print('Installing torchviz...')
106 |         subprocess.run([sys.executable, '-m', 'pip', 'install', 'torchviz'])
107 |         print('Downloading files from GitHub repo to Colab...')
108 |         download_to_colab(1, branch)
109 |         print('Creating folders...')
110 |         folders = ['data_preparation', 'model_configuration', 'model_training']
111 |
112 |         for folder in folders:
113 |             try:
114 |                 os.mkdir(folder)
115 |             except OSError as e:
117 |                 if e.errno != errno.EEXIST:
118 |                     raise
119 |         print('Finished!')
120 |
121 | def config_chapter2(branch='master'):
122 |     if IS_COLAB:
123 |         print('Downloading files from GitHub repo to Colab...')
124 |         download_to_colab(2, branch)
125 |         print('Finished!')
126 |
127 | def config_chapter2_1(branch='master'):
128 |     if IS_COLAB:
129 |         print('Downloading files from GitHub repo to Colab...')
130 |         download_to_colab(21, branch)
131 |         print('Finished!')
132 |
133 | def config_chapter3(branch='master'):
134 |     if IS_COLAB:
135 |         print('Downloading files from GitHub repo to Colab...')
136 |         download_to_colab(3, branch)
137 |         print('Finished!')
138 |
139 | def config_chapter4(branch='master'):
140 |     if IS_COLAB:
141 |         print('Downloading files from GitHub repo to Colab...')
142 |         download_to_colab(4, branch)
143 |         print('Finished!')
144 |
145 | def config_chapter5(branch='master'):
146 |     if IS_COLAB:
147 |         print('Downloading files from GitHub repo to Colab...')
148 |         download_to_colab(5, branch)
149 |         print('Finished!')
150 |
151 | def config_chapter6(branch='master'):
152 |     if IS_COLAB:
153 |         print('Downloading files from GitHub repo to Colab...')
154 |         download_to_colab(6, branch)
155 |         print('Finished!')
156 |
157 | def config_chapter7(branch='master'):
158 |     if IS_COLAB:
159 |         print('Downloading files from GitHub repo to Colab...')
160 |         download_to_colab(7, branch)
161 |         print('Finished!')
162 |
163 | def config_chapterextra(branch='master'):
164 |     if IS_COLAB:
165 |         print('Downloading files from GitHub repo to Colab...')
166 |         download_to_colab(71, branch)
167 |         print('Finished!')
168 |
169 | def config_chapter8(branch='master'):
170 |     if IS_COLAB:
171 |         print('Downloading files from GitHub repo to Colab...')
172 |         download_to_colab(8, branch)
173 |         print('Finished!')
174 |
175 | def config_chapter9(branch='master'):
176 |     if IS_COLAB:
177 |         print('Downloading files from GitHub repo to Colab...')
178 |         download_to_colab(9, branch)
179 |         print('Finished!')
180 |
181 | def config_chapter10(branch='master'):
182 |     if IS_COLAB:
183 |         print('Downloading files from GitHub repo to Colab...')
184 |         download_to_colab(10, branch)
185 |         print('Finished!')
186 |
187 | def config_chapter11(branch='master'):
188 |     if IS_COLAB:
189 |         print('Downloading files from GitHub repo to Colab...')
190 |         download_to_colab(11, branch)
191 |         print('Finished!')
192 |
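The `config_chapterN` functions above are the per-chapter bootstrap helpers used by the notebooks. A sketch of how one of them might be invoked from the first cell of a notebook running on Colab (assuming `config.py` itself has already been fetched into the working directory):

```python
# e.g., at the top of Chapter01.ipynb when running on Google Colab
from config import config_chapter1

config_chapter1()  # installs torchviz, downloads plots/chapter1.py, creates folders
```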
-------------------------------------------------------------------------------- /data_generation/ball.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def load_data(n_dims=10, n_points=1000, classif_radius_fraction=0.5, only_sphere=False, shuffle=True, seed=13):
4 |     """
5 |     Parameters
6 |     ----------
7 |     n_dims: int, optional
8 |         Number of dimensions of the n-ball. Default is 10.
9 |     n_points: int, optional
10 |         Number of points in the n-ball. Default is 1,000.
11 |     classif_radius_fraction: float, optional
12 |         Points farther away from the center than
13 |         `classif_radius_fraction * ball radius` are
14 |         considered to be positive cases. The remaining
15 |         points are the negative cases.
16 |     only_sphere: boolean
17 |         If True, generates an n-sphere, that is, a hollow n-ball.
18 |         Default is False.
19 |     shuffle: boolean, optional
20 |         If True, the points are shuffled. Default is True.
21 |     seed: int, optional
22 |         Random seed. Default is 13.
23 |     Returns
24 |     -------
25 |     X, y: tuple of ndarray
26 |         X is an array of shape (n_points, n_dims) containing the
27 |         points in the n-ball.
28 |         y is an array of shape (n_points, 1) containing the
29 |         classes of the samples.
30 |     """
31 |     np.random.seed(seed)
32 |     radius = np.sqrt(n_dims)
33 |     points = np.random.normal(size=(n_points, n_dims))
34 |     sphere = radius * points / np.linalg.norm(points, axis=1).reshape(-1, 1)
35 |     if only_sphere:
36 |         X = sphere
37 |     else:
38 |         X = sphere * np.random.uniform(size=(n_points, 1))**(1 / n_dims)
39 |
40 |     adjustment = 1 / np.std(X)
41 |     radius *= adjustment
42 |     X *= adjustment
43 |
44 |     y = (np.abs(np.sum(X, axis=1)) > (radius * classif_radius_fraction)).astype(int)
45 |
46 |     # But we must not feed the network with neatly organized inputs...
47 |     # so let's randomize them
48 |     if shuffle:
49 |         np.random.seed(seed)
50 |         shuffled = np.random.permutation(range(X.shape[0]))
51 |         X = X[shuffled]
52 |         y = y[shuffled].reshape(-1, 1)
53 |
54 |     return (X, y)
55 |

-------------------------------------------------------------------------------- /data_generation/image_classification.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def gen_img(start, target, fill=1, img_size=10):
5 |     # Generates empty image
6 |     img = np.zeros((img_size, img_size), dtype=float)
7 |
8 |     start_row, start_col = None, None
9 |
10 |     if start > 0:
11 |         start_row = start
12 |     else:
13 |         start_col = np.abs(start)
14 |
15 |     if target == 0:
16 |         if start_row is None:
17 |             img[:, start_col] = fill
18 |         else:
19 |             img[start_row, :] = fill
20 |     else:
21 |         if start_col == 0:
22 |             start_col = 1
23 |
24 |         if target == 1:
25 |             if start_row is not None:
26 |                 up = (range(start_row, -1, -1),
27 |                       range(0, start_row + 1))
28 |             else:
29 |                 up = (range(img_size - 1, start_col - 1, -1),
30 |                       range(start_col, img_size))
31 |             img[up] = fill
32 |         else:
33 |             if start_row is not None:
34 |                 down = (range(start_row, img_size, 1),
35 |                         range(0, img_size - start_row))
36 |             else:
37 |                 down = (range(0, img_size - 1 - start_col + 1),
38 |                         range(start_col, img_size))
39 |             img[down] = fill
40 |
41 |     return 255 * img.reshape(1, img_size, img_size)
42 |
43 |
44 | def generate_dataset(img_size=10, n_images=100, binary=True, seed=17):
45 |     np.random.seed(seed)
46 |
47 |     starts = np.random.randint(-(img_size - 1), img_size, size=(n_images,))
48 |     targets = np.random.randint(0, 3, size=(n_images,))
49 |
50 |     images = np.array([gen_img(s, t, img_size=img_size)
51 |                        for s, t in zip(starts, targets)], dtype=np.uint8)
52 |
53 |     if binary:
54 |         targets = (targets > 0).astype(int)
55 |
56 |     return images, targets
57 |
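Both generators above are used in the notebooks; a short usage sketch (the shapes follow directly from the source above):

```python
from data_generation.ball import load_data
from data_generation.image_classification import generate_dataset

X, y = load_data(n_dims=10, n_points=1000)  # X: (1000, 10), y: (1000, 1)
images, targets = generate_dataset(img_size=10, n_images=300, binary=True)
# images: (300, 1, 10, 10) uint8 array of line drawings
# targets: 0 = horizontal/vertical line, 1 = diagonal (when binary=True)
```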
-------------------------------------------------------------------------------- /data_generation/nlp.py: --------------------------------------------------------------------------------
1 | import requests
2 | import zipfile
3 | import os
4 | import errno
5 | import nltk
6 | from nltk.tokenize import sent_tokenize
7 |
8 | #ALICE_URL = 'https://ota.bodleian.ox.ac.uk/repository/xmlui/bitstream/handle/20.500.12024/1476/alice28-1476.txt'
9 | #WIZARD_URL = 'https://ota.bodleian.ox.ac.uk/repository/xmlui/bitstream/handle/20.500.12024/1740/wizoz10-1740.txt'
10 | ALICE_URL = 'https://llds.ling-phil.ox.ac.uk/llds/xmlui/bitstream/handle/20.500.14106/1476/alice28-1476.txt'
11 | WIZARD_URL = 'https://llds.ling-phil.ox.ac.uk/llds/xmlui/bitstream/handle/20.500.14106/1740/wizoz10-1740.txt'
12 |
13 | def download_text(url, localfolder='texts'):
14 |     localfile = os.path.split(url)[-1]
15 |     try:
16 |         os.mkdir(f'{localfolder}')
17 |     except OSError as e:
18 |         if e.errno != errno.EEXIST:
19 |             raise
20 |     try:
21 |         r = requests.get(url, allow_redirects=True)
22 |         open(os.path.join(localfolder, localfile), 'wb').write(r.content)
23 |     except Exception as e:
24 |         print(f'Error downloading file: {str(e)}')
25 |
26 | def sentence_tokenize(source, quote_char='\\', sep_char=',',
27 |                       include_header=True, include_source=True,
28 |                       extensions=('txt',), **kwargs):
29 |     nltk.download('punkt')
30 |     # If source is a folder, goes through all files inside it
31 |     # that match the desired extensions ('txt' by default)
32 |     if os.path.isdir(source):
33 |         filenames = [f for f in os.listdir(source)
34 |                      if os.path.isfile(os.path.join(source, f)) and
35 |                         os.path.splitext(f)[1][1:] in extensions]
36 |     elif isinstance(source, str):
37 |         filenames = [source]
38 |
39 |     # If there is a configuration file, builds a dictionary with
40 |     # the corresponding start and end lines of each text file
41 |     config_file = os.path.join(source, 'lines.cfg')
42 |     config = {}
43 |     if os.path.exists(config_file):
44 |         with open(config_file, 'r') as f:
45 |             rows = f.readlines()
46 |
47 |         for r in rows[1:]:
48 |             fname, start, end = r.strip().split(',')
49 |             config.update({fname: (int(start), int(end))})
50 |
51 |     new_fnames = []
52 |     # For each file of text
53 |     for fname in filenames:
54 |         # If there's a start and end line for that file, use it
55 |         try:
56 |             start, end = config[fname]
57 |         except KeyError:
58 |             start = None
59 |             end = None
60 |
61 |         # Opens the file, slices the configured lines (if any),
62 |         # cleans line breaks and uses the sentence tokenizer
63 |         with open(os.path.join(source, fname), 'r') as f:
64 |             contents = (''.join(f.readlines()[slice(start, end, None)])
65 |                         .replace('\n', ' ').replace('\r', ''))
66 |         corpus = sent_tokenize(contents, **kwargs)
67 |
68 |         # Builds a CSV file containing tokenized sentences
69 |         base = os.path.splitext(fname)[0]
70 |         new_fname = f'{base}.sent.csv'
71 |         new_fname = os.path.join(source, new_fname)
72 |         with open(new_fname, 'w') as f:
73 |             # Header of the file
74 |             if include_header:
75 |                 if include_source:
76 |                     f.write('sentence,source\n')
77 |                 else:
78 |                     f.write('sentence\n')
79 |             # Writes one line for each sentence
80 |             for sentence in corpus:
81 |                 if include_source:
82 |                     f.write(f'{quote_char}{sentence}{quote_char}{sep_char}{fname}\n')
83 |                 else:
84 |                     f.write(f'{quote_char}{sentence}{quote_char}\n')
85 |         new_fnames.append(new_fname)
86 |
87 |     # Returns list of the newly generated CSV files
88 |     return sorted(new_fnames)
89 |
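A sketch of how these two helpers chain together (the `texts` folder is `download_text`'s default destination):

```python
from data_generation.nlp import ALICE_URL, WIZARD_URL, download_text, sentence_tokenize

download_text(ALICE_URL, 'texts')   # saves alice28-1476.txt under texts/
download_text(WIZARD_URL, 'texts')  # saves wizoz10-1740.txt under texts/
csv_files = sentence_tokenize('texts')  # writes one .sent.csv file per text
```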
-------------------------------------------------------------------------------- /data_generation/rps.py: --------------------------------------------------------------------------------
1 | import requests
2 | import zipfile
3 | import os
4 | import errno
5 |
6 | def download_rps(localfolder=''):
7 |     filenames = ['rps.zip', 'rps-test-set.zip']
8 |     for filename in filenames:
9 |         try:
10 |             os.mkdir(f'{localfolder}{filename[:-4]}')
11 |
12 |             localfile = f'{localfolder}{filename}'
13 |             # url = 'https://storage.googleapis.com/laurencemoroney-blog.appspot.com/{}'
14 |             # Updated from TFDS URL at
15 |             # https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/datasets/rock_paper_scissors/rock_paper_scissors_dataset_builder.py
16 |             url = 'https://storage.googleapis.com/download.tensorflow.org/data/{}'
17 |             r = requests.get(url.format(filename), allow_redirects=True)
18 |             open(localfile, 'wb').write(r.content)
19 |             with zipfile.ZipFile(localfile, 'r') as zip_ref:
20 |                 zip_ref.extractall(localfolder)
21 |         except OSError as e:
22 |             if e.errno != errno.EEXIST:
23 |                 raise
24 |             else:
25 |                 print(f'{filename[:-4]} folder already exists!')
26 |

-------------------------------------------------------------------------------- /data_generation/simple_linear_regression.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | true_b = 1
4 | true_w = 2
5 | N = 100
6 |
7 | # Data Generation
8 | np.random.seed(42)
9 | x = np.random.rand(N, 1)
10 | y = true_b + true_w * x + (.1 * np.random.randn(N, 1))
11 |
12 | # Shuffles the indices
13 | idx = np.arange(N)
14 | np.random.shuffle(idx)
15 |
16 | # Uses first 80 random indices for train
17 | train_idx = idx[:int(N*.8)]
18 | # Uses the remaining indices for validation
19 | val_idx = idx[int(N*.8):]
20 |
21 | # Generates train and validation sets
22 | x_train, y_train = x[train_idx], y[train_idx]
23 | x_val, y_val = x[val_idx], y[val_idx]

-------------------------------------------------------------------------------- /data_generation/square_sequences.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def generate_sequences(n=128, variable_len=False, seed=13):
4 |     basic_corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])
5 |     np.random.seed(seed)
6 |     bases = np.random.randint(4, size=n)
7 |     if variable_len:
8 |         lengths = np.random.randint(3, size=n) + 2
9 |     else:
10 |         lengths = [4] * n
11 |     directions = np.random.randint(2, size=n)
12 |     points = [basic_corners[[(b + i) % 4 for i in range(4)]][slice(None, None, d*2-1)][:l] + np.random.randn(l, 2) * 0.1 for b, d, l in zip(bases, directions, lengths)]
13 |     return points, directions

-------------------------------------------------------------------------------- /data_preparation/v0.py: --------------------------------------------------------------------------------
1 |
2 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
3 |
4 | # Our data was in Numpy arrays, but we need to transform them
5 | # into PyTorch's Tensors and then we send them to the
6 | # chosen device
7 | x_train_tensor = torch.as_tensor(x_train).float().to(device)
8 | y_train_tensor = torch.as_tensor(y_train).float().to(device)
9 |

-------------------------------------------------------------------------------- /data_preparation/v1.py: --------------------------------------------------------------------------------
1 |
2 | # Our data was in Numpy arrays, but we need to transform them into PyTorch's Tensors
3 | x_train_tensor = torch.from_numpy(x_train).float()
4 | y_train_tensor = torch.from_numpy(y_train).float()
5 |
6 | # Builds Dataset
7 | train_data = TensorDataset(x_train_tensor, y_train_tensor)
8 |
9 | # Builds DataLoader
10 | train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
11 |

-------------------------------------------------------------------------------- /data_preparation/v2.py: --------------------------------------------------------------------------------
1 |
2 | torch.manual_seed(13)
3 |
4 | # Builds tensors from numpy arrays BEFORE split
5 | x_tensor = torch.as_tensor(x).float()
6 | y_tensor = torch.as_tensor(y).float()
7 |
8 | # Builds dataset containing ALL data points
9 | dataset = TensorDataset(x_tensor, y_tensor)
10 |
11 | # Performs the split
12 | ratio = .8
13 | n_total = len(dataset)
14 | n_train = int(n_total * ratio)
15 | n_val = n_total - n_train
16 |
17 | train_data, val_data = random_split(dataset, [n_train, n_val])
18 |
19 | # Builds a loader for each set
20 | train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
21 | val_loader = DataLoader(dataset=val_data, batch_size=16)
22 |

-------------------------------------------------------------------------------- /environment.yml: --------------------------------------------------------------------------------
1 | name: pytorchbook
2 | channels:
3 |   - conda-forge
4 |   - pytorch
5 |   - dglteam
6 | dependencies:
7 |   - python=3.10
8 |   - pip
9 |   - numpy
10 |   - matplotlib
11 |   - scikit-learn
12 |   - jupyter
13 |   - jupyterlab
14 |   - pytorch>=2.2.1
15 |   - torchvision>=0.17.1
16 |   - torchtext
17 |   - opencv
18 |   - librosa
19 |   - nb_conda_kernels
20 |   - pip:
21 |     - torchviz
22 |     - tensorboard
23 |     - jupyter-server-proxy

-------------------------------------------------------------------------------- /helpers.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from torch.utils.data import random_split, WeightedRandomSampler, TensorDataset
5 | def make_train_step_fn(model, loss_fn, optimizer):
6 |     # Builds function that performs a step in the train loop
7 |     def perform_train_step_fn(x, y):
8 |         # Sets model to TRAIN mode
9 |         model.train()
10 |
11 |         # Step 1 - Computes our model's predicted output - forward pass
12 |         yhat = model(x)
13 |         # Step 2 - Computes the loss
14 |         loss = loss_fn(yhat, y)
15 |         # Step 3 - Computes gradients for both "a" and "b" parameters
16 |         loss.backward()
17 |         # Step 4 - Updates parameters using gradients and the learning rate
18 |         optimizer.step()
19 |         optimizer.zero_grad()
20 |
21 |         # Returns the loss
22 |         return loss.item()
23 |
24 |     # Returns the function that will be called inside the train loop
25 |     return perform_train_step_fn
26 |
27 | def mini_batch(device, data_loader, step_fn):
28 |     mini_batch_losses = []
29 |     for x_batch, y_batch in data_loader:
30 |         x_batch = x_batch.to(device)
31 |         y_batch = y_batch.to(device)
32 |
33 |         mini_batch_loss = step_fn(x_batch, y_batch)
34 |         mini_batch_losses.append(mini_batch_loss)
35 |
36 |     loss = np.mean(mini_batch_losses)
37 |     return loss
38 |
39 | def make_val_step_fn(model, loss_fn):
40 |     # Builds function that performs a step in the validation loop
41 |     def perform_val_step_fn(x, y):
42 |         # Sets model to EVAL mode
43 |         model.eval()
44 |
45 |         # Step 1 - Computes our model's predicted output - forward pass
46 |         yhat = model(x)
47 |         # Step 2 - Computes the loss
48 |         loss = loss_fn(yhat, y)
49 |         # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation
50 |         return loss.item()
51 |
52 |     return perform_val_step_fn
53 |
54 | def index_splitter(n, splits, seed=13):
55 |     idx = torch.arange(n)
56 |     # Makes the split argument a tensor
57 |     splits_tensor = torch.as_tensor(splits)
58 |     total = splits_tensor.sum().float()
59 |     # If the total does not add up to one
60 |     # divide every number by the total
61 |     if not total.isclose(torch.ones(1)[0]):
62 |         splits_tensor = splits_tensor / total
63 |     # Uses PyTorch random_split to split the indices
64 |     torch.manual_seed(seed)
65 |     return random_split(idx, splits_tensor)
66 |
67 | # def index_splitter(n, splits, seed=13):
68 | #     idx = torch.arange(n)
69 | #     # Makes the split argument a tensor
70 | #     splits_tensor = torch.as_tensor(splits)
71 | #     # Finds the correct multiplier, so we don't have
72 | #     # to worry about summing up to N (or one)
73 | #     multiplier = n / splits_tensor.sum()
74 | #     splits_tensor = (multiplier * splits_tensor).long()
75 | #     # If there is a difference, throws at the first split
76 | #     # so random_split does not complain
77 | #     diff = n - splits_tensor.sum()
78 | #     splits_tensor[0] += diff
79 | #     # Uses PyTorch random_split to split the indices
80 | #     torch.manual_seed(seed)
81 | #     return random_split(idx, splits_tensor)
82 |
83 | def make_balanced_sampler(y):
84 |     # Computes weights for compensating imbalanced classes
85 |     classes, counts = y.unique(return_counts=True)
86 |     weights = 1.0 / counts.float()
87 |     sample_weights = weights[y.squeeze().long()]
88 |     # Builds sampler with the computed weights
89 |     generator = torch.Generator()
90 |     sampler = WeightedRandomSampler(
91 |         weights=sample_weights,
92 |         num_samples=len(sample_weights),
93 |         generator=generator,
94 |         replacement=True
95 |     )
96 |     return sampler
97 |
98 | def freeze_model(model):
99 |     for parameter in model.parameters():
100 |         parameter.requires_grad = False
101 |
102 | def preprocessed_dataset(model, loader, device=None):
103 |     if device is None:
104 |         device = next(model.parameters()).device
105 |
106 |     features = None
107 |     labels = None
108 |
109 |     for i, (x, y) in enumerate(loader):
110 |         model.eval()
111 |         x = x.to(device)
112 |         output = model(x)
113 |         if i == 0:
114 |             features = output.detach().cpu()
115 |             labels = y.cpu()
116 |         else:
117 |             features = torch.cat([features, output.detach().cpu()])
118 |             labels = torch.cat([labels, y.cpu()])
119 |
120 |     dataset = TensorDataset(features, labels)
121 |     return dataset
122 |
123 | def inception_loss(outputs, labels):
124 |     try:
125 |         main, aux = outputs
126 |     except ValueError:
127 |         main = outputs
128 |         aux = None
129 |         loss_aux = 0
130 |
131 |     multi_loss_fn = nn.CrossEntropyLoss(reduction='mean')
132 |     loss_main = multi_loss_fn(main, labels)
133 |     if aux is not None:
134 |         loss_aux = multi_loss_fn(aux, labels)
135 |     return loss_main + 0.4 * loss_aux
136 |
137 |

-------------------------------------------------------------------------------- /images/1conv1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/1conv1.png

-------------------------------------------------------------------------------- /images/1conv2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/1conv2.png
-------------------------------------------------------------------------------- /images/3channel1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/3channel1.png

-------------------------------------------------------------------------------- /images/3channel2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/3channel2.png

-------------------------------------------------------------------------------- /images/3channel_filters1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/3channel_filters1.png

-------------------------------------------------------------------------------- /images/aiayn_dot.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/aiayn_dot.png

-------------------------------------------------------------------------------- /images/aiayn_multihead.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/aiayn_multihead.png

-------------------------------------------------------------------------------- /images/alexnet.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/alexnet.png

-------------------------------------------------------------------------------- /images/alice_dorothy.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/alice_dorothy.png

-------------------------------------------------------------------------------- /images/architecture_lenet.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/architecture_lenet.png

-------------------------------------------------------------------------------- /images/attention.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attention.png

-------------------------------------------------------------------------------- /images/attn_narrow_2heads.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attn_narrow_2heads.png

-------------------------------------------------------------------------------- /images/attn_narrow_first_head.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attn_narrow_first_head.png
-------------------------------------------------------------------------------- /images/attn_narrow_transf.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attn_narrow_transf.png

-------------------------------------------------------------------------------- /images/basic_attention.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/basic_attention.png

-------------------------------------------------------------------------------- /images/bert_embeddings.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_embeddings.png

-------------------------------------------------------------------------------- /images/bert_input_embed.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_input_embed.png

-------------------------------------------------------------------------------- /images/bert_mlm.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_mlm.png

-------------------------------------------------------------------------------- /images/bert_nsp.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_nsp.png

-------------------------------------------------------------------------------- /images/bidirect_rnn.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bidirect_rnn.png

-------------------------------------------------------------------------------- /images/blank1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank1.png

-------------------------------------------------------------------------------- /images/blank2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank2.png

-------------------------------------------------------------------------------- /images/blank_center.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank_center.png

-------------------------------------------------------------------------------- /images/blank_end.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank_end.png
-------------------------------------------------------------------------------- /images/block_tokens.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/block_tokens.png

-------------------------------------------------------------------------------- /images/book10.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/book10.png

-------------------------------------------------------------------------------- /images/book9.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/book9.png

-------------------------------------------------------------------------------- /images/bow.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bow.png

-------------------------------------------------------------------------------- /images/cbow.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/cbow.png

-------------------------------------------------------------------------------- /images/classification.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification.png

-------------------------------------------------------------------------------- /images/classification_equiv.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification_equiv.png

-------------------------------------------------------------------------------- /images/classification_relu2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification_relu2.png

-------------------------------------------------------------------------------- /images/classification_softmax.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification_softmax.png

-------------------------------------------------------------------------------- /images/cls_hidden_state.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/cls_hidden_state.png

-------------------------------------------------------------------------------- /images/context_translate.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/context_translate.png
-------------------------------------------------------------------------------- /images/context_vector.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/context_vector.png

-------------------------------------------------------------------------------- /images/conv1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1.png

-------------------------------------------------------------------------------- /images/conv1_ma.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1_ma.png

-------------------------------------------------------------------------------- /images/conv1d.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1d.png

-------------------------------------------------------------------------------- /images/conv1d_dilated.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1d_dilated.png

-------------------------------------------------------------------------------- /images/conv1d_edges.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1d_edges.png

-------------------------------------------------------------------------------- /images/conv2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv2.png

-------------------------------------------------------------------------------- /images/conv3.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv3.png

-------------------------------------------------------------------------------- /images/conv5.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv5.png

-------------------------------------------------------------------------------- /images/conv6.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv6.png

-------------------------------------------------------------------------------- /images/conv7.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv7.png

-------------------------------------------------------------------------------- /images/conv8.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv8.png
/images/cross_attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/cross_attn.png -------------------------------------------------------------------------------- /images/dec_both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/dec_both.png -------------------------------------------------------------------------------- /images/decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/decoder.png -------------------------------------------------------------------------------- /images/decoder_self.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/decoder_self.png -------------------------------------------------------------------------------- /images/decoder_self_simplified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/decoder_self_simplified.png -------------------------------------------------------------------------------- /images/dropout_paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/dropout_paper.png -------------------------------------------------------------------------------- /images/elmo_embed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/elmo_embed.png -------------------------------------------------------------------------------- /images/elmo_lstm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/elmo_lstm.png -------------------------------------------------------------------------------- /images/embed_arithmetic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/embed_arithmetic.png -------------------------------------------------------------------------------- /images/enc_both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/enc_both.png -------------------------------------------------------------------------------- /images/enc_dec_attn_translate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/enc_dec_attn_translate.png -------------------------------------------------------------------------------- /images/encdec_attn.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encdec_attn.png -------------------------------------------------------------------------------- /images/encdec_self_simplified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encdec_self_simplified.png -------------------------------------------------------------------------------- /images/encoded_distances.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoded_distances.png -------------------------------------------------------------------------------- /images/encoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder.png -------------------------------------------------------------------------------- /images/encoder_decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_decoder.png -------------------------------------------------------------------------------- /images/encoder_lost_seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_lost_seq.png -------------------------------------------------------------------------------- /images/encoder_self.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_self.png -------------------------------------------------------------------------------- /images/encoder_self_detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_self_detail.png -------------------------------------------------------------------------------- /images/encoder_self_simplified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_self_simplified.png -------------------------------------------------------------------------------- /images/fill1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/fill1.png -------------------------------------------------------------------------------- /images/fill2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/fill2.png -------------------------------------------------------------------------------- /images/full_transformer.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/full_transformer.png -------------------------------------------------------------------------------- /images/full_transformer_and_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/full_transformer_and_class.png -------------------------------------------------------------------------------- /images/gru_cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/gru_cell.png -------------------------------------------------------------------------------- /images/inception_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/inception_model.png -------------------------------------------------------------------------------- /images/inception_modules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/inception_modules.png -------------------------------------------------------------------------------- /images/kq_matches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/kq_matches.png -------------------------------------------------------------------------------- /images/layer_vs_batch_norm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/layer_vs_batch_norm.png -------------------------------------------------------------------------------- /images/logistic_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/logistic_model.png -------------------------------------------------------------------------------- /images/lstm_cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/lstm_cell.png -------------------------------------------------------------------------------- /images/multiattn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/multiattn.png -------------------------------------------------------------------------------- /images/multihead_chunking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/multihead_chunking.png -------------------------------------------------------------------------------- /images/new_books.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/new_books.png -------------------------------------------------------------------------------- /images/ngrams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ngrams.png -------------------------------------------------------------------------------- /images/norm_first.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/norm_first.png -------------------------------------------------------------------------------- /images/ohe1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ohe1.png -------------------------------------------------------------------------------- /images/ohe2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ohe2.png -------------------------------------------------------------------------------- /images/ohe3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ohe3.png -------------------------------------------------------------------------------- /images/packed_seq_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/packed_seq_data.png -------------------------------------------------------------------------------- /images/packed_seq_inddata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/packed_seq_inddata.png -------------------------------------------------------------------------------- /images/padding1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/padding1.png -------------------------------------------------------------------------------- /images/padding2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/padding2.png -------------------------------------------------------------------------------- /images/padding3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/padding3.png -------------------------------------------------------------------------------- /images/paddings.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/paddings.png -------------------------------------------------------------------------------- /images/paths.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/paths.png -------------------------------------------------------------------------------- /images/pooling1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/pooling1.png -------------------------------------------------------------------------------- /images/posenc_mod4mod8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_mod4mod8.png -------------------------------------------------------------------------------- /images/posenc_modnorm4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm4.png -------------------------------------------------------------------------------- /images/posenc_modnorm_deg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm_deg.png -------------------------------------------------------------------------------- /images/posenc_modnorm_mult.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm_mult.png -------------------------------------------------------------------------------- /images/posenc_modnorm_sincos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm_sincos.png -------------------------------------------------------------------------------- /images/posenc_norm1k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_norm1k.png -------------------------------------------------------------------------------- /images/posenc_norm4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_norm4.png -------------------------------------------------------------------------------- /images/posenc_norm4_long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_norm4_long.png -------------------------------------------------------------------------------- /images/residual.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/residual.png -------------------------------------------------------------------------------- /images/rest_continuous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rest_continuous.png -------------------------------------------------------------------------------- /images/rest_discrete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rest_discrete.png -------------------------------------------------------------------------------- /images/rnn1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn1.png -------------------------------------------------------------------------------- /images/rnn2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn2.png -------------------------------------------------------------------------------- /images/rnn_cell_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn_cell_diagram.png -------------------------------------------------------------------------------- /images/rnn_cell_diagram_seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn_cell_diagram_seq.png -------------------------------------------------------------------------------- /images/score_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/score_alignment.png -------------------------------------------------------------------------------- /images/score_alignment_translate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/score_alignment_translate.png -------------------------------------------------------------------------------- /images/shifted_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/shifted_target.png -------------------------------------------------------------------------------- /images/sincos_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/sincos_distance.png -------------------------------------------------------------------------------- /images/stacked_encdec.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stacked_encdec.png -------------------------------------------------------------------------------- /images/stacked_layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stacked_layers.png -------------------------------------------------------------------------------- /images/stacked_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stacked_rnn.png -------------------------------------------------------------------------------- /images/stride1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stride1.png -------------------------------------------------------------------------------- /images/strider2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/strider2.png -------------------------------------------------------------------------------- /images/strider3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/strider3.png -------------------------------------------------------------------------------- /images/sublayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/sublayer.png -------------------------------------------------------------------------------- /images/transf_classes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_classes.png -------------------------------------------------------------------------------- /images/transf_decself.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_decself.png -------------------------------------------------------------------------------- /images/transf_encdecself.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_encdecself.png -------------------------------------------------------------------------------- /images/transf_encself.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_encself.png -------------------------------------------------------------------------------- /images/translation_att.png: -------------------------------------------------------------------------------- 
/model_configuration/v0.py:
--------------------------------------------------------------------------------

# This is redundant now, but it won't be when we introduce
# Datasets...
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters
# (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')
--------------------------------------------------------------------------------
/model_configuration/v1.py:
--------------------------------------------------------------------------------

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')

# Creates the train_step function for our model, loss function and optimizer
train_step_fn = make_train_step_fn(model, loss_fn, optimizer)
--------------------------------------------------------------------------------
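[Note: make_train_step_fn is not defined in these snippets -- it comes from
the book's notebooks. A minimal sketch, consistent with how it is called
above: a higher-order function that captures the model, loss function, and
optimizer, and returns a function performing one training step:]

def make_train_step_fn(model, loss_fn, optimizer):
    # Builds a function that performs a step in the train loop
    def perform_train_step_fn(x, y):
        # Sets model to TRAIN mode
        model.train()
        # Step 1 - Computes the model's predictions - forward pass
        yhat = model(x)
        # Step 2 - Computes the loss
        loss = loss_fn(yhat, y)
        # Step 3 - Computes gradients
        loss.backward()
        # Step 4 - Updates parameters and zeroes gradients for the next step
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss as a plain Python float
        return loss.item()
    return perform_train_step_fn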
/model_configuration/v2.py:
--------------------------------------------------------------------------------

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')

# Creates the train_step function for our model, loss function and optimizer
train_step_fn = make_train_step_fn(model, loss_fn, optimizer)

# Creates the val_step function for our model and loss function
val_step_fn = make_val_step_fn(model, loss_fn)
--------------------------------------------------------------------------------
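[Note: make_val_step_fn is also defined in the book's notebooks. A minimal
sketch, mirroring make_train_step_fn but without the backward pass or the
optimizer, since validation must never update parameters:]

def make_val_step_fn(model, loss_fn):
    # Builds a function that performs a step in the validation loop
    def perform_val_step_fn(x, y):
        # Sets model to EVAL mode
        model.eval()
        # Computes predictions and loss -- no gradients, no updates
        yhat = model(x)
        loss = loss_fn(yhat, y)
        return loss.item()
    return perform_val_step_fn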
/model_configuration/v3.py:
--------------------------------------------------------------------------------

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')

# Creates the train_step function for our model, loss function and optimizer
train_step_fn = make_train_step_fn(model, loss_fn, optimizer)

# Creates the val_step function for our model and loss function
val_step_fn = make_val_step_fn(model, loss_fn)

# Creates a Summary Writer to interface with TensorBoard
writer = SummaryWriter('runs/simple_linear_regression')

# Fetches a single mini-batch so we can use add_graph
x_sample, y_sample = next(iter(train_loader))
writer.add_graph(model, x_sample.to(device))
--------------------------------------------------------------------------------
/model_configuration/v4.py:
--------------------------------------------------------------------------------

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model (no .to(device) here anymore --
# device handling is delegated to the StepByStep class)
model = nn.Sequential(nn.Linear(1, 1))

# Defines an SGD optimizer to update the parameters
# (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')
--------------------------------------------------------------------------------
/model_training/v0.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 1000

for epoch in range(n_epochs):
    # Sets model to TRAIN mode
    model.train()

    # Step 1 - Computes model's predicted output - forward pass
    yhat = model(x_train_tensor)

    # Step 2 - Computes the loss
    loss = loss_fn(yhat, y_train_tensor)

    # Step 3 - Computes gradients for both "b" and "w" parameters
    loss.backward()

    # Step 4 - Updates parameters using gradients and
    # the learning rate
    optimizer.step()
    optimizer.zero_grad()
--------------------------------------------------------------------------------
/model_training/v1.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 1000

losses = []

# For each epoch...
for epoch in range(n_epochs):
    # Performs one train step and returns the corresponding loss
    loss = train_step_fn(x_train_tensor, y_train_tensor)
    losses.append(loss)
--------------------------------------------------------------------------------
/model_training/v2.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 1000

losses = []

# For each epoch...
for epoch in range(n_epochs):
    # inner loop
    mini_batch_losses = []
    for x_batch, y_batch in train_loader:
        # The dataset "lives" in the CPU, and so do our mini-batches;
        # therefore, we need to send those mini-batches to the
        # device where the model "lives"
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # Performs one train step and returns the corresponding loss
        # for this mini-batch
        mini_batch_loss = train_step_fn(x_batch, y_batch)
        mini_batch_losses.append(mini_batch_loss)

    # Computes average loss over all mini-batches - that's the epoch loss
    loss = np.mean(mini_batch_losses)

    losses.append(loss)
--------------------------------------------------------------------------------
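[Note: from v3 on, the inner mini-batch loop is factored out into a
mini_batch helper defined in the book's notebooks. A minimal sketch that
matches v2's inner loop and the way it is called below -- it runs step_fn
over every mini-batch of a loader and returns the average loss:]

def mini_batch(device, data_loader, step_fn):
    mini_batch_losses = []
    for x_batch, y_batch in data_loader:
        # Sends the mini-batch to the same device as the model
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # One step (train or validation) for this mini-batch
        mini_batch_loss = step_fn(x_batch, y_batch)
        mini_batch_losses.append(mini_batch_loss)
    # The average over all mini-batches is the epoch loss
    return np.mean(mini_batch_losses)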
/model_training/v3.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 200

losses = []

for epoch in range(n_epochs):
    # inner loop
    loss = mini_batch(device, train_loader, train_step_fn)
    losses.append(loss)
--------------------------------------------------------------------------------
/model_training/v4.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 200

losses = []
val_losses = []

for epoch in range(n_epochs):
    # inner loop
    loss = mini_batch(device, train_loader, train_step_fn)
    losses.append(loss)

    # VALIDATION
    # no gradients in validation!
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step_fn)
        val_losses.append(val_loss)
--------------------------------------------------------------------------------
/model_training/v5.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 200

losses = []
val_losses = []

for epoch in range(n_epochs):
    # inner loop
    loss = mini_batch(device, train_loader, train_step_fn)
    losses.append(loss)

    # VALIDATION
    # no gradients in validation!
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step_fn)
        val_losses.append(val_loss)

    # Records both losses for each epoch under the main tag "loss"
    writer.add_scalars(main_tag='loss',
                       tag_scalar_dict={'training': loss, 'validation': val_loss},
                       global_step=epoch)

# Closes the writer
writer.close()
--------------------------------------------------------------------------------
/plots/chapter1.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
plt.style.use('fivethirtyeight')

def fit_model(x_train, y_train):
    # Fits a linear regression to find the actual b and w that minimize the loss
    regression = LinearRegression()
    regression.fit(x_train, y_train)
    b_minimum, w_minimum = regression.intercept_[0], regression.coef_[0][0]
    return b_minimum, w_minimum

def figure1(x_train, y_train, x_val, y_val):
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    ax[0].scatter(x_train, y_train)
    ax[0].set_xlabel('x')
    ax[0].set_ylabel('y')
    ax[0].set_ylim([0, 3.1])
    ax[0].set_title('Generated Data - Train')

    ax[1].scatter(x_val, y_val, c='r')
    ax[1].set_xlabel('x')
    ax[1].set_ylabel('y')
    ax[1].set_ylim([0, 3.1])
    ax[1].set_title('Generated Data - Validation')
    fig.tight_layout()

    return fig, ax

def figure3(x_train, y_train):
    b_minimum, w_minimum = fit_model(x_train, y_train)
    # Generates evenly spaced x feature
    x_range = np.linspace(0, 1, 101)
    # Computes yhat
    yhat_range = b_minimum + w_minimum * x_range

    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_ylim([0, 3.1])

    # Dataset
    ax.scatter(x_train, y_train)
    # Predictions
    ax.plot(x_range, yhat_range, label='Final model\'s predictions', c='k', linestyle='--')

    # Annotations
    ax.annotate('b = {:.4f} w = {:.4f}'.format(b_minimum, w_minimum), xy=(.4, 1.5), c='k', rotation=34)
    ax.legend(loc=0)
    fig.tight_layout()
    return fig, ax
--------------------------------------------------------------------------------
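[Note: a hypothetical usage sketch for the helpers above. The book generates
its Chapter 1 data in data_generation/simple_linear_regression.py; the
parameters below (true_b, true_w, noise scale) are illustrative assumptions,
not necessarily the book's exact values:]

import numpy as np
from plots.chapter1 import figure1, figure3

true_b, true_w, N = 1, 2, 100
np.random.seed(42)
x = np.random.rand(N, 1)
y = true_b + true_w * x + .1 * np.random.randn(N, 1)

# Simple 80/20 shuffle split
idx = np.arange(N)
np.random.shuffle(idx)
train_idx, val_idx = idx[:int(N * .8)], idx[int(N * .8):]

fig, ax = figure1(x[train_idx], y[train_idx], x[val_idx], y[val_idx])
fig, ax = figure3(x[train_idx], y[train_idx])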
/plots/chapter10.py:
--------------------------------------------------------------------------------
import numpy as np
import torch
from matplotlib import pyplot as plt

def hist_encoding(encoding):
    encoding = encoding.cpu().detach().numpy()
    fig, axs = plt.subplots(1, 4, figsize=(15, 4))
    axs = axs.flatten()
    for i in range(4):
        data_point = encoding[i][0]
        axs[i].hist(data_point, bins=np.linspace(-3, 3, 15), alpha=.5)
        axs[i].set_xlabel(f'Data Point #{i}')
        axs[i].set_ylabel('# of features')
        axs[i].set_title(f'mean={data_point.mean():.4f}\n var={data_point.var():.4f}', fontsize=16)
        axs[i].set_ylim([0, 10])
        axs[i].label_outer()
    fig.tight_layout()
    return fig

def hist_layer_normed(encoding, normed):
    encoding = encoding.cpu().detach().numpy()
    normed = normed.cpu().detach()
    fig, axs = plt.subplots(1, 4, figsize=(15, 4))
    for i in range(4):
        data_point = encoding[i][0]
        normed_point = normed.detach()[i][0]
        axs[i].hist(data_point, bins=np.linspace(-3, 3, 15), alpha=.5, label='Original')
        axs[i].hist(normed_point.numpy(), bins=np.linspace(-3, 3, 15), alpha=.5, label='Standardized')
        axs[i].set_xlabel(f'Data Point #{i}')
        axs[i].set_ylabel('# of features')
        axs[i].set_title(f'mean={normed.mean().numpy():.4f}\n std={normed.std(unbiased=False).numpy():.4f}', fontsize=16)
        axs[i].legend()
        axs[i].set_ylim([0, 80])
        axs[i].label_outer()
    fig.tight_layout()
    return fig

def plot_patches(patches, kernel_size=3):
    n, p1, p2, v = patches.shape
    fig, axs = plt.subplots(p1, p2, figsize=(3, 3))
    for i in range(p1):
        for j in range(p2):
            axs[i, j].imshow(patches.squeeze()[i, j].view(kernel_size, -1).cpu().detach().numpy(), cmap=plt.cm.gray)
            axs[i, j].grid(False)
            axs[i, j].set_xticklabels([])
            axs[i, j].set_yticklabels([])
    return fig

def plot_seq_patches(seq_patches):
    seq_patches = seq_patches.cpu().detach().numpy()
    fig, axs = plt.subplots(1, seq_patches.shape[0], figsize=(3.5, 4))
    for i in range(seq_patches.shape[0]):
        axs[i].imshow(seq_patches[i].reshape(-1, 1), cmap=plt.cm.gray)
        axs[i].grid(False)
        axs[i].set_xticklabels([])
        axs[i].set_xlabel(i)
        axs[i].set_ylabel('Features')
        axs[i].label_outer()
    fig.suptitle('Sequence')
    fig.tight_layout(pad=0.3)
    fig.subplots_adjust(top=0.9)
    return fig

def plot_seq_patches_transp(seq_patches, add_cls=False, title=None):
    seq_patches = seq_patches.cpu().detach().numpy()
    seq_patches = np.atleast_3d(seq_patches)
    n, l, d = seq_patches.shape
    fig, saxs = plt.subplots(1+seq_patches.shape[1]+add_cls, n, figsize=(n*6, 6), sharex=True)

    if title is None:
        title = 'Sequence'
    for seq_n in range(n):
        axs = saxs[:, seq_n]
        if add_cls:
            sub_patches = np.concatenate([np.zeros_like(seq_patches[seq_n, :1]), seq_patches[seq_n]])
        else:
            sub_patches = seq_patches[seq_n]
        axs[0].text(4, 1, f'{title} #{seq_n}', fontsize=16)
        axs[0].grid(False)
        axs[0].set_yticks([])
        for i in range(sub_patches.shape[0]):
            label = i
            if add_cls:
                label = i-1 if i > 0 else '[CLS]'
            axs[i+1].imshow(sub_patches[i].reshape(1, -1), cmap=plt.cm.gray)
            axs[i+1].set_yticklabels([label], rotation=0)
            axs[i+1].grid(False)
            axs[i+1].set_yticks([0])
            #axs[i+1].set_ylabel(i, rotation=0)
            #axs[i+1].label_outer()
        axs[-1].set_xlabel('Features')
    fig.tight_layout()
    return fig

def plot_images(imgs, title=True):
    imgs = imgs.squeeze(1).cpu().detach().numpy()
    imgs = np.atleast_3d(imgs)
    fig, axs = plt.subplots(1, imgs.shape[0], figsize=(6, 3))
    if imgs.shape[0] == 1:
        axs = [axs]
    for i in range(imgs.shape[0]):
        axs[i].imshow(imgs[i], cmap=plt.cm.gray)
        axs[i].grid(False)
        axs[i].set_xticks([])
        axs[i].set_yticks([])
        if title:
            axs[i].set_title(f'Image #{i}')

    return fig
--------------------------------------------------------------------------------
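[Note: a hypothetical usage sketch. hist_encoding indexes encoding[i][0] for
i in 0..3, so it expects at least four data points shaped (1, n_features),
e.g. a (N, 1, F) tensor; the sizes below are made up:]

import torch
from plots.chapter10 import hist_encoding

torch.manual_seed(42)
dummy_encoding = torch.randn(4, 1, 128)  # 4 data points, 128 features each
fig = hist_encoding(dummy_encoding)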
/plots/chapter11.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib
from matplotlib import pyplot as plt

def plot_word_vectors(wv, words, other=None):
    vectors = []
    for word in words:
        try:
            vectors.append(wv[word])
        except KeyError:
            if other is not None:
                vectors.append(other[word])

    vectors = np.array(vectors)

    fig, axs = plt.subplots(len(words), 1, figsize=(18, len(words)*.7))
    if len(words) == 1:
        axs = [axs]

    for i, word in enumerate(words):
        axs[i].imshow(vectors[i].reshape(1, -1), cmap=plt.cm.RdBu, vmin=vectors.min(), vmax=vectors.max())
        axs[i].set_xticklabels([])
        axs[i].set_yticklabels(['', word, ''])
        axs[i].grid(False)

    fig.tight_layout()
    return fig

def plot_attention(tokens, alphas):
    n_tokens = max(list(map(len, tokens)))
    batch_size, n_heads, _ = alphas[:, :, 0, :].shape
    alphas = alphas.detach().cpu().numpy()[:, :, 0, :n_tokens]
    fig, axs = plt.subplots(n_heads, batch_size, figsize=(n_tokens * batch_size, n_heads))

    textcolors = ["white", "black"]
    kw = dict(horizontalalignment="center", verticalalignment="center")
    valfmt = matplotlib.ticker.StrMethodFormatter("{x:.2f}")

    for i, axr in enumerate(axs):  # row
        for j, ax in enumerate(axr):  # col
            data = alphas[j, i]
            im = ax.imshow(np.array(data.tolist()).reshape(1, -1), vmin=0, vmax=1, cmap=plt.cm.gray)
            ax.grid(False)
            if i == 0:
                ax.set_xticks(np.arange(len(tokens[j])))
                ax.set_xticklabels(tokens[j])
            else:
                ax.set_xticks([])
            ax.set_yticks([-.5, 0, .5], minor=True)
            ax.set_yticklabels(['', f'Head #{i}', ''])
            ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)

            for jp in range(data.shape[0]):
                kw.update(color=textcolors[int(im.norm(data[jp]) > .5)])
                text = im.axes.text(jp, 0, valfmt(data[jp], None), **kw)
    return fig
--------------------------------------------------------------------------------
/plots/chapter2.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

def plot_losses(losses, val_losses):
    fig = plt.figure(figsize=(10, 4))
    plt.plot(losses, label='Training Loss', c='b')
    plt.plot(val_losses, label='Validation Loss', c='r')
    plt.yscale('log')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.tight_layout()
    return fig


def plot_resumed_losses(saved_epoch, saved_losses, saved_val_losses, n_epochs, losses, val_losses):
    range_before = range(0, saved_epoch)
    range_after = range(saved_epoch, saved_epoch + n_epochs)

    fig = plt.figure(figsize=(10, 4))
    # Checkpointed loss
    plt.plot(range_before, saved_losses,
             label='Checkpointed Training Loss', c='b', linestyle='--')
    plt.plot(range_before, saved_val_losses,
             label='Checkpointed Validation Loss', c='r', linestyle='--')
    # Losses after resuming
    plt.plot(range_after, losses, label='Training Loss', c='b')
    plt.plot(range_after, val_losses, label='Validation Loss', c='r')
    # Divider
    plt.plot([saved_epoch, saved_epoch],
             [np.min(saved_losses + losses), np.max(saved_losses + losses)],
             c='k', linewidth=1, linestyle='--', label='Checkpoint')
    plt.yscale('log')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.tight_layout()
    return fig
--------------------------------------------------------------------------------
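[Note: plot_losses consumes exactly the losses and val_losses lists built by
model_training/v4.py and v5.py above:]

from plots.chapter2 import plot_losses
fig = plot_losses(losses, val_losses)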
/plots/chapter2_1.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

def figure1(x, y):
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))

    ax.scatter(x, y)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_ylim([0, 3.1])
    ax.set_title('Generated Data - Full Dataset')
    fig.tight_layout()
    return fig
--------------------------------------------------------------------------------
/plots/chapter3.py:
--------------------------------------------------------------------------------
import torch
import numpy as np
import matplotlib.pyplot as plt
from operator import itemgetter
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap
from sklearn.metrics import confusion_matrix, roc_curve, precision_recall_curve
plt.style.use('fivethirtyeight')

def odds(prob):
    return prob / (1 - prob)

def log_odds(prob):
    return np.log(odds(prob))

def sigmoid(z):
    return 1 / (1 + np.exp(-z))
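
# Added example: a quick sanity check of the three helpers above, with values
# computed by hand. For p = 0.75:
#   odds(0.75)              = 0.75 / 0.25 = 3.0
#   log_odds(0.75)          = ln(3) ~= 1.0986
#   sigmoid(log_odds(0.75)) = 0.75, since the sigmoid inverts the log-odds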

def split_cm(cm):
    # Actual negatives go in the top row,
    # above the probability line
    actual_negative = cm[0]
    # Predicted negatives go in the first column
    tn = actual_negative[0]
    # Predicted positives go in the second column
    fp = actual_negative[1]

    # Actual positives go in the bottom row,
    # below the probability line
    actual_positive = cm[1]
    # Predicted negatives go in the first column
    fn = actual_positive[0]
    # Predicted positives go in the second column
    tp = actual_positive[1]

    return tn, fp, fn, tp

def tpr_fpr(cm):
    tn, fp, fn, tp = split_cm(cm)

    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)

    return tpr, fpr

def precision_recall(cm):
    tn, fp, fn, tp = split_cm(cm)

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    return precision, recall

def probability_line(ax, y, probs, threshold, shift=0.0, annot=False, colors=None):
    if colors is None:
        colors = ['r', 'b']
    ax.grid(False)
    ax.set_ylim([-.1, .1])
    ax.axes.get_yaxis().set_visible(False)
    ax.plot([0, 1], [0, 0], linewidth=2, c='k', zorder=1)
    ax.plot([0, 0], [-.1, .1], c='k', zorder=1)
    ax.plot([1, 1], [-.1, .1], c='k', zorder=1)

    # Masks for the four outcomes (fill color encodes the actual class,
    # edge color the predicted class)
    tn = (y == 0) & (probs < threshold)
    fp = (y == 0) & (probs >= threshold)
    tp = (y == 1) & (probs >= threshold)
    fn = (y == 1) & (probs < threshold)

    ax.plot([threshold, threshold], [-.1, .1], c='k', zorder=1, linestyle='--')
    # Actual negatives (top row)
    ax.scatter(probs[tn], np.zeros(tn.sum()) + shift, c=colors[0], s=150, zorder=2, edgecolor=colors[0], linewidth=3)
    ax.scatter(probs[fp], np.zeros(fp.sum()) + shift, c=colors[0], s=150, zorder=2, edgecolor=colors[1], linewidth=3)
    # Actual positives (bottom row)
    ax.scatter(probs[tp], np.zeros(tp.sum()) - shift, c=colors[1], s=150, zorder=2, edgecolor=colors[1], linewidth=3)
    ax.scatter(probs[fn], np.zeros(fn.sum()) - shift, c=colors[1], s=150, zorder=2, edgecolor=colors[0], linewidth=3)

    ax.set_xlabel(r'$\sigma(z) = P(y=1)$')
    ax.set_title('Threshold = {}'.format(threshold))

    if annot:
        ax.annotate('TN', xy=(.20, .03), c='k', weight='bold', fontsize=20)
        ax.annotate('FN', xy=(.20, -.08), c='k', weight='bold', fontsize=20)
        ax.annotate('FP', xy=(.70, .03), c='k', weight='bold', fontsize=20)
        ax.annotate('TP', xy=(.70, -.08), c='k', weight='bold', fontsize=20)
    return ax

def probability_contour(ax, model, device, X, y, threshold, cm=None, cm_bright=None):
    if cm is None:
        cm = plt.cm.RdBu
    if cm_bright is None:
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])

    h = .02  # step size in the mesh

    x_min, x_max = -2.25, 2.25
    y_min, y_max = -2.25, 2.25

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    logits = model(torch.as_tensor(np.c_[xx.ravel(), yy.ravel()]).float().to(device))
    logits = logits.detach().cpu().numpy().reshape(xx.shape)

    yhat = sigmoid(logits)

    ax.contour(xx, yy, yhat, levels=[threshold], cmap="Greys", vmin=0, vmax=1)
    contour = ax.contourf(xx, yy, yhat, 25, cmap=cm, alpha=.8, vmin=0, vmax=1)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright, edgecolors='k')
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$\sigma(z) = P(y=1)$')
    ax.grid(False)

    ax_c = plt.colorbar(contour)
    ax_c.set_ticks([0, .25, .5, .75, 1])
    return ax

def eval_curves_from_probs(y, probabilities, threshs, line=False, annot=False):
    cms = [confusion_matrix(y, (probabilities >= threshold)) for threshold in threshs]
    rates = np.array(list(map(tpr_fpr, cms)))
    precrec = np.array(list(map(precision_recall, cms)))
    return eval_curves(rates[:, 1], rates[:, 0], precrec[:, 1], precrec[:, 0], threshs, line=line, annot=annot)

def eval_curves(fprs, tprs, recalls, precisions, thresholds, thresholds2=None, line=False, annot=False):
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))

    if thresholds2 is None:
        thresholds2 = thresholds[:]

    marker = '.r-' if line else '.r'

    axs[0].plot(fprs, tprs, marker, markersize=12, linewidth=2)
    axs[0].set_xlim([-.05, 1.05])
    axs[0].set_ylim([-.05, 1.05])
    axs[0].set_xlabel('False Positive Rate')
    axs[0].set_ylabel('True Positive Rate')
    axs[0].set_title('ROC Curve')

    axs[1].plot(recalls, precisions, marker, markersize=12, linewidth=2)
    axs[1].set_xlim([-.05, 1.05])
    axs[1].set_ylim([-.05, 1.05])
    axs[1].set_xlabel('Recall')
    axs[1].set_ylabel('Precision')
    axs[1].set_title('Precision-Recall Curve')

    if annot:
        for thresh, fpr, tpr, prec, rec in zip(thresholds, fprs, tprs, precisions, recalls):
            axs[0].annotate(str(thresh), xy=(fpr - .03, tpr - .07))

        for thresh, fpr, tpr, prec, rec in zip(thresholds2, fprs, tprs, precisions, recalls):
            axs[1].annotate(str(thresh), xy=(rec - .03, prec - .07))

    fig.tight_layout()
    return fig


def figure1(X_train, y_train, X_val, y_val, cm_bright=None):
    if cm_bright is None:
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    ax[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)  #, edgecolors='k')
    ax[0].set_xlabel(r'$X_1$')
    ax[0].set_ylabel(r'$X_2$')
    ax[0].set_xlim([-2.3, 2.3])
    ax[0].set_ylim([-2.3, 2.3])
    ax[0].set_title('Generated Data - Train')

    ax[1].scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright)  #, edgecolors='k')
    ax[1].set_xlabel(r'$X_1$')
    ax[1].set_ylabel(r'$X_2$')
    ax[1].set_xlim([-2.3, 2.3])
    ax[1].set_ylim([-2.3, 2.3])
    ax[1].set_title('Generated Data - Validation')
    fig.tight_layout()

    return fig
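
# Hypothetical usage for figure1 above (the chapter works with a two-feature
# binary classification dataset; sklearn's make_moons is one way to generate
# comparable data -- the parameters here are illustrative):
# from sklearn.datasets import make_moons
# from sklearn.model_selection import train_test_split
# X, y = make_moons(n_samples=100, noise=0.3, random_state=0)
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.2, random_state=13)
# fig = figure1(X_train, y_train, X_val, y_val)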

def figure2(prob1):
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    prob = np.linspace(.01, .99, 99)

    for i in [0, 1]:
        ax[i].plot(prob, odds(prob), linewidth=2)
        ax[i].set_xlabel('Probability')
        if i:
            ax[i].set_yscale('log')
            ax[i].set_ylabel('Odds Ratio (log scale)')
            ax[i].set_title('Odds Ratio (log scale)')
        else:
            ax[i].set_ylabel('Odds Ratio')
            ax[i].set_title('Odds Ratio')
        ax[i].scatter([prob1, .5, (1-prob1)], [odds(prob1), odds(.5), odds(1-prob1)], c='r')

    fig.tight_layout()

    return fig

def figure3(prob1):
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    prob = np.linspace(.01, .99, 99)

    ax[0].plot(prob, log_odds(prob), linewidth=2)
    ax[0].set_xlabel('Probability')
    ax[0].set_ylabel('Log Odds Ratio')
    ax[0].set_title('Log Odds Ratio')
    ax[0].scatter([prob1, .5, (1-prob1)], [log_odds(prob1), log_odds(.5), log_odds(1-prob1)], c='r')

    ax[1].plot(log_odds(prob), prob, linewidth=2)
    ax[1].set_ylabel('Probability')
    ax[1].set_xlabel('Log Odds Ratio')
    ax[1].set_title('Probability')
    ax[1].scatter([log_odds(prob1), log_odds(.5), log_odds(1-prob1)], [prob1, .5, (1-prob1)], c='r')
    fig.tight_layout()

    return fig

def figure4(prob1):
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    prob = np.linspace(.01, .99, 99)

    ax.plot(log_odds(prob), prob, linewidth=2, c='r')
    ax.set_ylabel('Probability')
    ax.set_xlabel('Log Odds Ratio')
    ax.set_title('Sigmoid')
    ax.scatter([log_odds(prob1), log_odds(.5), log_odds(1-prob1)], [prob1, .5, (1-prob1)], c='r')
    fig.tight_layout()

    return fig

def figure7(X, y, model, device, cm=None, cm_bright=None):
    if cm is None:
        cm = plt.cm.RdBu
    if cm_bright is None:
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    fig = plt.figure(figsize=(15, 4.5))

    h = .02  # step size in the mesh

    # x_min, x_max = X_train[:, 0].min() - .5, X_train[:, 0].max() + .5
    # y_min, y_max = X_train[:, 1].min() - .5, X_train[:, 1].max() + .5

    x_min, x_max = -2.25, 2.25
    y_min, y_max = -2.25, 2.25

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    logits = model(torch.as_tensor(np.c_[xx.ravel(), yy.ravel()]).float().to(device))
    logits = logits.detach().cpu().numpy().reshape(xx.shape)

    yhat = sigmoid(logits)

    # 1st plot
    ax = plt.subplot(1, 3, 1)

    contour = ax.contourf(xx, yy, logits, 25, cmap=cm, alpha=.8)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$z = b + w_1x_1 + w_2x_2$')
    ax.grid(False)
    ax_c = plt.colorbar(contour)
    ax_c.set_label("$z$", rotation=0)

    # 2nd plot
    ax = fig.add_subplot(1, 3, 2, projection='3d')

    surf = ax.plot_surface(xx, yy, yhat, rstride=1, cstride=1, alpha=.5, cmap=cm, linewidth=0, antialiased=True, vmin=0, vmax=1)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$\sigma(z) = P(y=1)$')

    ax_c = plt.colorbar(surf)
    ax_c.set_ticks([0, .25, .5, .75, 1])
    ax.view_init(30, 220)

    # 3rd plot
    ax = plt.subplot(1, 3, 3)

    ax.contour(xx, yy, yhat, levels=[.5], cmap="Greys", vmin=0, vmax=1)
    contour = ax.contourf(xx, yy, yhat, 25, cmap=cm, alpha=.8, vmin=0, vmax=1)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$\sigma(z) = P(y=1)$')
    ax.grid(False)

    ax_c = plt.colorbar(contour)
    ax_c.set_ticks([0, .25, .5, .75, 1])

    plt.tight_layout()

    return fig

def one_dimension(x, y, colors=None):
    if colors is None:
        colors = ['r', 'b']
    fig, ax = plt.subplots(1, 1, figsize=(10, 2))

    ax.grid(False)
    ax.set_ylim([-.1, .1])
    ax.axes.get_yaxis().set_visible(False)
    ax.plot([-3, 3], [0, 0], linewidth=2, c='k', zorder=1)
    ax.plot([0, 0], [-.03, .03], c='k', zorder=1)

    ax.scatter(x[y==1], np.zeros_like(x[y==1]), c=colors[1], s=150, zorder=2, linewidth=3)
    ax.scatter(x[y==0], np.zeros_like(x[y==0]), c=colors[0], s=150, zorder=2, linewidth=3)
    ax.set_xlabel(r'$X_1$')
    ax.set_title('One Dimension')

    fig.tight_layout()

    return fig

def two_dimensions(x, y, colors=None):
    if colors is None:
        colors = ['r', 'b']

    x2 = np.concatenate([x.reshape(-1, 1), (x ** 2).reshape(-1, 1)], axis=1)

    fig = plt.figure(figsize=(10, 4.5))
    gs = fig.add_gridspec(3, 2)

    ax = fig.add_subplot(gs[2, 0])

    ax.grid(False)
    ax.set_ylim([-.1, .1])
    ax.axes.get_yaxis().set_visible(False)
    ax.plot([-3, 3], [0, 0], linewidth=2, c='k', zorder=1)
    ax.plot([0, 0], [-.03, .03], c='k', zorder=1)

    ax.scatter(x[y==1], np.zeros_like(x[y==1]), c=colors[1], s=150, zorder=2, linewidth=3)
    ax.scatter(x[y==0], np.zeros_like(x[y==0]), c=colors[0], s=150, zorder=2, linewidth=3)
    ax.set_xlabel(r'$X_1$')
    ax.set_title('One Dimension')

    ax = fig.add_subplot(gs[:, 1])

    ax.scatter(*x2[y==1, :].T, c='b', s=150, zorder=2, linewidth=3)
    ax.scatter(*x2[y==0, :].T, c='r', s=150, zorder=2, linewidth=3)
    ax.plot([-2, 2], [1, 1], 'k--', linewidth=2)
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2=X_1^2$')
    ax.set_title('Two Dimensions')

    fig.tight_layout()
    return fig

def figure9(x, y, model, device, probabilities, threshold, shift=0.0, annot=False, cm=None, cm_bright=None):
    fig = plt.figure(figsize=(15, 5))
    gs = fig.add_gridspec(3, 3)

    ax = fig.add_subplot(gs[:, 0])
    probability_contour(ax, model, device, x, y, threshold, cm, cm_bright)

    if cm_bright is None:
        colors = ['r', 'b']
    else:
        colors = cm_bright.colors

    ax = fig.add_subplot(gs[1, 1:])
    probability_line(ax, y, probabilities, threshold, shift, annot, colors)

    fig.tight_layout()
    return fig
395 | fig, ax = plt.subplots(1, 1, figsize=(10, 2)) 396 | probability_line(ax, y, probabilities, threshold, shift, annot, colors) 397 | fig.tight_layout() 398 | return fig 399 | 400 | def figure17(y, probabilities, threshs): 401 | cms = [confusion_matrix(y, (probabilities >= threshold)) for threshold in threshs] 402 | rates = np.array(list(map(tpr_fpr, cms))) 403 | precrec = np.array(list(map(precision_recall, cms))) 404 | precrec = np.nan_to_num(precrec, nan=1.) 405 | fig = eval_curves(rates[:, 1], rates[:, 0], precrec[:, 1], precrec[:, 0], threshs, line=True, annot=False) 406 | return fig 407 | 408 | def figure19(y, probabilities, threshs=(.4, .5, .57), colors=None): 409 | fig, axs = plt.subplots(3, 1, figsize=(10, 6)) 410 | probability_line(axs[0], y, probabilities, threshs[0], 0.0, False, colors) 411 | probability_line(axs[1], y, probabilities, threshs[1], 0.0, False, colors) 412 | probability_line(axs[2], y, probabilities, threshs[2], 0.0, False, colors) 413 | fig.tight_layout() 414 | return fig 415 | 416 | def figure20(y): 417 | fpr_perfect, tpr_perfect, thresholds1_perfect = roc_curve(y, y) 418 | prec_perfect, rec_perfect, thresholds2_perfect = precision_recall_curve(y, y) 419 | fig = eval_curves(fpr_perfect, tpr_perfect, rec_perfect, prec_perfect, thresholds1_perfect, thresholds2_perfect, line=True) 420 | return fig 421 | 422 | def figure21(y, probabilities): 423 | fpr_random, tpr_random, thresholds1_random = roc_curve(y, probabilities) 424 | prec_random, rec_random, thresholds2_random = precision_recall_curve(y, probabilities) 425 | fig = eval_curves(fpr_random, tpr_random, rec_random, prec_random, thresholds1_random, thresholds2_random, line=True) 426 | axs = fig.axes 427 | axs[0].plot([0, 1], [0, 1], 'k--', linewidth=2) 428 | axs[1].plot([0, 1], [y.mean(), y.mean()], 'k--', linewidth=2) 429 | return fig -------------------------------------------------------------------------------- /plots/chapter4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.style.use('fivethirtyeight') 6 | 7 | def plot_images(images, targets, n_plot=30): 8 | n_rows = n_plot // 6 + ((n_plot % 6) > 0) 9 | fig, axes = plt.subplots(n_rows, 6, figsize=(9, 1.5 * n_rows)) 10 | axes = np.atleast_2d(axes) 11 | 12 | for i, (image, target) in enumerate(zip(images[:n_plot], targets[:n_plot])): 13 | row, col = i // 6, i % 6 14 | ax = axes[row, col] 15 | ax.set_title('#{} - Label:{}'.format(i, target), {'size': 12}) 16 | # plot filter channel in grayscale 17 | ax.imshow(image.squeeze(), cmap='gray', vmin=0, vmax=1) 18 | 19 | for ax in axes.flat: 20 | ax.set_xticks([]) 21 | ax.set_yticks([]) 22 | ax.label_outer() 23 | 24 | plt.tight_layout() 25 | return fig 26 | 27 | def image_channels(red, green, blue, rgb, gray, rows=(0, 1, 2)): 28 | fig, axs = plt.subplots(len(rows), 4, figsize=(15, 5.5)) 29 | 30 | zeros = np.zeros((5, 5), dtype=np.uint8) 31 | 32 | titles1 = ['Red', 'Green', 'Blue', 'Grayscale Image'] 33 | titles0 = ['image_r', 'image_g', 'image_b', 'image_gray'] 34 | titles2 = ['as first channel', 'as second channel', 'as third channel', 'RGB Image'] 35 | 36 | idx0 = np.argmax(np.array(rows) == 0) 37 | idx1 = np.argmax(np.array(rows) == 1) 38 | idx2 = np.argmax(np.array(rows) == 2) 39 | 40 | for i, m in enumerate([red, green, blue, gray]): 41 | if 0 in rows: 42 | axs[idx0, i].axis('off') 43 | axs[idx0, i].invert_yaxis() 44 | if (1 in rows) or (i < 3): 45 | axs[idx0, 
i].text(0.15, 0.25, str(m.astype(np.uint8)), verticalalignment='top') 46 | axs[idx0, i].set_title(titles0[i], fontsize=16) 47 | 48 | if 1 in rows: 49 | axs[idx1, i].set_title(titles1[i], fontsize=16) 50 | axs[idx1, i].set_xlabel('5x5', fontsize=14) 51 | axs[idx1, i].imshow(m, cmap=plt.cm.gray) 52 | 53 | if 2 in rows: 54 | axs[idx2, i].set_title(titles2[i], fontsize=16) 55 | axs[idx2, i].set_xlabel(f'5x5x3 - {titles1[i][0]} only', fontsize=14) 56 | if i < 3: 57 | stacked = [zeros] * 3 58 | stacked[i] = m 59 | axs[idx2, i].imshow(np.stack(stacked, axis=2)) 60 | else: 61 | axs[idx2, i].imshow(rgb) 62 | 63 | for r in [1, 2]: 64 | if r in rows: 65 | idx = idx1 if r == 1 else idx2 66 | axs[idx, i].set_xticks([]) 67 | axs[idx, i].set_yticks([]) 68 | for k, v in axs[idx, i].spines.items(): 69 | v.set_color('black') 70 | v.set_linewidth(.8) 71 | 72 | if 1 in rows: 73 | axs[idx1, 0].set_ylabel('Single\nChannel\n(grayscale)', rotation=0, labelpad=40, fontsize=12) 74 | axs[idx1, 3].set_xlabel('5x5 = 0.21R + 0.72G + 0.07B') 75 | if 2 in rows: 76 | axs[idx2, 0].set_ylabel('Three\nChannels\n(color)', rotation=0, labelpad=40, fontsize=12) 77 | axs[idx2, 3].set_xlabel('5x5x3 = (R, G, B) stacked') 78 | fig.tight_layout() 79 | return fig 80 | 81 | def figure5(sbs_logistic, sbs_nn): 82 | fig, axs = plt.subplots(1, 2, figsize=(15, 6)) 83 | axs[0].plot(sbs_logistic.losses, 'b--', label='Logistic - Training') 84 | axs[1].plot(sbs_logistic.val_losses, 'r--', label='Logistic - Validation') 85 | axs[0].plot(sbs_nn.losses, 'b', label='3-layer Network - Training', alpha=.5) 86 | axs[1].plot(sbs_nn.val_losses, 'r', label='3-layer Network - Validation', alpha=.5) 87 | axs[0].set_xlabel('Epochs') 88 | axs[0].set_ylabel('Losses') 89 | axs[0].set_ylim([0.45, 0.75]) 90 | axs[0].legend() 91 | axs[1].set_xlabel('Epochs') 92 | axs[1].set_ylabel('Losses') 93 | axs[1].set_ylim([0.45, 0.75]) 94 | axs[1].legend() 95 | fig.tight_layout() 96 | return fig 97 | 98 | def figure7(weights): 99 | fig, axs = plt.subplots(1, 5, figsize=(15, 4)) 100 | 101 | for i, m in enumerate(weights): 102 | axs[i].imshow(m.reshape(-1, 5).tolist(), cmap='gray') 103 | axs[i].grid(False) 104 | axs[i].set_xticks([]) 105 | axs[i].set_yticks([]) 106 | axs[i].set_title(r'$w_{0' + str(i) + '}$') 107 | 108 | fig.suptitle('Hidden Layer #0') 109 | fig.subplots_adjust(top=0.6) 110 | fig.tight_layout() 111 | return fig 112 | 113 | def figure5b(sbs_logistic, sbs_nn, sbs_relu): 114 | fig, axs = plt.subplots(1, 2, figsize=(15, 6)) 115 | axs[0].plot(sbs_logistic.losses, 'b--', label='Logistic - Training') 116 | axs[1].plot(sbs_logistic.val_losses, 'r--', label='Logistic - Validation') 117 | 118 | axs[0].plot(sbs_nn.losses, 'b', label='3-layer Network - Training', alpha=.5) 119 | axs[1].plot(sbs_nn.val_losses, 'r', label='3-layer Network - Validation', alpha=.5) 120 | 121 | axs[0].plot(sbs_relu.losses, 'b', label='ReLU Network - Training', alpha=.8) 122 | axs[1].plot(sbs_relu.val_losses, 'r', label='ReLU Network - Validation', alpha=.8) 123 | 124 | axs[0].set_xlabel('Epochs') 125 | axs[0].set_ylabel('Losses') 126 | axs[0].legend() 127 | axs[1].set_xlabel('Epochs') 128 | axs[1].set_ylabel('Losses') 129 | axs[1].legend() 130 | fig.tight_layout() 131 | return fig 132 | 133 | def plot_activation(func, name=None): 134 | z = torch.linspace(-5, 5, 1000) 135 | z.requires_grad_(True) 136 | func(z).sum().backward() 137 | sig = func(z).detach() 138 | 139 | fig, ax = plt.subplots(1, 1, figsize=(8, 5)) 140 | 141 | # Move left y-axis and bottom x-axis to centre, passing through (0,0)
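# Note: the gradient curve plotted in red below comes from autograd; since z
# requires gradients, calling func(z).sum().backward() above fills z.grad with
# the elementwise derivative of the activation evaluated at every point of z.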
142 | if name is None: 143 | try: 144 | name = func.__name__ 145 | except AttributeError: 146 | name = '' 147 | 148 | if name == 'sigmoid': 149 | ax.set_ylim([0, 1.1]) 150 | elif name == 'tanh': 151 | ax.set_ylim([-1.1, 1.1]) 152 | elif name == 'relu': 153 | ax.set_ylim([-.1, 5.01]) 154 | else: 155 | ax.set_ylim([-1.1, 5.01]) 156 | 157 | ax.set_xticks(np.arange(-5, 6, 1)) 158 | ax.set_xlabel('z') 159 | ax.set_ylabel(r'$\sigma(z)$') 160 | 161 | # Eliminate upper and right axes 162 | ax.spines['right'].set_color('none') 163 | ax.spines['top'].set_color('none') 164 | 165 | # Show ticks in the left and lower axes only 166 | ax.xaxis.set_ticks_position('bottom') 167 | ax.yaxis.set_ticks_position('left') 168 | 169 | ax.set_title(name, fontsize=16) 170 | ax.plot(z.detach().numpy(), sig.numpy(), c='k', label='Activation') 171 | ax.plot(z.detach().numpy(), z.grad.numpy(), c='r', label='Gradient') 172 | ax.legend(loc=2) 173 | 174 | fig.tight_layout() 175 | fig.show() 176 | return fig 177 | 178 | def weights_comparison(w_logistic_output, w_nn_equiv): 179 | fig = plt.figure(figsize=(15, 6)) 180 | ax0 = plt.subplot2grid((1, 3), (0, 0), colspan=2) 181 | ax1 = plt.subplot2grid((1, 3), (0, 2)) 182 | 183 | ax0.bar(np.arange(25), w_logistic_output.cpu().numpy().squeeze(), alpha=1, label='Logistic') 184 | ax0.bar(np.arange(25), w_nn_equiv.cpu().numpy().squeeze(), alpha=.5, label='3-layer Network (Composed)') 185 | ax0.set_title('Weights') 186 | ax0.set_xlabel('Parameters') 187 | ax0.set_ylabel('Value') 188 | ax0.legend() 189 | 190 | ax1.scatter(w_logistic_output.cpu().numpy(), w_nn_equiv.cpu().numpy(), alpha=.5) 191 | ax1.set_xlabel('Logistic') 192 | ax1.set_ylabel('3-layer network (Composed)') 193 | ax1.set_title('Weights') 194 | ax1.set_xlim([-2, 2]) 195 | ax1.set_ylim([-2, 2]) 196 | 197 | fig.tight_layout() 198 | return fig 199 | -------------------------------------------------------------------------------- /plots/chapter5.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | plt.style.use('fivethirtyeight') 4 | 5 | def plot_images(images, targets, n_plot=30): 6 | n_rows = n_plot // 10 + ((n_plot % 10) > 0) 7 | fig, axes = plt.subplots(n_rows, 10, figsize=(15, 1.5 * n_rows)) 8 | axes = np.atleast_2d(axes) 9 | 10 | for i, (image, target) in enumerate(zip(images[:n_plot], targets[:n_plot])): 11 | row, col = i // 10, i % 10 12 | ax = axes[row, col] 13 | ax.set_title('#{} - Label:{}'.format(i, target), {'size': 12}) 14 | # plot filter channel in grayscale 15 | ax.imshow(image.squeeze(), cmap='gray', vmin=0, vmax=1) 16 | 17 | for ax in axes.flat: 18 | ax.set_xticks([]) 19 | ax.set_yticks([]) 20 | ax.label_outer() 21 | 22 | plt.tight_layout() 23 | return fig -------------------------------------------------------------------------------- /plots/chapter6.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | from copy import deepcopy 7 | from PIL import Image 8 | from stepbystep.v2 import StepByStep 9 | from torchvision.transforms import ToPILImage 10 | from sklearn.linear_model import LinearRegression 11 | from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, MultiStepLR, CyclicLR, LambdaLR 12 | 13 | def EWMA(past_value, current_value, alpha): 14 | return (1 - alpha) * past_value + alpha * current_value 15 | 16 | def calc_ewma(values, 
period): 17 | alpha = 2 / (period + 1) 18 | result = [] 19 | for v in values: 20 | try: 21 | prev_value = result[-1] 22 | except IndexError: 23 | prev_value = 0 24 | 25 | new_value = EWMA(prev_value, v, alpha) 26 | result.append(new_value) 27 | return np.array(result) 28 | 29 | def correction(averaged_value, beta, steps): 30 | return averaged_value / (1 - (beta ** steps)) 31 | 32 | def figure1(folder='rps'): 33 | paper = Image.open(f'{folder}/paper/paper02-089.png') 34 | rock = Image.open(f'{folder}/rock/rock06ck02-100.png') 35 | scissors = Image.open(f'{folder}/scissors/testscissors02-006.png') 36 | 37 | images = [rock, paper, scissors] 38 | titles = ['Rock', 'Paper', 'Scissors'] 39 | 40 | fig, axs = plt.subplots(1, 3, figsize=(12, 5)) 41 | for ax, image, title in zip(axs, images, titles): 42 | ax.imshow(image) 43 | ax.set_xticks([]) 44 | ax.set_yticks([]) 45 | ax.set_title(title) 46 | 47 | return fig 48 | 49 | def calc_corrected_ewma(values, period): 50 | ewma = calc_ewma(values, period) 51 | 52 | alpha = 2 / (period + 1) 53 | beta = 1 - alpha 54 | 55 | result = [] 56 | for step, v in enumerate(ewma): 57 | adj_value = correction(v, beta, step + 1) 58 | result.append(adj_value) 59 | 60 | return np.array(result) 61 | 62 | def figure2(first_images, first_labels): 63 | fig, axs = plt.subplots(1, 6, figsize=(12, 4)) 64 | titles = ['Paper', 'Rock', 'Scissors'] 65 | for i in range(6): 66 | image, label = ToPILImage()(first_images[i]), first_labels[i] 67 | axs[i].imshow(image) 68 | axs[i].set_xticks([]) 69 | axs[i].set_yticks([]) 70 | axs[i].set_title(titles[label], fontsize=12) 71 | fig.tight_layout() 72 | return fig 73 | 74 | def plot_dist(ax, distrib_outputs, p): 75 | ax.hist(distrib_outputs, bins=np.linspace(0, 20, 21)) 76 | ax.set_xlabel('Sum of Adjusted Outputs') 77 | ax.set_ylabel('# of Scenarios') 78 | ax.set_title('p = {:.2f}'.format(p)) 79 | ax.set_ylim([0, 500]) 80 | mean_value = distrib_outputs.mean() 81 | ax.plot([mean_value, mean_value], [0, 500], c='r', linestyle='--', label='Mean = {:.2f}'.format(mean_value)) 82 | ax.legend() 83 | 84 | def figure7(p, distrib_outputs): 85 | fig, ax = plt.subplots(1, 1, figsize=(6, 4)) 86 | plot_dist(ax, distrib_outputs, p) 87 | fig.tight_layout() 88 | return fig 89 | 90 | def figure8(ps=(0.1, 0.3, 0.5, 0.9)): 91 | spaced_points = torch.linspace(.1, 1.1, 11) 92 | fig, axs = plt.subplots(1, 4, figsize=(15, 4)) 93 | for ax, p in zip(axs.flat, ps): 94 | torch.manual_seed(17) 95 | distrib_outputs = torch.tensor([F.linear(F.dropout(spaced_points, p=p), 96 | weight=torch.ones(11), bias=torch.tensor(0)) 97 | for _ in range(1000)]) 98 | plot_dist(ax, distrib_outputs, p) 99 | ax.label_outer() 100 | fig.tight_layout() 101 | return fig 102 | 103 | def figure9(first_images, seed=17, p=.33): 104 | torch.manual_seed(seed) 105 | fig, axs = plt.subplots(1, 3, figsize=(12, 4)) 106 | axs[0].imshow(ToPILImage()(first_images[0])) 107 | axs[0].set_title('Original Image') 108 | axs[0].grid(False) 109 | axs[0].set_xticks([]) 110 | axs[0].set_yticks([]) 111 | axs[1].imshow(ToPILImage()(F.dropout(first_images[:1], p=p)[0])) 112 | axs[1].set_title('Regular Dropout') 113 | axs[1].grid(False) 114 | axs[1].set_xticks([]) 115 | axs[1].set_yticks([]) 116 | axs[2].imshow(ToPILImage()(F.dropout2d(first_images[:1], p=p)[0])) 117 | axs[2].set_title('Two-Dimensional Dropout') 118 | axs[2].grid(False) 119 | axs[2].set_xticks([]) 120 | axs[2].set_yticks([]) 121 | fig.tight_layout() 122 | return fig 123 | 124 | def figure11(losses, val_losses, losses_nodrop, val_losses_nodrop): 125 | 
fig, axs = plt.subplots(1, 1, figsize=(10, 5)) 126 | axs.plot(losses, 'b', label='Training Losses - Dropout') 127 | axs.plot(val_losses, 'r', label='Validation Losses - Dropout') 128 | axs.plot(losses_nodrop, 'b--', label='Training Losses - No Dropout') 129 | axs.plot(val_losses_nodrop, 'r--', label='Validation Losses - No Dropout') 130 | plt.yscale('log') 131 | plt.xlabel('Epochs') 132 | plt.ylabel('Loss') 133 | plt.title('Regularizing Effect') 134 | fig.legend(loc='lower left') 135 | fig.tight_layout() 136 | return fig 137 | 138 | def figure15(alpha=1/3, periods=5, steps=10): 139 | t = np.arange(1, steps+1) 140 | fig, ax = plt.subplots(1, 1, figsize=(6, 4)) 141 | ax.bar(t-1, alpha*(1-alpha)**(t-1), label='EWMA') 142 | ax.bar(t-1, [1/periods]*periods + [0]*(steps-periods), color='r', alpha=.3, label='MA') 143 | ax.set_xticks(t-1) 144 | ax.grid(False) 145 | ax.set_xlabel('Lag') 146 | ax.set_ylabel('Weight') 147 | ax.set_title(r'$EWMA\ \alpha=\frac{1}{3}$ vs MA (5 periods)') 148 | ax.legend() 149 | fig.tight_layout() 150 | return fig 151 | 152 | def ma_vs_ewma(values, periods=19): 153 | ma19 = pd.Series(values).rolling(min_periods=0, window=periods).mean() 154 | fig, ax = plt.subplots(1, 1, figsize=(6, 4)) 155 | ax.plot(values, c='k', label='Temperatures') 156 | ax.plot(ma19, c='k', linestyle='--', label='MA') 157 | ax.plot(calc_ewma(values, periods), c='r', linestyle='--', label='EWMA') 158 | ax.plot(calc_corrected_ewma(values, periods), c='r', linestyle='-', label='Bias-corrected EWMA') 159 | ax.set_title('MA vs EWMA') 160 | ax.set_ylabel('Temperature') 161 | ax.set_xlabel('Days') 162 | ax.legend(fontsize=12) 163 | fig.tight_layout() 164 | return fig 165 | 166 | def figure17(gradients, corrected_gradients, corrected_sq_gradients, adapted_gradients): 167 | fig, axs = plt.subplots(1, 3, figsize=(15, 5)) 168 | ax = axs[0] 169 | ax.plot(gradients, c='k', label=r'$Gradients$') 170 | ax.plot(corrected_gradients, c='r', linestyle='-', label=r'$Bias-corrected\ EWMA(grad)$') 171 | ax.set_title('EWMA for Smoothing') 172 | ax.set_ylabel('Gradient') 173 | ax.set_xlabel('Mini-batches') 174 | ax.set_ylim([-1.5, 1.5]) 175 | ax.legend(fontsize=12) 176 | 177 | ax = axs[1] 178 | ax.plot(1/(np.sqrt(corrected_sq_gradients)+1e-8), c='b', linestyle='-', label=r'$\frac{1}{\sqrt{Bias-corrected\ EWMA(grad^2)}}$') 179 | ax.set_title('EWMA for Scaling') 180 | ax.set_ylabel('Factor') 181 | ax.set_xlabel('Mini-batches') 182 | ax.set_ylim([0, 5]) 183 | ax.legend(fontsize=12) 184 | 185 | ax = axs[2] 186 | ax.plot(gradients, c='k', label='Gradients') 187 | ax.plot(adapted_gradients, c='g', label='Adapted Gradients') 188 | ax.set_title('Gradients') 189 | ax.set_ylabel('Gradient') 190 | ax.set_xlabel('Mini-batches') 191 | ax.set_ylim([-1.5, 1.5]) 192 | ax.legend(fontsize=12) 193 | fig.tight_layout() 194 | return fig 195 | 196 | def contour_data(x_tensor, y_tensor): 197 | linr = LinearRegression() 198 | linr.fit(x_tensor, y_tensor) 199 | b, w = linr.intercept_, linr.coef_[0] 200 | 201 | # we have to split the ranges in 100 evenly spaced intervals each 202 | b_range = np.linspace(.7, 2.3, 101) 203 | w_range = np.linspace(.7, 2.3, 101) 204 | # meshgrid is a handy function that generates a grid of b and w 205 | # values for all combinations 206 | bs, ws = np.meshgrid(b_range, w_range) 207 | all_predictions = np.apply_along_axis( 208 | func1d=lambda x: bs + ws * x, 209 | axis=1, 210 | arr=x_tensor.numpy() 211 | ) 212 | all_labels = y_tensor.numpy().reshape(-1, 1, 1) 213 | all_errors = (all_predictions - all_labels) 214 |
all_losses = (all_errors ** 2).mean(axis=0) 215 | return b, w, bs, ws, all_losses 216 | 217 | def plot_paths(results, b, w, bs, ws, all_losses, axs=None): 218 | if axs is None: 219 | fig, axs = plt.subplots(1, len(results), figsize=(5 * len(results), 5)) 220 | axs = np.atleast_2d(axs) 221 | axs = [ax for row in axs for ax in row] 222 | for i, (ax, desc) in enumerate(zip(axs, results.keys())): 223 | biases = np.array(results[desc]['parms']['']['linear.bias']).squeeze() 224 | weights = np.array(results[desc]['parms']['']['linear.weight']).squeeze() 225 | ax.plot(biases, weights, '-o', linewidth=1, zorder=1, c='k', markersize=4) 226 | # Loss surface 227 | CS = ax.contour(bs[0, :], ws[:, 0], all_losses, cmap=plt.cm.jet, levels=12) 228 | ax.clabel(CS, inline=1, fontsize=10) 229 | ax.scatter(b, w, c='r', zorder=2, s=40) 230 | ax.set_xlim([.7, 2.3]) 231 | ax.set_ylim([.7, 2.3]) 232 | ax.set_xlabel('Bias') 233 | ax.set_ylabel('Weight') 234 | ax.set_title(desc) 235 | ax.label_outer() 236 | fig = ax.get_figure() 237 | fig.tight_layout() 238 | return fig 239 | 240 | def plot_losses(results, axs=None): 241 | n = len(results.keys()) 242 | if axs is None: 243 | fig, axs = plt.subplots(1, n, figsize=(5*n, 4)) 244 | else: 245 | fig = axs[0].get_figure() 246 | for ax, k in zip(axs, results.keys()): 247 | ax.plot(results[k]['losses'], label='Training Loss', c='b') 248 | ax.plot(results[k]['val_losses'], label='Validation Loss', c='r') 249 | ax.set_yscale('log') 250 | ax.set_xlabel('Epochs') 251 | ax.set_ylabel('Loss') 252 | ax.set_ylim([1e-3, 1]) 253 | ax.set_title(k) 254 | ax.legend() 255 | fig.tight_layout() 256 | return fig 257 | 258 | def momentum(past_value, current_value, beta): 259 | return beta * past_value + current_value 260 | 261 | def calc_momentum(values, beta): 262 | result = [] 263 | for v in values: 264 | try: 265 | prev_value = result[-1] 266 | except IndexError: 267 | prev_value = 0 268 | 269 | new_value = momentum(prev_value, v, beta) 270 | result.append(new_value) 271 | return np.array(result) 272 | 273 | def calc_nesterov(values, beta): 274 | result = calc_momentum(values, beta) 275 | return beta * result + values 276 | 277 | def figure21(results): 278 | parm = 'linear.weight' 279 | 280 | fig, axs = plt.subplots(1, 3, figsize=(15, 5)) 281 | 282 | for i, ax in enumerate(axs): 283 | desc = list(results.keys())[i] 284 | gradients = np.array(results[desc]['grads'][''][parm]).squeeze() 285 | momentums = calc_momentum(gradients, 0.9) 286 | nesterovs = calc_nesterov(gradients, 0.9) 287 | ax.plot(gradients, c='k', label='Gradients') 288 | if i > 0: 289 | ax.plot(momentums, c='r', label='Momentums') 290 | if i > 1: 291 | ax.plot(nesterovs, c='b', label='Nesterov Momentums') 292 | ax.set_title(desc) 293 | ax.set_ylabel('Gradient') 294 | ax.set_xlabel('Mini-batches') 295 | ax.set_ylim([-2, 1.5]) 296 | ax.legend(fontsize=12) 297 | 298 | fig.tight_layout() 299 | return fig 300 | 301 | def plot_scheduler(dummy_optimizer, dummy_scheduler, logscale=True, ax=None): 302 | learning_rates = [] 303 | for i in range(12): 304 | current_lr = list(map(lambda d: d['lr'], dummy_scheduler.optimizer.state_dict()['param_groups'])) 305 | learning_rates.append(current_lr) 306 | dummy_optimizer.step() 307 | if isinstance(dummy_scheduler, ReduceLROnPlateau): 308 | dummy_loss = 0.1 309 | dummy_scheduler.step(dummy_loss) 310 | else: 311 | dummy_scheduler.step() 312 | 313 | if ax is None: 314 | fig, ax = plt.subplots(1, 1, figsize=(5, 4)) 315 | 316 | ax.plot(learning_rates) 317 | if logscale: 318 | ax.set_yscale('log') 319 
| ax.set_xlabel('Steps') 320 | ax.set_ylabel('Learning Rate') 321 | ax.set_title(type(dummy_scheduler).__name__) 322 | fig = ax.get_figure() 323 | fig.tight_layout() 324 | return fig 325 | 326 | def figure26(dummy_optimizer, dummy_schedulers): 327 | fig, axs = plt.subplots(1, 3, figsize=(15, 4)) 328 | fig = plot_scheduler(dummy_optimizer, dummy_schedulers[0], ax=axs[0], logscale=False) 329 | fig = plot_scheduler(dummy_optimizer, dummy_schedulers[1], ax=axs[1], logscale=False) 330 | fig = plot_scheduler(dummy_optimizer, dummy_schedulers[2], ax=axs[2], logscale=False) 331 | axs[0].set_ylim([9e-5, 1e-3]) 332 | axs[1].set_ylim([9e-5, 1e-3]) 333 | axs[2].set_ylim([9e-5, 1e-3]) 334 | axs[0].set_title('CyclicLR - mode=triangular') 335 | axs[1].set_title('CyclicLR - mode=triangular2') 336 | axs[2].set_title('CyclicLR - mode=exp_range') 337 | fig.tight_layout() 338 | return fig 339 | 340 | def compare_optimizers(model, loss_fn, optimizers, train_loader, val_loader=None, schedulers=None, layers_to_hook='', n_epochs=50): 341 | from stepbystep.v3 import StepByStep 342 | results = {} 343 | model_state = deepcopy(model).state_dict() 344 | 345 | for desc, opt in optimizers.items(): 346 | model.load_state_dict(model_state) 347 | 348 | optimizer = opt['class'](model.parameters(), **opt['parms']) 349 | 350 | sbs = StepByStep(model, loss_fn, optimizer) 351 | sbs.set_loaders(train_loader, val_loader) 352 | 353 | try: 354 | if schedulers is not None: 355 | sched = schedulers[desc] 356 | scheduler = sched['class'](optimizer, **sched['parms']) 357 | sbs.set_lr_scheduler(scheduler) 358 | except KeyError: 359 | pass 360 | 361 | sbs.capture_parameters(layers_to_hook) 362 | sbs.capture_gradients(layers_to_hook) 363 | sbs.train(n_epochs) 364 | sbs.remove_hooks() 365 | 366 | parms = deepcopy(sbs._parameters) 367 | grads = deepcopy(sbs._gradients) 368 | 369 | lrs = sbs.learning_rates[:] 370 | if not len(lrs): 371 | lrs = [list(map(lambda p: p['lr'], optimizer.state_dict()['param_groups']))] * n_epochs 372 | 373 | results.update({desc: {'parms': parms, 374 | 'grads': grads, 375 | 'losses': np.array(sbs.losses), 376 | 'val_losses': np.array(sbs.val_losses), 377 | 'state': optimizer.state_dict(), 378 | 'lrs': lrs}}) 379 | 380 | return results 381 | 382 | def figure28(results, b, w, bs, ws, all_losses): 383 | axs = [] 384 | fig = plt.figure(figsize=(15, 12)) 385 | for i in range(3): 386 | axs.append(plt.subplot2grid((5, 3), (0, i), rowspan=2)) 387 | for i in range(3): 388 | axs.append(plt.subplot2grid((5, 3), (3, i), rowspan=2)) 389 | for i in range(3): 390 | axs.append(plt.subplot2grid((5, 3), (2, i))) 391 | 392 | lrs = [results[k]['lrs'] for k in ['SGD + Momentum', 'SGD + Momentum + Step', 'SGD + Momentum + Cycle']] 393 | for ax, l, title in zip(axs[6:], lrs, ['No Scheduler', 'StepLR', 'CyclicLR']): 394 | ax.plot(l) 395 | ax.set_title(title) 396 | if title == 'CyclicLR': 397 | ax.set_xlabel('Mini-batches') 398 | else: 399 | ax.set_xlabel('Epochs') 400 | ax.set_ylabel('Learning Rate') 401 | ax.set_ylim([0.0, .11]) 402 | 403 | fig = plot_paths(results, b, w, bs, ws, all_losses, axs=axs[:6]) 404 | for ax in axs[:6]: 405 | ax.set_xlabel('Bias') 406 | fig.tight_layout() 407 | return fig -------------------------------------------------------------------------------- /plots/chapter7.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | def figure1(): 5 | data = {'AlexNet': (61, .727, 41.8), 6 | 'ResNet-18': (12, 2, 30.24 ), 7 | 'ResNet-34':
(22, 4, 26.7), 8 | 'ResNet-50': (26, 4, 24.6), 9 | 'ResNet-101': (45, 8, 23.4), 10 | 'ResNet-152': (60, 11, 23), 11 | 'VGG-16': (138, 16, 28.5), 12 | 'VGG-19': (144, 20, 28.7), 13 | 'Inception-V3': (27, 6, 22.5), 14 | 'GoogLeNet': (13, 2, 34.2),} 15 | 16 | names = list(data.keys()) 17 | stats = np.array(list(data.values())) 18 | xoff = [0, 0, 0, -.5, 0, 0, 0, 0, -.7, 0] 19 | yoff = [1.5, 0, -5., .5, 1.3, 1.5, 3.5, 3.5, .6, 0] 20 | 21 | fig, ax = plt.subplots(1, 1, figsize=(10, 6)) 22 | ax.scatter(stats[:, 1], 100-stats[:, 2], s=50*stats[:, 0], c=np.arange(12,2,-1), cmap=plt.cm.jet) 23 | ax.scatter(stats[:, 1], 100-stats[:, 2], c='w', s=4) 24 | for i, txt in enumerate(names): 25 | ax.annotate(txt, (stats[i, 1]-.65+xoff[i], 100-stats[i, 2]+1.7+yoff[i]), fontsize=12) 26 | ax.set_xlim([0, 22]) 27 | ax.set_ylim([50, 85]) 28 | ax.set_xlabel('Number of Operations - GFLOPS') 29 | ax.set_ylabel('Top-1 Accuracy (%)') 30 | ax.set_title('Comparing Architectures') 31 | return fig 32 | 33 | def compare_grayscale(converted, grayscale): 34 | fig, axs = plt.subplots(1, 2, figsize=(8, 4)) 35 | for img, ax, title in zip([converted, grayscale], axs, ['Converted', 'Grayscale']): 36 | ax.imshow(img, cmap=plt.cm.gray) 37 | ax.grid(False) 38 | ax.set_title(title) 39 | ax.set_xticks([]) 40 | ax.set_yticks([]) 41 | fig.tight_layout() 42 | return fig 43 | 44 | def before_batchnorm(batch): 45 | fig, axs = plt.subplots(1, 2, figsize=(12, 4)) 46 | for i in range(2): 47 | feature = batch[0][:, i] 48 | axs[i].hist(feature, bins=np.linspace(-3, 3, 15), alpha=.5) 49 | axs[i].set_xlabel(f'Feature #{i}') 50 | axs[i].set_ylabel('# of points') 51 | axs[i].set_title(f'mean={feature.mean():.4f} var={feature.var():.4f}') 52 | axs[i].set_ylim([0, 13]) 53 | axs[i].label_outer() 54 | fig.tight_layout() 55 | return fig 56 | 57 | def after_batchnorm(batch, normalized): 58 | fig, axs = plt.subplots(1, 2, figsize=(12, 4)) 59 | for i in range(2): 60 | feature = batch[0][:, i] 61 | normed = normalized[:, i] 62 | axs[i].hist(feature, bins=np.linspace(-3, 3, 15), alpha=.5, label='Original') 63 | axs[i].hist(normed, bins=np.linspace(-3, 3, 15), alpha=.5, label='Standardized') 64 | axs[i].set_xlabel(f'Feature #{i}') 65 | axs[i].set_ylabel('# of points') 66 | axs[i].set_title(f'mean={normed.mean():.4f} std={normed.std(unbiased=False):.4f}') 67 | axs[i].legend() 68 | axs[i].set_ylim([0, 13]) 69 | axs[i].label_outer() 70 | fig.tight_layout() 71 | return fig 72 | 73 | def compare_skip(image, noskip_image, skip_image): 74 | fig, axs = plt.subplots(1, 3, figsize=(12, 4)) 75 | for img, ax, title in zip([image, noskip_image, skip_image], axs, ['Original', 'No Skip', 'Skip']): 76 | ax.imshow(img, cmap=plt.cm.gray) 77 | ax.grid(False) 78 | ax.set_title(title) 79 | ax.set_xticks([]) 80 | ax.set_yticks([]) 81 | fig.tight_layout() 82 | return fig 83 | -------------------------------------------------------------------------------- /plots/chapterextra.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from collections import namedtuple 7 | from matplotlib import animation 8 | from matplotlib import pyplot as plt 9 | import seaborn as sns 10 | import matplotlib 11 | matplotlib.rcParams['animation.writer'] = 'ffmpeg' 12 | 13 | class Basic(object): 14 | """Basic plot class, NOT to be instantiated directly. 
15 | """ 16 | def __init__(self, ax): 17 | self._title = '' 18 | self._custom_title = '' 19 | self.n_epochs = 0 20 | 21 | self.ax = ax 22 | self.ax.clear() 23 | self.fig = ax.get_figure() 24 | 25 | @property 26 | def title(self): 27 | title = self._title 28 | if not isinstance(title, tuple): 29 | title = (self._title,) 30 | title = tuple([' '.join([self._custom_title, t]) for t in title]) 31 | return title 32 | 33 | @property 34 | def axes(self): 35 | return (self.ax,) 36 | 37 | def load_data(self, **kwargs): 38 | self._prepare_plot() 39 | return self 40 | 41 | def _prepare_plot(self): 42 | pass 43 | 44 | @staticmethod 45 | def _update(i, object, epoch_start=0): 46 | pass 47 | 48 | def set_title(self, title): 49 | """Prepends a custom title to the plot. 50 | Parameters 51 | ---------- 52 | title: String 53 | Custom title to prepend. 54 | Returns 55 | ------- 56 | None 57 | """ 58 | self._custom_title = title 59 | 60 | def plot(self, epoch): 61 | """Plots data at a given epoch. 62 | Parameters 63 | ---------- 64 | epoch: int 65 | Epoch to use for the plotting. 66 | Returns 67 | ------- 68 | fig: figure 69 | Figure containing the plot. 70 | """ 71 | self.__class__._update(epoch, self) 72 | self.fig.tight_layout() 73 | return self.fig 74 | 75 | def animate(self, epoch_start=0, epoch_end=-1): 76 | """Animates plotted data from `epoch_start` to `epoch_end`. 77 | Parameters 78 | ---------- 79 | epoch_start: int, optional 80 | Epoch to start the animation from. 81 | epoch_end: int, optional 82 | Epoch to end the animation. 83 | Returns 84 | ------- 85 | anim: FuncAnimation 86 | Animation function for the data. 87 | """ 88 | if epoch_end == -1: 89 | epoch_end = self.n_epochs 90 | 91 | anim = animation.FuncAnimation(self.fig, self.__class__._update, 92 | fargs=(self, epoch_start), 93 | frames=(epoch_end - epoch_start), 94 | blit=True) 95 | return anim 96 | 97 | class LayerViolins(Basic): 98 | def __init__(self, ax, title=None): 99 | super(LayerViolins, self).__init__(ax) 100 | self.values = None 101 | self.names = None 102 | self._title = title 103 | 104 | def load_data(self, layer_violins_data): 105 | self.values = layer_violins_data.values 106 | self.names = layer_violins_data.names 107 | self.palette = dict(zip(self.names, sns.palettes.husl_palette(len(self.names), .7))) 108 | self.n_epochs = len(self.values) 109 | self._prepare_plot() 110 | self._update(0, self) 111 | return self 112 | 113 | def _prepare_plot(self): 114 | self.line = self.ax.plot([], []) 115 | 116 | @staticmethod 117 | def _update(i, lv, epoch_start=0): 118 | assert len(lv.names) == len(lv.values[i]), "Layer names and values have different lengths!" 
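# Each frame redraws the violin plot from scratch: the values of every layer at
# epoch i (weights, gradients, or activations) are flattened and melted into a
# long-format dataframe, one violin per layer.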
119 | epoch = i + epoch_start 120 | 121 | df = pd.concat([pd.DataFrame(layer_values.ravel(), 122 | columns=[layer_name]).melt(var_name='layers', value_name='values') 123 | for layer_name, layer_values in zip(lv.names, lv.values[i])]) 124 | 125 | lv.ax.clear() 126 | sns.violinplot(data=df, x='layers', y='values', ax=lv.ax, cut=0, palette=lv.palette, density_norm='width', linewidth=1.5, hue='layers') 127 | lv.ax.set_xticklabels(df.layers.unique()) 128 | lv.ax.set_xlabel('Layers') 129 | if lv._title is not None: 130 | lv.ax.set_ylabel(lv._title) 131 | lv.ax.set_ylim([df['values'].min(), df['values'].max()]) 132 | lv.ax.set_title('{} - Epoch: {}'.format(lv.title[0], epoch)) 133 | 134 | return lv.line 135 | 136 | LayerViolinsData = namedtuple('LayerViolinsData', ['names', 'values']) 137 | 138 | def build_model(input_dim, n_layers, units, activation, use_bn=False): 139 | if isinstance(units, list): 140 | assert len(units) == n_layers 141 | else: 142 | units = [units] * n_layers 143 | 144 | model = nn.Sequential() 145 | # Adds first hidden layer with input_dim parameter 146 | model.add_module('h1', nn.Linear(input_dim, units[0], bias=not use_bn)) 147 | model.add_module('a1', activation()) 148 | if use_bn: 149 | model.add_module('bn1', nn.BatchNorm1d(units[0], affine=False)) 150 | 151 | # Adds remaining hidden layers 152 | for i in range(2, n_layers + 1): 153 | model.add_module('h{}'.format(i), nn.Linear(units[i-2], units[i-1], bias=not use_bn)) 154 | model.add_module('a{}'.format(i), activation()) 155 | if use_bn: 156 | model.add_module('bn{}'.format(i), nn.BatchNorm1d(units[i-1], affine=False)) 157 | 158 | # Adds output layer 159 | model.add_module('o', nn.Linear(units[n_layers-1], 1)) 160 | return model 161 | 162 | def get_plot_data(train_loader, n_layers=5, hidden_units=100, activation_fn=None, use_bn=False, before=True, model=None): 163 | import sys 164 | sys.path.append('..') 165 | from stepbystep.v3 import StepByStep 166 | 167 | if model is None: 168 | n_features = train_loader.dataset.tensors[0].shape[1] 169 | if activation_fn is None: 170 | activation_fn = nn.ReLU 171 | model = build_model(n_features, n_layers, hidden_units, activation_fn, use_bn)  # input_dim comes first in build_model's signature 172 | 173 | loss_fn = nn.BCEWithLogitsLoss() 174 | optimizer = optim.SGD(model.parameters(), lr=1e-2) 175 | 176 | n_layers = len(list(filter(lambda c: c[0][0] == 'h', model.named_children()))) 177 | 178 | sbs = StepByStep(model, loss_fn, optimizer) 179 | sbs.set_loaders(train_loader) 180 | sbs.capture_parameters([f'h{i}' for i in range(1, n_layers + 1)]) 181 | sbs.capture_gradients([f'h{i}' for i in range(1, n_layers + 1)]) 182 | sbs.attach_hooks([f'a{i}' for i in range(1, n_layers + 1)]) 183 | sbs.train(1) 184 | 185 | names = [f'h{i}' for i in range(1, n_layers + 1)] 186 | 187 | parameters = [[np.array(sbs._parameters[f'h{i}']['weight']).reshape(-1,) for i in range(1, n_layers + 1)]] 188 | parms_data = LayerViolinsData(names=names, values=parameters) 189 | 190 | gradients = [[np.array(sbs._gradients[f'h{i}']['weight']).reshape(-1,) for i in range(1, n_layers + 1)]] 191 | gradients_data = LayerViolinsData(names=names, values=gradients) 192 | 193 | activations = [[np.array(sbs.visualization[f'a{i}']).reshape(-1,) for i in range(1, n_layers + 1)]] 194 | activations_data = LayerViolinsData(names=names, values=activations) 195 | 196 | return parms_data, gradients_data, activations_data 197 | 198 | def plot_violins(parms, gradients, activations): 199 | fig, axs = plt.subplots(1, 3, figsize=(15, 5)) 200 | titles = ['Weights', 'Activations',
'Gradients'] 201 | parms_plot = LayerViolins(axs[0], 'Weights').load_data(parms) 202 | act_plot = LayerViolins(axs[1], 'Activations').load_data(activations) 203 | grad_plot = LayerViolins(axs[2], 'Gradients').load_data(gradients) 204 | axs[0].set_ylim(np.array(axs[0].axes.get_ylim()) * 1.1) 205 | axs[1].set_ylim(np.array(axs[1].axes.get_ylim()) + np.array([-.2, .2])) 206 | for i in range(3): axs[i].set_title(titles[i]) 207 | fig.tight_layout() 208 | return fig 209 | 210 | def make_init_fn(config): 211 | def weights_init(m): 212 | for c in config.keys(): 213 | if isinstance(m, c): 214 | try: 215 | weight_init_fn = config[c]['w'] 216 | weight_init_fn(m.weight) 217 | except KeyError: 218 | pass 219 | 220 | if m.bias is not None: 221 | try: 222 | bias_init_fn = config[c]['b'] 223 | bias_init_fn(m.bias) 224 | except KeyError: 225 | pass 226 | return weights_init 227 | 228 | def plot_schemes(n_features, n_layers, hidden_units, loader): 229 | fig, axs = plt.subplots(2, 3, figsize=(15, 5)) 230 | act_fns = [nn.Sigmoid, nn.Tanh, nn.ReLU] 231 | winits = [lambda m: nn.init.normal_(m, mean=0.0, std=0.1), 232 | lambda m: nn.init.xavier_uniform_(m), 233 | lambda m: nn.init.kaiming_uniform_(m, nonlinearity='relu')] 234 | 235 | for i in range(3): 236 | model = build_model(n_features, n_layers, hidden_units, act_fns[i], use_bn=False) 237 | 238 | torch.manual_seed(13) 239 | weights_init = make_init_fn({nn.Linear: {'w': winits[i], 'b': nn.init.zeros_}}) 240 | with torch.no_grad(): 241 | model.apply(weights_init) 242 | 243 | parms, gradients, activations = get_plot_data(loader, model=model) 244 | act_plot = LayerViolins(axs[0, i], 'Activations').load_data(activations) 245 | grad_plot = LayerViolins(axs[1, i], 'Gradients').load_data(gradients) 246 | 247 | names = [r'$Sigmoid + N(0,\sigma=0.1)$', r'$Tanh + Xavier$', r'$ReLU + Kaiming$'] 248 | for j in range(2): 249 | ylims = [] 250 | for i in range(3): 251 | ylims.append(np.array(axs[j, i].axes.get_ylim())) 252 | axs[0, i].set_title(names[i]) 253 | axs[1, i].set_title('') 254 | axs[j, i].label_outer() 255 | for i in range(3): 256 | axs[j, i].set_ylim([1.1 * np.array(ylims).min(), 1.1 * np.array(ylims).max()]) 257 | 258 | for i in range(3): 259 | axs[0, i].set_ylim([-1.1, 8]) 260 | axs[1, i].set_ylim([-0.05, 0.05]) 261 | 262 | fig.tight_layout() 263 | return fig 264 | 265 | def plot_scheme_bn(n_features, n_layers, hidden_units, loader): 266 | fig, axs = plt.subplots(2, 3, figsize=(15, 5)) 267 | 268 | winits = [lambda m: nn.init.normal_(m, mean=0.0, std=0.1), 269 | lambda m: nn.init.kaiming_uniform_(m, nonlinearity='relu'), 270 | lambda m: nn.init.normal_(m, mean=0.0, std=0.1),] 271 | 272 | for i in range(3): 273 | model = build_model(n_features, n_layers, hidden_units, nn.ReLU, use_bn=(i==2)) 274 | 275 | torch.manual_seed(13) 276 | weights_init = make_init_fn({nn.Linear: {'w': winits[i], 'b': nn.init.zeros_}}) 277 | with torch.no_grad(): 278 | model.apply(weights_init) 279 | 280 | parms, gradients, activations = get_plot_data(loader, model=model) 281 | act_plot = LayerViolins(axs[0, i], 'Activations').load_data(activations) 282 | grad_plot = LayerViolins(axs[1, i], 'Gradients').load_data(gradients) 283 | 284 | names = [r'$ReLU + N(0,\sigma=0.1)$', r'$ReLU + Kaiming$', r'$ReLU + N(0,\sigma=0.1) + BN$'] 285 | for j in range(2): 286 | ylims = [] 287 | for i in range(3): 288 | ylims.append(np.array(axs[j, i].axes.get_ylim())) 289 | axs[0, i].set_title(names[i]) 290 | axs[1, i].set_title('') 291 | axs[j, i].label_outer() 292 | for i in range(3): 293 | axs[j, 
i].set_ylim([1.1 * np.array(ylims).min(), 1.1 * np.array(ylims).max()]) 294 | 295 | for i in range(3): 296 | axs[0, i].set_ylim([-0.5, 8]) 297 | axs[1, i].set_ylim([-0.05, 0.05]) 298 | 299 | fig.tight_layout() 300 | return fig 301 | 302 | def distributions(X_reg, y_reg): 303 | fig, axs = plt.subplots(1, 2, figsize=(10, 4)) 304 | axs[0].hist(X_reg.view(-1,).numpy()) 305 | axs[0].set_xlabel('Feature Values') 306 | axs[0].set_ylabel('Count') 307 | axs[0].set_title('Distribution of X') 308 | axs[1].hist(y_reg.view(-1,).numpy()) 309 | axs[1].set_xlabel('Target Values') 310 | axs[1].set_ylabel('Count') 311 | axs[1].set_title('Distribution of y') 312 | fig.tight_layout() 313 | return fig 314 | 315 | # https://stackoverflow.com/questions/34017866/arrow-on-a-line-plot-with-matplotlib 316 | def add_arrow(line, position=None, direction='right', size=15, color=None, lw=2, alpha=1.0, text=None, text_offset=(0 , 0)): 317 | """ 318 | add an arrow to a line. 319 | 320 | line: Line2D object 321 | position: x-position of the arrow. If None, mean of xdata is taken 322 | direction: 'left' or 'right' 323 | size: size of the arrow in fontsize points 324 | color: if None, line color is taken. 325 | """ 326 | if color is None: 327 | color = line.get_color() 328 | 329 | xdata = line.get_xdata() 330 | ydata = line.get_ydata() 331 | 332 | if position is None: 333 | position = xdata.mean() 334 | # find closest index 335 | start_ind = np.argmin(np.absolute(xdata - position)) 336 | if direction == 'right': 337 | end_ind = start_ind + 1 338 | else: 339 | end_ind = start_ind - 1 340 | 341 | line.axes.annotate('', 342 | xytext=(xdata[start_ind], ydata[start_ind]), 343 | xy=(xdata[end_ind], ydata[end_ind]), 344 | arrowprops=dict(arrowstyle="->", color=color, lw=lw, linestyle='--' if alpha < 1 else '-', alpha=alpha), 345 | size=size, 346 | ) 347 | if text is not None: 348 | line.axes.annotate(text, color=color, 349 | xytext=(xdata[end_ind] + text_offset[0], ydata[end_ind] + text_offset[1]), 350 | xy=(xdata[end_ind], ydata[end_ind]), 351 | size=size, 352 | ) 353 | 354 | def make_line(ax, point): 355 | point = np.vstack([[0., 0.], np.array(point.squeeze().tolist())]) 356 | line = ax.plot(*point.T, lw=0)[0] 357 | return line 358 | 359 | def compare_grads(grads_before, grads_after): 360 | fig, ax = plt.subplots(1, 1, figsize=(5, 3)) 361 | ax.set_xlim([0, 3]) 362 | ax.set_ylim([0, 1.5]) 363 | ax.set_xlabel('Parameter 0') 364 | ax.set_ylabel('Parameter 1') 365 | ax.set_title('Gradients') 366 | add_arrow(make_line(ax, grads_before), lw=2, color='k', text=r'$grad$', 367 | size=12, alpha=1.0, text_offset=(-.13, .03)) 368 | add_arrow(make_line(ax, grads_after), lw=2, color='r', text=r'$clipped\ grad$', 369 | size=12, alpha=1.0, text_offset=(-.33, .03)) 370 | fig.tight_layout() 371 | return fig 372 | 373 | def gradient_distrib(sbs1, layer1, sbs2, layer2): 374 | fig, axs = plt.subplots(1, 2, figsize=(10, 4)) 375 | axs[0].hist(np.array(sbs1._gradients[layer1]['weight']).reshape(-1,), bins=np.linspace(-10, 10, 41)) 376 | axs[0].set_ylim([0, 4000]) 377 | axs[0].set_xlabel('Gradients') 378 | axs[0].set_ylabel('# Updates') 379 | axs[0].set_title('Using clip_grad_value_') 380 | axs[1].hist(np.array(sbs2._gradients[layer2]['weight']).reshape(-1,), bins=np.linspace(-10, 10, 41)) 381 | axs[1].set_ylim([0, 4000]) 382 | axs[1].set_xlabel('Gradients') 383 | axs[1].label_outer() 384 | axs[1].set_title('Using hooks') 385 | fig.tight_layout() 386 | return fig 387 | -------------------------------------------------------------------------------- 
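The helpers in plots/chapterextra.py above can be composed end to end. Here is a minimal, hypothetical usage sketch (not part of the repository; the random dataset and every argument value are assumptions made for illustration only):

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Made-up binary classification data: 128 points with 10 features
torch.manual_seed(13)
X = torch.randn(128, 10)
y = (X.sum(dim=1, keepdim=True) > 0).float()
loader = DataLoader(TensorDataset(X, y), batch_size=32)

# Five hidden layers of 100 ReLU units each (see build_model above)
model = build_model(input_dim=10, n_layers=5, units=100, activation=nn.ReLU)

# Kaiming-initialized weights and zeroed biases via make_init_fn
init_fn = make_init_fn({nn.Linear: {'w': lambda w: nn.init.kaiming_uniform_(w, nonlinearity='relu'),
                                    'b': nn.init.zeros_}})
with torch.no_grad():
    model.apply(init_fn)

# get_plot_data trains for a single epoch and captures weights, gradients,
# and activations; plot_violins shows their per-layer distributions
parms, grads, acts = get_plot_data(loader, model=model)
fig = plot_violins(parms, grads, acts)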
/plots/replay.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.ticker as ticker 3 | from matplotlib import animation, pyplot as plt  # animation is used by Basic.animate below 4 | from collections import namedtuple 5 | from copy import deepcopy 6 | from operator import itemgetter 7 | import torch.nn as nn 8 | import torch 9 | 10 | def build_2d_grid(xlim, ylim, n_lines=11, n_points=1000): 11 | """Returns a 2D grid of boundaries given by `xlim` and `ylim`, 12 | composed of `n_lines` evenly spaced lines of `n_points` each. 13 | Parameters 14 | ---------- 15 | xlim : tuple of 2 ints 16 | Boundaries for the X axis of the grid. 17 | ylim : tuple of 2 ints 18 | Boundaries for the Y axis of the grid. 19 | n_lines : int, optional 20 | Number of grid lines. Default is 11. 21 | If n_lines equals n_points, the grid can be used as 22 | coordinates for the surface of a contourplot. 23 | n_points: int, optional 24 | Number of points in each grid line. Default is 1,000. 25 | Returns 26 | ------- 27 | lines : ndarray 28 | For the cases where n_lines is less than n_points, it 29 | returns an array of shape (2 * n_lines, n_points, 2) 30 | containing both vertical and horizontal lines of the grid. 31 | If n_lines equals n_points, it returns an array of shape 32 | (n_points, n_points, 2), containing all evenly spaced 33 | points inside the grid boundaries. 34 | """ 35 | xs = np.linspace(*xlim, num=n_lines) 36 | ys = np.linspace(*ylim, num=n_points) 37 | x0, y0 = np.meshgrid(xs, ys) 38 | lines_x0 = np.atleast_3d(x0.transpose()) 39 | lines_y0 = np.atleast_3d(y0.transpose()) 40 | 41 | xs = np.linspace(*xlim, num=n_points) 42 | ys = np.linspace(*ylim, num=n_lines) 43 | x1, y1 = np.meshgrid(xs, ys) 44 | lines_x1 = np.atleast_3d(x1) 45 | lines_y1 = np.atleast_3d(y1) 46 | 47 | vertical_lines = np.concatenate([lines_x0, lines_y0], axis=2) 48 | horizontal_lines = np.concatenate([lines_x1, lines_y1], axis=2) 49 | 50 | if n_lines != n_points: 51 | lines = np.concatenate([vertical_lines, horizontal_lines], axis=0) 52 | else: 53 | lines = vertical_lines 54 | 55 | return lines 56 | 57 | FeatureSpaceData = namedtuple('FeatureSpaceData', ['line', 'bent_line', 'prediction', 'target']) 58 | FeatureSpaceLines = namedtuple('FeatureSpaceLines', ['grid', 'input', 'contour']) 59 | 60 | class Basic(object): 61 | """Basic plot class, NOT to be instantiated directly. 62 | """ 63 | def __init__(self, ax): 64 | self._title = '' 65 | self._custom_title = '' 66 | self.n_epochs = 0 67 | 68 | self.ax = ax 69 | self.ax.clear() 70 | self.fig = ax.get_figure() 71 | 72 | @property 73 | def title(self): 74 | title = self._title 75 | if not isinstance(title, tuple): 76 | title = (self._title,) 77 | title = tuple([' '.join([self._custom_title, t]) for t in title]) 78 | return title 79 | 80 | @property 81 | def axes(self): 82 | return (self.ax,) 83 | 84 | def load_data(self, **kwargs): 85 | self._prepare_plot() 86 | return self 87 | 88 | def _prepare_plot(self): 89 | pass 90 | 91 | @staticmethod 92 | def _update(i, object, epoch_start=0): 93 | pass 94 | 95 | def set_title(self, title): 96 | """Prepends a custom title to the plot. 97 | Parameters 98 | ---------- 99 | title: String 100 | Custom title to prepend. 101 | Returns 102 | ------- 103 | None 104 | """ 105 | self._custom_title = title 106 | 107 | def plot(self, epoch): 108 | """Plots data at a given epoch. 109 | Parameters 110 | ---------- 111 | epoch: int 112 | Epoch to use for the plotting.
113 | Returns 114 | ------- 115 | fig: figure 116 | Figure containing the plot. 117 | """ 118 | self.__class__._update(epoch, self) 119 | self.fig.tight_layout() 120 | return self.fig 121 | 122 | def animate(self, epoch_start=0, epoch_end=-1): 123 | """Animates plotted data from `epoch_start` to `epoch_end`. 124 | Parameters 125 | ---------- 126 | epoch_start: int, optional 127 | Epoch to start the animation from. 128 | epoch_end: int, optional 129 | Epoch to end the animation. 130 | Returns 131 | ------- 132 | anim: FuncAnimation 133 | Animation function for the data. 134 | """ 135 | if epoch_end == -1: 136 | epoch_end = self.n_epochs 137 | 138 | anim = animation.FuncAnimation(self.fig, self.__class__._update, 139 | fargs=(self, epoch_start), 140 | frames=(epoch_end - epoch_start), 141 | blit=True) 142 | return anim 143 | 144 | class FeatureSpace(Basic): 145 | """Creates an instance of a FeatureSpace object to make plots 146 | and animations. 147 | Parameters 148 | ---------- 149 | ax: AxesSubplot 150 | Subplot of a Matplotlib figure. 151 | scale_fixed: boolean, optional 152 | If True, axis scales are fixed to the maximum from beginning. 153 | Default is True. 154 | """ 155 | def __init__(self, ax, scale_fixed=True, boundary=True, cmap=None, alpha=1.0): 156 | super(FeatureSpace, self).__init__(ax) 157 | self.ax.grid(False) 158 | self.scale_fixed = scale_fixed 159 | self.boundary = boundary 160 | self.contour = None 161 | self.bent_inputs = None 162 | self.bent_lines = None 163 | self.bent_contour_lines = None 164 | self.grid_lines = None 165 | self.contour_lines = None 166 | self.predictions = None 167 | self.targets = None 168 | 169 | if cmap is None: 170 | cmap = plt.cm.RdBu 171 | self.cmap = cmap 172 | self.alpha = alpha 173 | 174 | self.n_inputs = 0 175 | 176 | self.lines = [] 177 | self.points = [] 178 | 179 | def load_data(self, feature_space_data): 180 | """ Loads feature space data as computed in Replay class. 181 | Parameters 182 | ---------- 183 | feature_space_data: FeatureSpaceData 184 | Namedtuple containing information about original grid 185 | lines, data points and predictions. 186 | Returns 187 | ------- 188 | self: FeatureSpace 189 | Returns the FeatureSpace instance itself.
190 | """ 191 | self.predictions = feature_space_data.prediction 192 | self.targets = feature_space_data.target 193 | self.grid_lines, self.inputs, self.contour_lines = feature_space_data.line 194 | self.bent_lines, self.bent_inputs, self.bent_contour_lines = feature_space_data.bent_line 195 | 196 | self.n_epochs = self.bent_inputs.shape[0] 197 | self.n_inputs = self.bent_inputs.shape[-1] 198 | 199 | self.classes = np.unique(self.targets) 200 | self.bent_inputs = [self.bent_inputs[:, self.targets == target, :] for target in self.classes] 201 | 202 | self._prepare_plot() 203 | return self 204 | 205 | def _prepare_plot(self): 206 | if self.scale_fixed: 207 | xlim = [self.bent_contour_lines[:, :, :, 0].min() - .05, self.bent_contour_lines[:, :, :, 0].max() + .05] 208 | ylim = [self.bent_contour_lines[:, :, :, 1].min() - .05, self.bent_contour_lines[:, :, :, 1].max() + .05] 209 | self.ax.set_xlim(xlim) 210 | self.ax.set_ylim(ylim) 211 | 212 | self.ax.set_xlabel(r"$x_0$", fontsize=12) 213 | self.ax.set_ylabel(r"$x_1$", fontsize=12, rotation=0) 214 | 215 | self.lines = [] 216 | self.points = [] 217 | for c in range(self.grid_lines.shape[0]): 218 | line, = self.ax.plot([], [], linewidth=0.5, color='k') 219 | self.lines.append(line) 220 | for c in range(len(self.classes)): 221 | point = self.ax.scatter([], []) 222 | self.points.append(point) 223 | 224 | contour_x = self.bent_contour_lines[0, :, :, 0] 225 | contour_y = self.bent_contour_lines[0, :, :, 1] 226 | 227 | if self.boundary: 228 | self.contour = self.ax.contourf(contour_x, contour_y, np.zeros(shape=(len(contour_x), len(contour_y))), 229 | cmap=plt.cm.brg, alpha=self.alpha, levels=np.linspace(0, 1, 8)) 230 | 231 | @staticmethod 232 | def _update(i, fs, epoch_start=0, colors=None, **kwargs): 233 | epoch = i + epoch_start 234 | fs.ax.set_title('Epoch: {}'.format(epoch)) 235 | if not fs.scale_fixed: 236 | xlim = [fs.bent_contour_lines[epoch, :, :, 0].min() - .05, fs.bent_contour_lines[epoch, :, :, 0].max() + .05] 237 | ylim = [fs.bent_contour_lines[epoch, :, :, 1].min() - .05, fs.bent_contour_lines[epoch, :, :, 1].max() + .05] 238 | fs.ax.set_xlim(xlim) 239 | fs.ax.set_ylim(ylim) 240 | 241 | if len(fs.lines): 242 | line_coords = fs.bent_lines[epoch].transpose() 243 | 244 | for c, line in enumerate(fs.lines): 245 | line.set_data(*line_coords[:, :, c]) 246 | 247 | if colors is None: 248 | colors = ['r', 'b'] 249 | 250 | if 's' not in kwargs.keys(): 251 | kwargs.update({'s': 10}) 252 | 253 | if 'marker' not in kwargs.keys(): 254 | kwargs.update({'marker': 'o'}) 255 | 256 | input_coords = [coord[epoch].transpose() for coord in fs.bent_inputs] 257 | for c in range(len(fs.points)): 258 | fs.points[c].remove() 259 | fs.points[c] = fs.ax.scatter(*input_coords[c], color=colors[int(fs.classes[c])], **kwargs) 260 | 261 | if fs.boundary: 262 | for c in fs.contour.collections: 263 | c.remove() # removes only the contours, leaves the rest intact 264 | 265 | fs.contour = fs.ax.contourf(fs.bent_contour_lines[epoch, :, :, 0], 266 | fs.bent_contour_lines[epoch, :, :, 1], 267 | fs.predictions[epoch].squeeze(), 268 | cmap=fs.cmap, alpha=fs.alpha, levels=np.linspace(0, 1, 8)) 269 | 270 | fs.ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f')) 271 | fs.ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f')) 272 | fs.ax.locator_params(tight=True, nbins=7) 273 | 274 | #for tick in fs.ax.xaxis.get_major_ticks(): 275 | # tick.label.set_fontsize(10) 276 | #for tick in fs.ax.yaxis.get_major_ticks(): 277 | # tick.label.set_fontsize(10) 278 | 
fs.ax.yaxis.set_label_coords(-0.15,0.5) 279 | 280 | return fs.lines 281 | 282 | 283 | def build_feature_space(model, states, X, y, layer_name=None, contour_points=1000, xlim=(-1, 1), ylim=(-1, 1), 284 | display_grid=True, epoch_start=0, epoch_end=-1): 285 | """Builds a FeatureSpace object to be used for plotting and 286 | animating. 287 | The underlying data, that is, grid lines, inputs and contour 288 | lines, before and after the transformations, as well as the 289 | corresponding predictions for the contour lines, can be 290 | later accessed as the second element of the `feature_space` 291 | property. 292 | Only layers with 2 hidden units are supported! 293 | Parameters 294 | ---------- 295 | model: nn.Module 296 | Model to be used for computing the transformations. 297 | states: list of dicts 298 | Model state dicts captured during training, one per epoch. 299 | X, y: ndarrays 300 | Inputs and targets used for the plotted data points. 301 | layer_name: String, optional 302 | Layer to be used for building the space. 303 | contour_points: int, optional 304 | Number of points in each axis of the contour. 305 | Default is 1,000. 306 | xlim, ylim: tuples of ints, optional 307 | Boundaries for the X and Y axes of the grid. 308 | display_grid: boolean, optional 309 | If True, display grid lines (for 2-dimensional inputs). 310 | Default is True. 311 | epoch_start: int, optional 312 | First epoch to consider. 313 | epoch_end: int, optional 314 | Last epoch to consider. 315 | 316 | Returns 317 | ------- 318 | feature_space_plot: FeatureSpace 319 | An instance of a FeatureSpace object to make plots and 320 | animations. 321 | """ 322 | layers = list(model.named_modules()) 323 | last_layer_name, last_layer_class = layers[-1] 324 | is_logit = not isinstance(last_layer_class, nn.Sigmoid) 325 | if is_logit: 326 | activation_idx = -2 327 | func = lambda x: 1 / (1 + np.exp(-x)) 328 | else: 329 | activation_idx = -3 330 | func = lambda x: x 331 | 332 | names = np.array(list(map(itemgetter(0), layers))) 333 | if layer_name is None: 334 | layer_name = layers[activation_idx][0] 335 | else: 336 | matches = names == layer_name 337 | if np.any(matches): 338 | activation_idx = np.argmax(matches) 339 | else: 340 | raise AttributeError("No layer named {}".format(layer_name)) 341 | 342 | try: 343 | final_dims = layers[activation_idx][1].out_features 344 | except AttributeError: 345 | try: 346 | final_dims = layers[activation_idx + 1][1].in_features 347 | except AttributeError: 348 | final_dims = layers[activation_idx - 1][1].out_features 349 | 350 | assert final_dims == 2, 'Only layers with 2-dimensional outputs are supported!'
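# Sorts the data points by target class so that, from here on, each class
# occupies a contiguous block and can be plotted with its own color.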
351 | 352 | y_ind = np.atleast_1d(y.squeeze().argsort()) 353 | X = np.atleast_2d(X.squeeze())[y_ind].reshape(X.shape) 354 | y = np.atleast_1d(y.squeeze())[y_ind] 355 | 356 | if epoch_end == -1: 357 | epoch_end = len(states)-1 358 | epoch_end = min(epoch_end, len(states)-1) 359 | 360 | #input_dims = self.model.input_shape[-1] 361 | input_dims = X.shape[-1] 362 | n_classes = len(np.unique(y)) 363 | 364 | # Builds a 2D grid and the corresponding contour coordinates 365 | grid_lines = np.array([]) 366 | contour_lines = np.array([]) 367 | if input_dims == 2 and display_grid: 368 | grid_lines = build_2d_grid(xlim, ylim) 369 | contour_lines = build_2d_grid(xlim, ylim, contour_points, contour_points) 370 | 371 | # Initializes "bent" variables, that is, the results of the transformations 372 | bent_lines = [] 373 | bent_inputs = [] 374 | bent_contour_lines = [] 375 | bent_preds = [] 376 | 377 | # For each epoch, uses the corresponding weights 378 | for epoch in range(epoch_start, epoch_end + 1): 379 | X_values = get_values_for_epoch(model, states, epoch, X) 380 | bent_inputs.append(X_values[layer_name]) 381 | # Transforms the inputs 382 | #inputs = [TEST_MODE, X] + weights 383 | #bent_inputs.append(get_activations(inputs=inputs)[0]) 384 | 385 | if input_dims == 2 and display_grid: 386 | # Transforms the grid lines 387 | grid_values = get_values_for_epoch(model, states, epoch, grid_lines.reshape(-1, 2)) 388 | #inputs = [TEST_MODE, grid_lines.reshape(-1, 2)] + weights 389 | output_shape = (grid_lines.shape[:2]) + (-1,) 390 | #bent_lines.append(get_activations(inputs=inputs)[0].reshape(output_shape)) 391 | bent_lines.append(grid_values[layer_name].reshape(output_shape)) 392 | 393 | contour_values = get_values_for_epoch(model, states, epoch, contour_lines.reshape(-1, 2)) 394 | #inputs = [TEST_MODE, contour_lines.reshape(-1, 2)] + weights 395 | output_shape = (contour_lines.shape[:2]) + (-1,) 396 | #bent_contour_lines.append(get_activations(inputs=inputs)[0].reshape(output_shape)) 397 | bent_contour_lines.append(contour_values[layer_name].reshape(output_shape)) 398 | # Makes predictions for each point in the contour surface 399 | #bent_preds.append((get_predictions(inputs=inputs)[0].reshape(output_shape) > .5).astype(int)) 400 | bent_preds.append((func(contour_values[last_layer_name]).reshape(output_shape) > .5).astype(int)) 401 | 402 | 403 | bent_inputs = np.array(bent_inputs) 404 | 405 | # Makes lists into ndarrays and wrap them as namedtuples 406 | bent_lines = np.array(bent_lines) 407 | bent_contour_lines = np.array(bent_contour_lines) 408 | bent_preds = np.array(bent_preds) 409 | 410 | line_data = FeatureSpaceLines(grid=grid_lines, input=X, contour=contour_lines) 411 | bent_line_data = FeatureSpaceLines(grid=bent_lines, input=bent_inputs, contour=bent_contour_lines) 412 | _feature_space_data = FeatureSpaceData(line=line_data, bent_line=bent_line_data, 413 | prediction=bent_preds, target=y) 414 | 415 | return _feature_space_data 416 | 417 | def build_decision_boundary(model, states, X, y, layer_name=None, contour_points=1000, xlim=(-1, 1), ylim=(-1, 1), display_grid=True, 418 | epoch_start=0, epoch_end=-1): 419 | """Builds a FeatureSpace object to be used for plotting and 420 | animating the raw inputs and the decision boundary. 421 | The underlying data, that is, grid lines, inputs and contour 422 | lines, as well as the corresponding predictions for the 423 | contour lines, can be later accessed as the second element of 424 | the `decision_boundary` property. 
417 | def build_decision_boundary(model, states, X, y, layer_name=None, contour_points=1000, xlim=(-1, 1), ylim=(-1, 1), display_grid=True, 418 | epoch_start=0, epoch_end=-1): 419 | """Builds the data for plotting and animating the raw inputs and the decision boundary. 420 | The underlying data, that is, grid lines, inputs and contour 421 | lines, as well as the corresponding predictions for the 422 | contour lines, is returned as a FeatureSpaceData namedtuple. 423 | Only inputs with 2 dimensions are supported! 424 | Parameters 425 | ---------- 426 | model: nn.Module 427 | PyTorch model used to compute the predictions. 428 | states: list of dicts 429 | Model state dicts, one for each epoch. 430 | X: ndarray 431 | y: ndarray 432 | Inputs and their corresponding labels. 433 | layer_name: string, optional 434 | Layer to be used for making predictions. 435 | contour_points: int, optional 436 | Number of points in each axis of the contour. Default is 1,000. 437 | xlim: tuple of ints, optional 438 | ylim: tuple of ints, optional 439 | Boundaries for the X and Y axes of the grid. 440 | display_grid: boolean, optional 441 | If True, displays grid lines. Default is True. 442 | epoch_start: int, optional 443 | epoch_end: int, optional 444 | First and last epochs to consider. 445 | Returns 446 | ------- 447 | decision_boundary_data: FeatureSpaceData 448 | A namedtuple with the data to make plots and animations. 449 | """ 450 | layers = list(model.named_modules()) 451 | last_layer_name, last_layer_class = layers[-1] 452 | is_logit = not isinstance(last_layer_class, nn.Sigmoid) 453 | if is_logit: 454 | activation_idx = -2 455 | func = lambda x: 1 / (1 + np.exp(-x)) 456 | else: 457 | activation_idx = -3 458 | func = lambda x: x 459 | 460 | if layer_name is None: 461 | layer_name = layers[activation_idx][0] 462 | else: 463 | matches = np.array(list(map(itemgetter(0), layers))) == layer_name 464 | if np.any(matches): 465 | activation_idx = np.argmax(matches) 466 | else: 467 | raise AttributeError("No layer named {}".format(layer_name)) 468 | 469 | try: 470 | final_dims = layers[activation_idx][1].out_features 471 | except AttributeError: 472 | final_dims = layers[activation_idx + 1][1].in_features 473 | assert final_dims == 2, 'Only layers with 2-dimensional outputs are supported!' 474 | 475 | y_ind = y.squeeze().argsort() 476 | X = X.squeeze()[y_ind].reshape(X.shape) 477 | y = y.squeeze()[y_ind] 478 | 479 | if epoch_end == -1: 480 | epoch_end = len(states)-1 481 | epoch_end = min(epoch_end, len(states)-1) 482 | 483 | 484 | input_dims = X.shape[-1] 485 | n_classes = len(np.unique(y)) 486 | 487 | # Builds a 2D grid and the corresponding contour coordinates 488 | grid_lines = np.array([]) 489 | if display_grid: 490 | grid_lines = build_2d_grid(xlim, ylim) 491 | 492 | contour_lines = build_2d_grid(xlim, ylim, contour_points, contour_points) 493 | 494 | bent_lines = [] 495 | bent_inputs = [] 496 | bent_contour_lines = [] 497 | bent_preds = [] 498 | # For each epoch, uses the corresponding weights 499 | for epoch in range(epoch_start, epoch_end + 1): 500 | bent_lines.append(grid_lines) 501 | bent_inputs.append(X) 502 | bent_contour_lines.append(contour_lines) 503 | 504 | contour_values = get_values_for_epoch(model, states, epoch, contour_lines.reshape(-1, 2)) 505 | output_shape = (contour_lines.shape[:2]) + (-1,) 506 | # Makes predictions for each point in the contour surface 507 | bent_preds.append((func(contour_values[last_layer_name]).reshape(output_shape) > .5).astype(int)) 508 | 509 | # Makes lists into ndarrays and wraps them as namedtuples 510 | bent_inputs = np.array(bent_inputs) 511 | bent_lines = np.array(bent_lines) 512 | bent_contour_lines = np.array(bent_contour_lines) 513 | bent_preds = np.array(bent_preds) 514 | 515 | line_data = FeatureSpaceLines(grid=grid_lines, input=X, contour=contour_lines) 516 | bent_line_data = FeatureSpaceLines(grid=bent_lines, input=bent_inputs, contour=bent_contour_lines) 517 | _decision_boundary_data = FeatureSpaceData(line=line_data, bent_line=bent_line_data, 518 | prediction=bent_preds, target=y) 519 | 520 | return
_decision_boundary_data 521 | 522 | def get_intermediate_values(model, x): 523 | hooks = {} 524 | visualization = {} 525 | layer_names = {} 526 | 527 | def hook_fn(m, i, o): 528 | visualization[layer_names[m]] = o.cpu().detach().numpy() 529 | 530 | for name, layer in model.named_modules(): 531 | if name != '': 532 | layer_names[layer] = name 533 | hooks[name] = layer.register_forward_hook(hook_fn) 534 | 535 | device = list(model.parameters())[0].device.type 536 | # RNNs 537 | model(torch.as_tensor(x).float().unsqueeze(0).to(device)) 538 | # model(torch.as_tensor(x).float().to(device)) 539 | 540 | for hook in hooks.values(): 541 | hook.remove() 542 | 543 | return visualization 544 | 545 | def get_values_for_epoch(model, states, epoch, x): 546 | with torch.no_grad(): 547 | model.load_state_dict(states[epoch]) 548 | 549 | return get_intermediate_values(model, x) 550 | -------------------------------------------------------------------------------- /postBuild: -------------------------------------------------------------------------------- 1 | # jupyter serverextension enable --sys-prefix jupyter_server_proxy 2 | jupyter server extension enable --sys-prefix jupyter_server_proxy 3 | # tensorboard launches at startup 4 | mv tensorboardserverextension.py ${NB_PYTHON_PREFIX}/lib/python*/site-packages/ 5 | # enable tensorboard extension 6 | # jupyter serverextension enable --sys-prefix tensorboardserverextension 7 | jupyter server extension enable --sys-prefix tensorboardserverextension 8 | -------------------------------------------------------------------------------- /revision/v1.2/Revision_Volume1_v1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/revision/v1.2/Revision_Volume1_v1.2.pdf -------------------------------------------------------------------------------- /revision/v1.2/Revision_Volume2_v1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/revision/v1.2/Revision_Volume2_v1.2.pdf -------------------------------------------------------------------------------- /revision/v1.2/Revision_Volume3_v1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/revision/v1.2/Revision_Volume3_v1.2.pdf -------------------------------------------------------------------------------- /runs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/runs/.gitkeep -------------------------------------------------------------------------------- /stepbystep/v0.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import torch 4 | import matplotlib.pyplot as plt 5 | from torch.utils.tensorboard import SummaryWriter 6 | 7 | plt.style.use('fivethirtyeight') 8 | 9 | class StepByStep(object): 10 | def __init__(self, model, loss_fn, optimizer): 11 | # Here we define the attributes of our class 12 | 13 | # We start by storing the arguments as attributes 14 | # to use them later 15 | self.model = model 16 | self.loss_fn = loss_fn 17 | self.optimizer = optimizer 18 | self.device = 'cuda' if 
torch.cuda.is_available() else 'cpu' 19 | # Let's send the model to the specified device right away 20 | self.model.to(self.device) 21 | 22 | # These attributes are defined here, but since they are 23 | # not informed at the moment of creation, we keep them None 24 | self.train_loader = None 25 | self.val_loader = None 26 | self.writer = None 27 | 28 | # These attributes are going to be computed internally 29 | self.losses = [] 30 | self.val_losses = [] 31 | self.total_epochs = 0 32 | 33 | # Creates the train_step function for our model, 34 | # loss function and optimizer 35 | # Note: there are NO ARGS there! It makes use of the class 36 | # attributes directly 37 | self.train_step_fn = self._make_train_step_fn() 38 | # Creates the val_step function for our model and loss 39 | self.val_step_fn = self._make_val_step_fn() 40 | 41 | def to(self, device): 42 | # This method allows the user to specify a different device 43 | # It sets the corresponding attribute (to be used later in 44 | # the mini-batches) and sends the model to the device 45 | try: 46 | self.device = device 47 | self.model.to(self.device) 48 | except RuntimeError: 49 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 50 | print(f"Couldn't send it to {device}, sending it to {self.device} instead.") 51 | self.model.to(self.device) 52 | 53 | def set_loaders(self, train_loader, val_loader=None): 54 | # This method allows the user to define which train_loader (and val_loader, optionally) to use 55 | # Both loaders are then assigned to attributes of the class 56 | # So they can be referred to later 57 | self.train_loader = train_loader 58 | self.val_loader = val_loader 59 | 60 | def set_tensorboard(self, name, folder='runs'): 61 | # This method allows the user to define a SummaryWriter to interface with TensorBoard 62 | suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S') 63 | self.writer = SummaryWriter(f'{folder}/{name}_{suffix}') 64 | 65 | def _make_train_step_fn(self): 66 | # This method does not need ARGS... 
it can refer to 67 | # the attributes: self.model, self.loss_fn and self.optimizer 68 | 69 | # Builds function that performs a step in the train loop 70 | def perform_train_step_fn(x, y): 71 | # Sets model to TRAIN mode 72 | self.model.train() 73 | 74 | # Step 1 - Computes our model's predicted output - forward pass 75 | yhat = self.model(x) 76 | # Step 2 - Computes the loss 77 | loss = self.loss_fn(yhat, y) 78 | # Step 3 - Computes gradients for both "a" and "b" parameters 79 | loss.backward() 80 | # Step 4 - Updates parameters using gradients and the learning rate 81 | self.optimizer.step() 82 | self.optimizer.zero_grad() 83 | 84 | # Returns the loss 85 | return loss.item() 86 | 87 | # Returns the function that will be called inside the train loop 88 | return perform_train_step_fn 89 | 90 | def _make_val_step_fn(self): 91 | # Builds function that performs a step in the validation loop 92 | def perform_val_step_fn(x, y): 93 | # Sets model to EVAL mode 94 | self.model.eval() 95 | 96 | # Step 1 - Computes our model's predicted output - forward pass 97 | yhat = self.model(x) 98 | # Step 2 - Computes the loss 99 | loss = self.loss_fn(yhat, y) 100 | # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation 101 | return loss.item() 102 | 103 | return perform_val_step_fn 104 |
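# Both _make_*_step_fn methods above rely on the same higher-order function
# pattern: each returns a closure that captures self.model, self.loss_fn and
# self.optimizer, so the loop that calls it only has to supply data. A rough
# standalone sketch of the idea (illustrative, not part of this class):
#   def make_step_fn(model, loss_fn, optimizer):
#       def step_fn(x, y):
#           model.train()
#           loss = loss_fn(model(x), y)
#           loss.backward()
#           optimizer.step()
#           optimizer.zero_grad()
#           return loss.item()
#       return step_fn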
105 | def _mini_batch(self, validation=False): 106 | # The mini-batch can be used with both loaders 107 | # The argument `validation` defines which loader and 108 | # corresponding step function are going to be used 109 | if validation: 110 | data_loader = self.val_loader 111 | step_fn = self.val_step_fn 112 | else: 113 | data_loader = self.train_loader 114 | step_fn = self.train_step_fn 115 | 116 | if data_loader is None: 117 | return None 118 | 119 | # Once the data loader and step function are set, this is the same 120 | # mini-batch loop we had before 121 | mini_batch_losses = [] 122 | for x_batch, y_batch in data_loader: 123 | x_batch = x_batch.to(self.device) 124 | y_batch = y_batch.to(self.device) 125 | 126 | mini_batch_loss = step_fn(x_batch, y_batch) 127 | mini_batch_losses.append(mini_batch_loss) 128 | 129 | loss = np.mean(mini_batch_losses) 130 | return loss 131 | 132 | def set_seed(self, seed=42): 133 | torch.backends.cudnn.deterministic = True 134 | torch.backends.cudnn.benchmark = False 135 | torch.manual_seed(seed) 136 | np.random.seed(seed) 137 | 138 | def train(self, n_epochs, seed=42): 139 | # To ensure reproducibility of the training process 140 | self.set_seed(seed) 141 | 142 | for epoch in range(n_epochs): 143 | # Keeps track of the number of epochs 144 | # by updating the corresponding attribute 145 | self.total_epochs += 1 146 | 147 | # inner loop 148 | # Performs training using mini-batches 149 | loss = self._mini_batch(validation=False) 150 | self.losses.append(loss) 151 | 152 | # VALIDATION 153 | # no gradients in validation! 154 | with torch.no_grad(): 155 | # Performs evaluation using mini-batches 156 | val_loss = self._mini_batch(validation=True) 157 | self.val_losses.append(val_loss) 158 | 159 | # If a SummaryWriter has been set... 160 | if self.writer: 161 | scalars = {'training': loss} 162 | if val_loss is not None: 163 | scalars.update({'validation': val_loss}) 164 | # Records both losses for each epoch under the main tag "loss" 165 | self.writer.add_scalars(main_tag='loss', 166 | tag_scalar_dict=scalars, 167 | global_step=epoch) 168 | 169 | if self.writer: 170 | # Closes the writer 171 | self.writer.close() 172 | 173 | def save_checkpoint(self, filename): 174 | # Builds dictionary with all elements for resuming training 175 | checkpoint = {'epoch': self.total_epochs, 176 | 'model_state_dict': self.model.state_dict(), 177 | 'optimizer_state_dict': self.optimizer.state_dict(), 178 | 'loss': self.losses, 179 | 'val_loss': self.val_losses} 180 | 181 | torch.save(checkpoint, filename) 182 | 183 | def load_checkpoint(self, filename): 184 | # Loads dictionary 185 | checkpoint = torch.load(filename, weights_only=False) 186 | 187 | # Restores state for model and optimizer 188 | self.model.load_state_dict(checkpoint['model_state_dict']) 189 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 190 | 191 | self.total_epochs = checkpoint['epoch'] 192 | self.losses = checkpoint['loss'] 193 | self.val_losses = checkpoint['val_loss'] 194 | 195 | self.model.train() # always use TRAIN for resuming training 196 | 197 | def predict(self, x): 198 | # Sets it to evaluation mode for predictions 199 | self.model.eval() 200 | # Takes a Numpy input and makes it a float tensor 201 | x_tensor = torch.as_tensor(x).float() 202 | # Sends input to device and uses model for prediction 203 | y_hat_tensor = self.model(x_tensor.to(self.device)) 204 | # Sets it back to train mode 205 | self.model.train() 206 | # Detaches it, brings it to CPU and back to Numpy 207 | return y_hat_tensor.detach().cpu().numpy() 208 | 209 | def plot_losses(self): 210 | fig = plt.figure(figsize=(10, 4)) 211 | plt.plot(self.losses, label='Training Loss', c='b') 212 | plt.plot(self.val_losses, label='Validation Loss', c='r') 213 | plt.yscale('log') 214 | plt.xlabel('Epochs') 215 | plt.ylabel('Loss') 216 | plt.legend() 217 | plt.tight_layout() 218 | return fig 219 | 220 | def add_graph(self): 221 | # Fetches a single mini-batch so we can use add_graph 222 | if self.train_loader and self.writer: 223 | x_sample, y_sample = next(iter(self.train_loader)) 224 | self.writer.add_graph(self.model, x_sample.to(self.device)) 225 | --------------------------------------------------------------------------------
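# A minimal usage sketch for the StepByStep class above, assuming the usual
# torch.nn / torch.optim imports and existing DataLoaders (all of the names
# below are illustrative):
#   model = nn.Sequential(nn.Linear(1, 1))
#   loss_fn = nn.MSELoss(reduction='mean')
#   optimizer = optim.SGD(model.parameters(), lr=0.1)
#   sbs = StepByStep(model, loss_fn, optimizer)
#   sbs.set_loaders(train_loader, val_loader)
#   sbs.set_tensorboard('simple_regression')  # optional TensorBoard logging
#   sbs.train(n_epochs=200)
#   fig = sbs.plot_losses()
#   predictions = sbs.predict(new_data)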
/stepbystep/v1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import torch 4 | import random 5 | import matplotlib.pyplot as plt 6 | from torch.utils.tensorboard import SummaryWriter 7 | 8 | plt.style.use('fivethirtyeight') 9 | 10 | class StepByStep(object): 11 | def __init__(self, model, loss_fn, optimizer): 12 | # Here we define the attributes of our class 13 | 14 | # We start by storing the arguments as attributes 15 | # to use them later 16 | self.model = model 17 | self.loss_fn = loss_fn 18 | self.optimizer = optimizer 19 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 20 | # Let's send the model to the specified device right away 21 | self.model.to(self.device) 22 | 23 | # These attributes are defined here, but since they are 24 | # not informed at the moment of creation, we keep them None 25 | self.train_loader = None 26 | self.val_loader = None 27 | self.writer = None 28 | 29 | # These attributes are going to be computed internally 30 | self.losses = [] 31 | self.val_losses = [] 32 | self.total_epochs = 0 33 | 34 | # Creates the train_step function for our model, 35 | # loss function and optimizer 36 | # Note: there are NO ARGS there! It makes use of the class 37 | # attributes directly 38 | self.train_step_fn = self._make_train_step_fn() 39 | # Creates the val_step function for our model and loss 40 | self.val_step_fn = self._make_val_step_fn() 41 | 42 | def to(self, device): 43 | # This method allows the user to specify a different device 44 | # It sets the corresponding attribute (to be used later in 45 | # the mini-batches) and sends the model to the device 46 | try: 47 | self.device = device 48 | self.model.to(self.device) 49 | except RuntimeError: 50 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 51 | print(f"Couldn't send it to {device}, sending it to {self.device} instead.") 52 | self.model.to(self.device) 53 | 54 | def set_loaders(self, train_loader, val_loader=None): 55 | # This method allows the user to define which train_loader (and val_loader, optionally) to use 56 | # Both loaders are then assigned to attributes of the class 57 | # So they can be referred to later 58 | self.train_loader = train_loader 59 | self.val_loader = val_loader 60 | 61 | def set_tensorboard(self, name, folder='runs'): 62 | # This method allows the user to define a SummaryWriter to interface with TensorBoard 63 | suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S') 64 | self.writer = SummaryWriter(f'{folder}/{name}_{suffix}') 65 | 66 | def _make_train_step_fn(self): 67 | # This method does not need ARGS... it can refer to 68 | # the attributes: self.model, self.loss_fn and self.optimizer 69 | 70 | # Builds function that performs a step in the train loop 71 | def perform_train_step_fn(x, y): 72 | # Sets model to TRAIN mode 73 | self.model.train() 74 | 75 | # Step 1 - Computes our model's predicted output - forward pass 76 | yhat = self.model(x) 77 | # Step 2 - Computes the loss 78 | loss = self.loss_fn(yhat, y) 79 | # Step 3 - Computes gradients for both "a" and "b" parameters 80 | loss.backward() 81 | # Step 4 - Updates parameters using gradients and the learning rate 82 | self.optimizer.step() 83 | self.optimizer.zero_grad() 84 | 85 | # Returns the loss 86 | return loss.item() 87 | 88 | # Returns the function that will be called inside the train loop 89 | return perform_train_step_fn 90 | 91 | def _make_val_step_fn(self): 92 | # Builds function that performs a step in the validation loop 93 | def perform_val_step_fn(x, y): 94 | # Sets model to EVAL mode 95 | self.model.eval() 96 | 97 | # Step 1 - Computes our model's predicted output - forward pass 98 | yhat = self.model(x) 99 | # Step 2 - Computes the loss 100 | loss = self.loss_fn(yhat, y) 101 | # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation 102 | return loss.item() 103 | 104 | return perform_val_step_fn 105 | 106 | def _mini_batch(self, validation=False): 107 | # The mini-batch can be used with both loaders 108 | # The argument `validation` defines which loader and 109 | # corresponding step function are going to be used 110 | if validation: 111 | data_loader = self.val_loader 112 | step_fn = self.val_step_fn 113 | else: 114 | data_loader = self.train_loader 115 | step_fn = self.train_step_fn 116 | 117 | if data_loader is None: 118 | return None 119 | 120 | # Once the data loader and step function are set, this is the same 121 | # mini-batch loop we had before 122 | mini_batch_losses = [] 123 | for x_batch, y_batch in data_loader: 124 | x_batch = x_batch.to(self.device) 125 | y_batch = y_batch.to(self.device) 126 | 127 | mini_batch_loss = step_fn(x_batch, y_batch) 128 | mini_batch_losses.append(mini_batch_loss) 129 | 130 | loss = np.mean(mini_batch_losses) 131 | return loss 132 | 133 | def set_seed(self, seed=42): 134 | torch.backends.cudnn.deterministic = True 135 | torch.backends.cudnn.benchmark = False 136 | torch.manual_seed(seed) 137 | np.random.seed(seed) 138 | random.seed(seed) 139 | try: 140 | self.train_loader.sampler.generator.manual_seed(seed) 141 | except AttributeError: 142 | pass 143 |
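# set_seed (v1) additionally seeds Python's own `random` module and, when one
# exists, the generator used by the train loader's sampler. For that last line
# to have any effect, the DataLoader must have been created with a generator,
# e.g. (a sketch, not code from this repo):
#   g = torch.Generator()
#   train_loader = DataLoader(train_data, batch_size=16, shuffle=True, generator=g)
# which makes the shuffling order reproducible across runs as well.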
144 | def train(self, n_epochs, seed=42): 145 | # To ensure reproducibility of the training process 146 | self.set_seed(seed) 147 | 148 | for epoch in range(n_epochs): 149 | # Keeps track of the number of epochs 150 | # by updating the corresponding attribute 151 | self.total_epochs += 1 152 | 153 | # inner loop 154 | # Performs training using mini-batches 155 | loss = self._mini_batch(validation=False) 156 | self.losses.append(loss) 157 | 158 | # VALIDATION 159 | # no gradients in validation! 160 | with torch.no_grad(): 161 | # Performs evaluation using mini-batches 162 | val_loss = self._mini_batch(validation=True) 163 | self.val_losses.append(val_loss) 164 | 165 | # If a SummaryWriter has been set... 166 | if self.writer: 167 | scalars = {'training': loss} 168 | if val_loss is not None: 169 | scalars.update({'validation': val_loss}) 170 | # Records both losses for each epoch under the main tag "loss" 171 | self.writer.add_scalars(main_tag='loss', 172 | tag_scalar_dict=scalars, 173 | global_step=epoch) 174 | 175 | if self.writer: 176 | # Closes the writer 177 | self.writer.close() 178 | 179 | def save_checkpoint(self, filename): 180 | # Builds dictionary with all elements for resuming training 181 | checkpoint = {'epoch': self.total_epochs, 182 | 'model_state_dict': self.model.state_dict(), 183 | 'optimizer_state_dict': self.optimizer.state_dict(), 184 | 'loss': self.losses, 185 | 'val_loss': self.val_losses} 186 | 187 | torch.save(checkpoint, filename) 188 | 189 | def load_checkpoint(self, filename): 190 | # Loads dictionary 191 | checkpoint = torch.load(filename, weights_only=False) 192 | 193 | # Restores state for model and optimizer 194 | self.model.load_state_dict(checkpoint['model_state_dict']) 195 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 196 | 197 | self.total_epochs = checkpoint['epoch'] 198 | self.losses = checkpoint['loss'] 199 | self.val_losses = checkpoint['val_loss'] 200 | 201 | self.model.train() # always use TRAIN for resuming training 202 | 203 | def predict(self, x): 204 | # Sets it to evaluation mode for predictions 205 | self.model.eval() 206 | # Takes a Numpy input and makes it a float tensor 207 | x_tensor = torch.as_tensor(x).float() 208 | # Sends input to device and uses model for prediction 209 | y_hat_tensor = self.model(x_tensor.to(self.device)) 210 | # Sets it back to train mode 211 | self.model.train() 212 | # Detaches it, brings it to CPU and back to Numpy 213 | return y_hat_tensor.detach().cpu().numpy() 214 | 215 | def plot_losses(self): 216 | fig = plt.figure(figsize=(10, 4)) 217 | plt.plot(self.losses, label='Training Loss', c='b') 218 | plt.plot(self.val_losses, label='Validation Loss', c='r') 219 | plt.yscale('log') 220 | plt.xlabel('Epochs') 221 | plt.ylabel('Loss') 222 | plt.legend() 223 | plt.tight_layout() 224 | return fig 225 | 226 | def add_graph(self): 227 | # Fetches a single mini-batch so we can use add_graph 228 | if self.train_loader and self.writer: 229 | x_sample,
y_sample = next(iter(self.train_loader)) 230 | self.writer.add_graph(self.model, x_sample.to(self.device)) 231 | 232 | def count_parameters(self): 233 | return sum(p.numel() for p in self.model.parameters() if p.requires_grad) -------------------------------------------------------------------------------- /stepbystep/v2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import torch 4 | import torch.nn as nn 5 | import random 6 | import matplotlib.pyplot as plt 7 | from torch.utils.tensorboard import SummaryWriter 8 | 9 | plt.style.use('fivethirtyeight') 10 | 11 | class StepByStep(object): 12 | def __init__(self, model, loss_fn, optimizer): 13 | # Here we define the attributes of our class 14 | 15 | # We start by storing the arguments as attributes 16 | # to use them later 17 | self.model = model 18 | self.loss_fn = loss_fn 19 | self.optimizer = optimizer 20 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 21 | # Let's send the model to the specified device right away 22 | self.model.to(self.device) 23 | 24 | # These attributes are defined here, but since they are 25 | # not informed at the moment of creation, we keep them None 26 | self.train_loader = None 27 | self.val_loader = None 28 | self.writer = None 29 | 30 | # These attributes are going to be computed internally 31 | self.losses = [] 32 | self.val_losses = [] 33 | self.total_epochs = 0 34 | 35 | self.visualization = {} 36 | self.handles = {} 37 | 38 | # Creates the train_step function for our model, 39 | # loss function and optimizer 40 | # Note: there are NO ARGS there! It makes use of the class 41 | # attributes directly 42 | self.train_step_fn = self._make_train_step_fn() 43 | # Creates the val_step function for our model and loss 44 | self.val_step_fn = self._make_val_step_fn() 45 | 46 | def to(self, device): 47 | # This method allows the user to specify a different device 48 | # It sets the corresponding attribute (to be used later in 49 | # the mini-batches) and sends the model to the device 50 | try: 51 | self.device = device 52 | self.model.to(self.device) 53 | except RuntimeError: 54 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 55 | print(f"Couldn't send it to {device}, sending it to {self.device} instead.") 56 | self.model.to(self.device) 57 | 58 | def set_loaders(self, train_loader, val_loader=None): 59 | # This method allows the user to define which train_loader (and val_loader, optionally) to use 60 | # Both loaders are then assigned to attributes of the class 61 | # So they can be referred to later 62 | self.train_loader = train_loader 63 | self.val_loader = val_loader 64 | 65 | def set_tensorboard(self, name, folder='runs'): 66 | # This method allows the user to define a SummaryWriter to interface with TensorBoard 67 | suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S') 68 | self.writer = SummaryWriter(f'{folder}/{name}_{suffix}') 69 | 70 | def _make_train_step_fn(self): 71 | # This method does not need ARGS... 
it can refer to 72 | # the attributes: self.model, self.loss_fn and self.optimizer 73 | 74 | # Builds function that performs a step in the train loop 75 | def perform_train_step_fn(x, y): 76 | # Sets model to TRAIN mode 77 | self.model.train() 78 | 79 | # Step 1 - Computes our model's predicted output - forward pass 80 | yhat = self.model(x) 81 | # Step 2 - Computes the loss 82 | loss = self.loss_fn(yhat, y) 83 | # Step 3 - Computes gradients for both "a" and "b" parameters 84 | loss.backward() 85 | # Step 4 - Updates parameters using gradients and the learning rate 86 | self.optimizer.step() 87 | self.optimizer.zero_grad() 88 | 89 | # Returns the loss 90 | return loss.item() 91 | 92 | # Returns the function that will be called inside the train loop 93 | return perform_train_step_fn 94 | 95 | def _make_val_step_fn(self): 96 | # Builds function that performs a step in the validation loop 97 | def perform_val_step_fn(x, y): 98 | # Sets model to EVAL mode 99 | self.model.eval() 100 | 101 | # Step 1 - Computes our model's predicted output - forward pass 102 | yhat = self.model(x) 103 | # Step 2 - Computes the loss 104 | loss = self.loss_fn(yhat, y) 105 | # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation 106 | return loss.item() 107 | 108 | return perform_val_step_fn 109 | 110 | def _mini_batch(self, validation=False): 111 | # The mini-batch can be used with both loaders 112 | # The argument `validation` defines which loader and 113 | # corresponding step function are going to be used 114 | if validation: 115 | data_loader = self.val_loader 116 | step_fn = self.val_step_fn 117 | else: 118 | data_loader = self.train_loader 119 | step_fn = self.train_step_fn 120 | 121 | if data_loader is None: 122 | return None 123 | 124 | # Once the data loader and step function are set, this is the same 125 | # mini-batch loop we had before 126 | mini_batch_losses = [] 127 | for x_batch, y_batch in data_loader: 128 | x_batch = x_batch.to(self.device) 129 | y_batch = y_batch.to(self.device) 130 | 131 | mini_batch_loss = step_fn(x_batch, y_batch) 132 | mini_batch_losses.append(mini_batch_loss) 133 | 134 | loss = np.mean(mini_batch_losses) 135 | return loss 136 | 137 | def set_seed(self, seed=42): 138 | torch.backends.cudnn.deterministic = True 139 | torch.backends.cudnn.benchmark = False 140 | torch.manual_seed(seed) 141 | np.random.seed(seed) 142 | random.seed(seed) 143 | try: 144 | self.train_loader.sampler.generator.manual_seed(seed) 145 | except AttributeError: 146 | pass 147 | 148 | def train(self, n_epochs, seed=42): 149 | # To ensure reproducibility of the training process 150 | self.set_seed(seed) 151 | 152 | for epoch in range(n_epochs): 153 | # Keeps track of the number of epochs 154 | # by updating the corresponding attribute 155 | self.total_epochs += 1 156 | 157 | # inner loop 158 | # Performs training using mini-batches 159 | loss = self._mini_batch(validation=False) 160 | self.losses.append(loss) 161 | 162 | # VALIDATION 163 | # no gradients in validation! 164 | with torch.no_grad(): 165 | # Performs evaluation using mini-batches 166 | val_loss = self._mini_batch(validation=True) 167 | self.val_losses.append(val_loss) 168 | 169 | # If a SummaryWriter has been set... 170 | if self.writer: 171 | scalars = {'training': loss} 172 | if val_loss is not None: 173 | scalars.update({'validation': val_loss}) 174 | # Records both losses for each epoch under the main tag "loss" 175 | self.writer.add_scalars(main_tag='loss', 176 | tag_scalar_dict=scalars, 177 | global_step=epoch) 178 | 179 | if self.writer: 180 | # Closes the writer 181 | self.writer.close() 182 |
183 | def save_checkpoint(self, filename): 184 | # Builds dictionary with all elements for resuming training 185 | checkpoint = {'epoch': self.total_epochs, 186 | 'model_state_dict': self.model.state_dict(), 187 | 'optimizer_state_dict': self.optimizer.state_dict(), 188 | 'loss': self.losses, 189 | 'val_loss': self.val_losses} 190 | 191 | torch.save(checkpoint, filename) 192 | 193 | def load_checkpoint(self, filename): 194 | # Loads dictionary 195 | checkpoint = torch.load(filename, weights_only=False) 196 | 197 | # Restores state for model and optimizer 198 | self.model.load_state_dict(checkpoint['model_state_dict']) 199 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 200 | 201 | self.total_epochs = checkpoint['epoch'] 202 | self.losses = checkpoint['loss'] 203 | self.val_losses = checkpoint['val_loss'] 204 | 205 | self.model.train() # always use TRAIN for resuming training 206 | 207 | def predict(self, x): 208 | # Sets it to evaluation mode for predictions 209 | self.model.eval() 210 | # Takes a Numpy input and makes it a float tensor 211 | x_tensor = torch.as_tensor(x).float() 212 | # Sends input to device and uses model for prediction 213 | y_hat_tensor = self.model(x_tensor.to(self.device)) 214 | # Sets it back to train mode 215 | self.model.train() 216 | # Detaches it, brings it to CPU and back to Numpy 217 | return y_hat_tensor.detach().cpu().numpy() 218 | 219 | def plot_losses(self): 220 | fig = plt.figure(figsize=(10, 4)) 221 | plt.plot(self.losses, label='Training Loss', c='b') 222 | plt.plot(self.val_losses, label='Validation Loss', c='r') 223 | plt.yscale('log') 224 | plt.xlabel('Epochs') 225 | plt.ylabel('Loss') 226 | plt.legend() 227 | plt.tight_layout() 228 | return fig 229 | 230 | def add_graph(self): 231 | # Fetches a single mini-batch so we can use add_graph 232 | if self.train_loader and self.writer: 233 | x_sample, y_sample = next(iter(self.train_loader)) 234 | self.writer.add_graph(self.model, x_sample.to(self.device)) 235 | 236 | def count_parameters(self): 237 | return sum(p.numel() for p in self.model.parameters() if p.requires_grad) 238 | 239 | @staticmethod 240 | def _visualize_tensors(axs, x, y=None, yhat=None, layer_name='', title=None): 241 | # The number of images is the number of subplots in a row 242 | n_images = len(axs) 243 | # Gets max and min values for scaling the grayscale 244 | minv, maxv = np.min(x[:n_images]), np.max(x[:n_images]) 245 | # For each image 246 | for j, image in enumerate(x[:n_images]): 247 | ax = axs[j] 248 | # Sets title, labels, and removes ticks 249 | if title is not None: 250 | ax.set_title('{} #{}'.format(title, j), fontsize=12) 251 | ax.set_ylabel( 252 | '{}\n{}x{}'.format(layer_name, *np.atleast_2d(image).shape), 253 | rotation=0, labelpad=40 254 | ) 255 | xlabel1 = '' if y is None else '\nLabel: {}'.format(y[j]) 256 | xlabel2 = '' if yhat is None else '\nPredicted: {}'.format(yhat[j]) 257 | xlabel = '{}{}'.format(xlabel1, xlabel2) 258 | if len(xlabel): 259 | ax.set_xlabel(xlabel, fontsize=12) 260 | ax.set_xticks([]) 261 | ax.set_yticks([]) 262 | 263 | # Plots weight as an image 264 | ax.imshow( 265 |
np.atleast_2d(image.squeeze()), 266 | cmap='gray', 267 | vmin=minv, 268 | vmax=maxv 269 | ) 270 | return 271 | 272 | def visualize_filters(self, layer_name, **kwargs): 273 | try: 274 | # Gets the layer object from the model 275 | layer = self.model 276 | for name in layer_name.split('.'): 277 | layer = getattr(layer, name) 278 | # We are only looking at filters for 2D convolutions 279 | if isinstance(layer, nn.Conv2d): 280 | # Takes the weight information 281 | weights = layer.weight.data.cpu().numpy() 282 | # The weights have channels_out (filter), channels_in, H, W shape 283 | n_filters, n_channels, _, _ = weights.shape 284 | 285 | # Builds a figure 286 | size = (2 * n_channels + 2, 2 * n_filters) 287 | fig, axes = plt.subplots(n_filters, n_channels, figsize=size) 288 | axes = np.atleast_2d(axes).reshape(n_filters, n_channels) 289 | # For each channel_out (filter) 290 | for i in range(n_filters): 291 | StepByStep._visualize_tensors( 292 | axes[i, :], 293 | weights[i], 294 | layer_name='Filter #{}'.format(i), 295 | title='Channel' if (i == 0) else None 296 | ) 297 | 298 | for ax in axes.flat: 299 | ax.label_outer() 300 | 301 | fig.tight_layout() 302 | return fig 303 | except AttributeError: 304 | return 305 | 306 | def attach_hooks(self, layers_to_hook, hook_fn=None): 307 | # Clear any previous values 308 | self.visualization = {} 309 | # Creates the dictionary to map layer objects to their names 310 | modules = list(self.model.named_modules()) 311 | layer_names = {layer: name for name, layer in modules[1:]} 312 | 313 | if hook_fn is None: 314 | # Hook function to be attached to the forward pass 315 | def hook_fn(layer, inputs, outputs): 316 | # Gets the layer name 317 | name = layer_names[layer] 318 | # Detaches outputs 319 | values = outputs.detach().cpu().numpy() 320 | # Since the hook function may be called multiple times 321 | # for example, if we make predictions for multiple mini-batches 322 | # it concatenates the results 323 | if self.visualization[name] is None: 324 | self.visualization[name] = values 325 | else: 326 | self.visualization[name] = np.concatenate([self.visualization[name], values]) 327 | 328 | for name, layer in modules: 329 | # If the layer is in our list 330 | if name in layers_to_hook: 331 | # Initializes the corresponding key in the dictionary 332 | self.visualization[name] = None 333 | # Register the forward hook and keep the handle in another dict 334 | self.handles[name] = layer.register_forward_hook(hook_fn) 335 | 336 | def remove_hooks(self): 337 | # Loops through all hooks and removes them 338 | for handle in self.handles.values(): 339 | handle.remove() 340 | # Clear the dict, as all hooks have been removed 341 | self.handles = {} 342 | 343 | def visualize_outputs(self, layers, n_images=10, y=None, yhat=None): 344 | layers = list(filter(lambda l: l in self.visualization.keys(), layers)) 345 | shapes = [self.visualization[layer].shape for layer in layers] 346 | n_rows = [shape[1] if len(shape) == 4 else 1 for shape in shapes] 347 | total_rows = np.sum(n_rows) 348 | 349 | fig, axes = plt.subplots(total_rows, n_images, figsize=(1.5*n_images, 1.5*total_rows)) 350 | axes = np.atleast_2d(axes).reshape(total_rows, n_images) 351 | 352 | # Loops through the layers, one layer per row of subplots 353 | row = 0 354 | for i, layer in enumerate(layers): 355 | start_row = row 356 | # Takes the produced feature maps for that layer 357 | output = self.visualization[layer] 358 | 359 | is_vector = len(output.shape) == 2 360 | 361 | for j in range(n_rows[i]): 362 | 
StepByStep._visualize_tensors( 363 | axes[row, :], 364 | output if is_vector else output[:, j].squeeze(), 365 | y, 366 | yhat, 367 | layer_name=layers[i] if is_vector else '{}\nfil#{}'.format(layers[i], row-start_row), 368 | title='Image' if (row == 0) else None 369 | ) 370 | row += 1 371 | 372 | for ax in axes.flat: 373 | ax.label_outer() 374 | 375 | plt.tight_layout() 376 | return fig 377 | 378 | def correct(self, x, y, threshold=.5): 379 | self.model.eval() 380 | yhat = self.model(x.to(self.device)) 381 | y = y.to(self.device) 382 | self.model.train() 383 | 384 | # We get the size of the batch and the number of classes 385 | # (only 1, if it is binary) 386 | n_samples, n_dims = yhat.shape 387 | if n_dims > 1: 388 | # In a multiclass classification, the biggest logit 389 | # always wins, so we don't bother getting probabilities 390 | 391 | # This is PyTorch's version of argmax, 392 | # but it returns a tuple: (max value, index of max value) 393 | _, predicted = torch.max(yhat, 1) 394 | else: 395 | n_dims += 1 396 | # In binary classification, we NEED to check if the 397 | # last layer is a sigmoid (and then it produces probs) 398 | if isinstance(self.model, nn.Sequential) and \ 399 | isinstance(self.model[-1], nn.Sigmoid): 400 | predicted = (yhat > threshold).long() 401 | # or something else (logits), which we need to convert 402 | # using a sigmoid 403 | else: 404 | predicted = (torch.sigmoid(yhat) > threshold).long() 405 | 406 | # How many samples got classified correctly for each class 407 | result = [] 408 | for c in range(n_dims): 409 | n_class = (y == c).sum().item() 410 | n_correct = (predicted[y == c] == c).sum().item() 411 | result.append((n_correct, n_class)) 412 | return torch.tensor(result) 413 | 414 | @staticmethod 415 | def loader_apply(loader, func, reduce='sum'): 416 | results = [func(x, y) for x, y in loader] 417 | results = torch.stack(results, dim=0) 418 | 419 | if reduce == 'sum': 420 | results = results.sum(dim=0) 421 | elif reduce == 'mean': 422 | results = results.float().mean(dim=0) 423 | 424 | return results -------------------------------------------------------------------------------- /tensorboardserverextension.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen 2 | 3 | 4 | def load_jupyter_server_extension(nbapp): 5 | """Launches a TensorBoard server for the `runs` directory on port 6006""" 6 | Popen(["tensorboard", "--logdir", "runs", "--port", "6006"]) 7 | --------------------------------------------------------------------------------
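# A usage sketch for StepByStep.correct and StepByStep.loader_apply from
# stepbystep/v2.py above (sbs and val_loader are assumed to already exist):
#   per_class = StepByStep.loader_apply(val_loader, sbs.correct)
#   # per_class[c] holds (correctly classified, total) counts for class c
#   accuracy = per_class[:, 0].sum() / per_class[:, 1].sum()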