├── .gitignore
├── Chapter00.ipynb
├── Chapter01.ipynb
├── Chapter02.1.ipynb
├── Chapter02.ipynb
├── Chapter03.ipynb
├── Chapter04.ipynb
├── Chapter05.ipynb
├── Chapter06.ipynb
├── Chapter07.ipynb
├── Chapter08.ipynb
├── Chapter09.ipynb
├── Chapter10.ipynb
├── Chapter11.ipynb
├── ChapterExtra.ipynb
├── LICENSE
├── README.md
├── apt.txt
├── config.py
├── data_generation
│   ├── ball.py
│   ├── image_classification.py
│   ├── nlp.py
│   ├── rps.py
│   ├── simple_linear_regression.py
│   └── square_sequences.py
├── data_preparation
│   ├── v0.py
│   ├── v1.py
│   └── v2.py
├── environment.yml
├── helpers.py
├── images
│   ├── 1conv1.png
│   ├── 1conv2.png
│   ├── 3channel1.png
│   ├── 3channel2.png
│   ├── 3channel_filters1.png
│   ├── aiayn_dot.png
│   ├── aiayn_multihead.png
│   ├── alexnet.png
│   ├── alice_dorothy.png
│   ├── architecture_lenet.png
│   ├── attention.png
│   ├── attn_narrow_2heads.png
│   ├── attn_narrow_first_head.png
│   ├── attn_narrow_transf.png
│   ├── basic_attention.png
│   ├── bert_embeddings.png
│   ├── bert_input_embed.png
│   ├── bert_mlm.png
│   ├── bert_nsp.png
│   ├── bidirect_rnn.png
│   ├── blank1.png
│   ├── blank2.png
│   ├── blank_center.png
│   ├── blank_end.png
│   ├── block_tokens.png
│   ├── book10.png
│   ├── book9.png
│   ├── bow.png
│   ├── cbow.png
│   ├── classification.png
│   ├── classification_equiv.png
│   ├── classification_relu2.png
│   ├── classification_softmax.png
│   ├── cls_hidden_state.png
│   ├── context_translate.png
│   ├── context_vector.png
│   ├── conv1.png
│   ├── conv1_ma.png
│   ├── conv1d.png
│   ├── conv1d_dilated.png
│   ├── conv1d_edges.png
│   ├── conv2.png
│   ├── conv3.png
│   ├── conv5.png
│   ├── conv6.png
│   ├── conv7.png
│   ├── conv8.png
│   ├── cross_attn.png
│   ├── dec_both.png
│   ├── decoder.png
│   ├── decoder_self.png
│   ├── decoder_self_simplified.png
│   ├── dropout_paper.png
│   ├── elmo_embed.png
│   ├── elmo_lstm.png
│   ├── embed_arithmetic.png
│   ├── enc_both.png
│   ├── enc_dec_attn_translate.png
│   ├── encdec_attn.png
│   ├── encdec_self_simplified.png
│   ├── encoded_distances.png
│   ├── encoder.png
│   ├── encoder_decoder.png
│   ├── encoder_lost_seq.png
│   ├── encoder_self.png
│   ├── encoder_self_detail.png
│   ├── encoder_self_simplified.png
│   ├── fill1.png
│   ├── fill2.png
│   ├── full_transformer.png
│   ├── full_transformer_and_class.png
│   ├── gru_cell.png
│   ├── inception_model.png
│   ├── inception_modules.png
│   ├── kq_matches.png
│   ├── layer_vs_batch_norm.png
│   ├── logistic_model.png
│   ├── lstm_cell.png
│   ├── multiattn.png
│   ├── multihead_chunking.png
│   ├── new_books.png
│   ├── ngrams.png
│   ├── norm_first.png
│   ├── ohe1.png
│   ├── ohe2.png
│   ├── ohe3.png
│   ├── packed_seq_data.png
│   ├── packed_seq_inddata.png
│   ├── padding1.png
│   ├── padding2.png
│   ├── padding3.png
│   ├── paddings.png
│   ├── paths.png
│   ├── pooling1.png
│   ├── posenc_mod4mod8.png
│   ├── posenc_modnorm4.png
│   ├── posenc_modnorm_deg.png
│   ├── posenc_modnorm_mult.png
│   ├── posenc_modnorm_sincos.png
│   ├── posenc_norm1k.png
│   ├── posenc_norm4.png
│   ├── posenc_norm4_long.png
│   ├── residual.png
│   ├── rest_continuous.png
│   ├── rest_discrete.png
│   ├── rnn1.png
│   ├── rnn2.png
│   ├── rnn_cell_diagram.png
│   ├── rnn_cell_diagram_seq.png
│   ├── score_alignment.png
│   ├── score_alignment_translate.png
│   ├── shifted_target.png
│   ├── sincos_distance.png
│   ├── stacked_encdec.png
│   ├── stacked_layers.png
│   ├── stacked_rnn.png
│   ├── stride1.png
│   ├── strider2.png
│   ├── strider3.png
│   ├── sublayer.png
│   ├── transf_classes.png
│   ├── transf_decself.png
│   ├── transf_encdecself.png
│   ├── transf_encself.png
│   ├── translation_att.png
│   ├── vit_model.png
│   ├── w2v_cbow.png
│   ├── w2v_embed.png
│   └── w2v_logits.png
├── model_configuration
│   ├── v0.py
│   ├── v1.py
│   ├── v2.py
│   ├── v3.py
│   └── v4.py
├── model_training
│   ├── v0.py
│   ├── v1.py
│   ├── v2.py
│   ├── v3.py
│   ├── v4.py
│   └── v5.py
├── plots
│   ├── chapter0.py
│   ├── chapter1.py
│   ├── chapter10.py
│   ├── chapter11.py
│   ├── chapter2.py
│   ├── chapter2_1.py
│   ├── chapter3.py
│   ├── chapter4.py
│   ├── chapter5.py
│   ├── chapter6.py
│   ├── chapter7.py
│   ├── chapter8.py
│   ├── chapter9.py
│   ├── chapterextra.py
│   └── replay.py
├── postBuild
├── revision
│   └── v1.2
│       ├── Revision_Volume1_v1.2.pdf
│       ├── Revision_Volume2_v1.2.pdf
│       └── Revision_Volume3_v1.2.pdf
├── runs
│   └── .gitkeep
├── seq2seq.py
├── stepbystep
│   ├── v0.py
│   ├── v1.py
│   ├── v2.py
│   ├── v3.py
│   └── v4.py
└── tensorboardserverextension.py

-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |

-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Daniel Voigt Godoy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |

-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Deep Learning with PyTorch Step-by-Step
2 |
3 | ## New book: "A Hands-On Guide to Fine-Tuning LLMs"
4 |
5 |
6 |
7 |
8 | Kindle | Paperback | PDF [Leanpub] | PDF [Gumroad]
9 |
10 |
11 | ## Revised for PyTorch 2.x!
12 | The revised version addresses changes in PyTorch, Torchvision, HuggingFace, and other libraries. The chapters most affected were Chapter 4 (in Volume II) and Chapter 11 (in Volume III).
13 |
14 | Please check the PDFs below containing the changes (check the paragraphs highlighted in red):
15 | - [Changes to Volume I](https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/master/revision/v1.2/Revision_Volume1_v1.2.pdf)
16 | - [Changes to Volume II](https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/master/revision/v1.2/Revision_Volume2_v1.2.pdf)
17 | - [Changes to Volume III](https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/master/revision/v1.2/Revision_Volume3_v1.2.pdf)
18 |
19 | [![](images/new_books.png)](https://pytorchstepbystep.com)
20 |
21 | This is the official repository of my book "**Deep Learning with PyTorch Step-by-Step**". Here you will find **one Jupyter notebook** for every **chapter** in the book.
22 |
23 | Each notebook contains **all the code shown** in its corresponding chapter, and you should be able to
24 | **run its cells in sequence** to get the **same outputs as shown in the book**. I strongly believe that being able to **reproduce the results** brings **confidence** to the reader.
25 |
26 | There are **three** options for you to run the Jupyter notebooks:
27 |
28 | ### Google Colab
29 |
30 | You can easily **load the notebooks directly from GitHub** using Colab and run them using a **GPU** provided by Google. You need to be logged in to a Google Account of your own.
31 |
32 | You can go through the chapters right away using the links below:
33 |
34 | #### Part I - Fundamentals
35 |
36 | - [Chapter 0 - Visualizing Gradient Descent](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter00.ipynb)
37 | - [Chapter 1 - A Simple Regression Problem](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter01.ipynb)
38 | - [Chapter 2 - Rethinking the Training Loop](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter02.ipynb)
39 | - [Chapter 2.1 - Going Classy](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter02.1.ipynb)
40 | - [Chapter 3 - A Simple Classification Problem](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter03.ipynb)
41 |
42 | #### Part II - Computer Vision
43 |
44 | - [Chapter 4 - Classifying Images](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter04.ipynb)
45 | - [Chapter 5 - Convolutions](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter05.ipynb)
46 | - [Chapter 6 - Rock, Paper, Scissors](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter06.ipynb)
47 | - [Chapter 7 - Transfer Learning](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter07.ipynb)
48 | - [Extra Chapter - Vanishing and Exploding Gradients](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/ChapterExtra.ipynb)
49 |
50 | #### Part III - Sequences
51 |
52 | - [Chapter 8 - Sequences](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter08.ipynb)
53 | - [Chapter 9 - Sequence-to-Sequence](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter09.ipynb)
54 | - [Chapter 10 - Transform and Roll Out](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter10.ipynb)
55 |
56 | #### Part IV - Natural Language Processing
57 |
58 | - [Chapter 11 - Down the Yellow Brick Rabbit Hole](https://colab.research.google.com/github/dvgodoy/PyTorchStepByStep/blob/master/Chapter11.ipynb)
59 |
60 | ### Binder
61 |
62 | You can also **load the notebooks directly from GitHub** using Binder, but the process is slightly different. It will create an environment on the cloud and allow you to access **Jupyter's Home Page** in your browser, listing all available notebooks, just like on your own computer.
63 |
64 | If you **make changes** to the notebooks, **make sure to download** them, since Binder **does not keep the changes** once you close it.
65 |
66 | You can start your environment on the cloud right now using the button below:
67 |
68 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/dvgodoy/PyTorchStepByStep/master)
69 |
70 | ### Local Installation
71 |
72 | This option will give you more **flexibility**, but it will require **more effort to set up**. I encourage you to try setting up your own environment. It may seem daunting at first, but you can surely accomplish it by following **seven easy steps**:
73 |
74 | **1 - Anaconda**
75 |
76 | If you don't have [**Anaconda's Individual Edition**](https://www.anaconda.com/products/individual) installed yet, now would be a good time to do it - it is a very handy way to start - since it contains most of the Python libraries a data scientist will ever need to develop and train models.
77 |
78 | Please follow **the installation instructions** for your OS:
79 |
80 | - [Windows](https://docs.anaconda.com/anaconda/install/windows/)
81 | - [macOS](https://docs.anaconda.com/anaconda/install/mac-os/)
82 | - [Linux](https://docs.anaconda.com/anaconda/install/linux/)
83 |
84 | Make sure you choose the **Python 3.X** version, since Python 2 was discontinued in January 2020.
85 |
86 | **2 - Conda (Virtual) Environments**
87 |
88 | Virtual environments are a convenient way to isolate Python installations associated with different projects.
89 |
90 | First, you need to choose a **name** for your environment :-) Let's call ours `pytorchbook` (or anything else you find easier to remember). Then, you need to open a **terminal** (in Ubuntu) or **Anaconda Prompt** (in Windows or macOS) and type the following command:
91 |
92 | `conda create -n pytorchbook anaconda`
93 |
94 | The command above creates a conda environment named `pytorchbook` and includes **all anaconda packages** in it (time to get a coffee, it will take a while...). If you want to learn more about creating and using conda environments, please check Anaconda's [**Managing Environments**](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) user guide.
95 |
96 | Did it finish creating the environment? Good! It is time to **activate it**, that is, to make that Python installation the one to be used now. In the same terminal (or Anaconda Prompt), just type:
97 |
98 | `conda activate pytorchbook`
99 |
100 | Your prompt should look like this (if you're using Linux)...
101 |
102 | `(pytorchbook)$`
103 |
104 | or like this (if you're using Windows):
105 |
106 | `(pytorchbook)C:\>`
107 |
108 | Done! You are using a **brand new conda environment** now. You'll need to **activate it** every time you open a new terminal, or, if you're a Windows or macOS user, you can open the corresponding Anaconda Prompt (it will show up as **Anaconda Prompt (pytorchbook)**, in our case), which will have it activated from the start.
109 |
110 | **IMPORTANT**: From now on, I am assuming you'll activate the `pytorchbook` environment every time you open a terminal / Anaconda Prompt. Further installation steps **must** be executed inside the environment.
111 |
112 | **3 - PyTorch**
113 |
114 | It is time to install the star of the show :-) We can go straight to the [**Start Locally**](https://pytorch.org/get-started/locally/) section of its website, and it will automatically select the options that best suit your local environment and show you the command to run.
115 |
116 | Your choices should look like this:
117 |
118 | - PyTorch Build: "Stable"
119 | - Your OS: your operating system
120 | - Package: "Conda"
121 | - Language: "Python"
122 | - CUDA: "None" if you **don't** have a **GPU**, or the latest version (e.g. "10.1") if you **have** a **GPU**.
123 |
124 | The installation command will be shown right below your choices, so you can copy it. If you have a **Windows** computer and **no GPU**, you'd have to run the following command in your **Anaconda Prompt (pytorchbook)**:
125 |
126 | `(pytorchbook) C:\> conda install pytorch torchvision cpuonly -c pytorch`
127 |
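Once the installation finishes, it is worth double-checking it before moving on. Here is a minimal sanity check you can run in a Python session inside the activated environment (the version number below is just an example; yours will likely differ):

```python
import torch

print(torch.__version__)         # e.g., 2.2.1
print(torch.cuda.is_available()) # True only for a CUDA build with a GPU available
```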
128 | **4 - TensorBoard**
129 |
130 | TensorBoard is a powerful tool, and we can use it even if we are developing models in PyTorch. Luckily, you don't need to install the whole of TensorFlow to get it: you can easily **install TensorBoard alone** using **conda**. You just need to run this command in your **terminal** or **Anaconda Prompt** (again, after activating the environment):
131 |
132 | `(pytorchbook)C:\> conda install -c conda-forge tensorboard`
133 |
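To confirm that TensorBoard can consume logs written from PyTorch, you can try a minimal sketch like the one below (the `runs/test` folder name is just an illustration; the book's notebooks write to the `runs` folder):

```python
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/test')  # creates the log folder if needed
writer.add_scalar('loss', 0.5, 1)    # tag, scalar value, global step
writer.close()
```

Then launch TensorBoard pointing at that folder and open the address it prints (usually `http://localhost:6006`):

`(pytorchbook)C:\> tensorboard --logdir runs`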
134 | **5 - GraphViz and TorchViz (optional)**
135 |
136 | > This step is optional, mostly because the installation of GraphViz can sometimes be challenging (especially on Windows). If, for any reason, you do not succeed in installing it correctly, or if you
137 | > decide to skip this installation step, you will still be **able to execute the code in this book** (except for a couple of cells that generate images of a model's structure in the Dynamic Computation Graph section of Chapter 1).
138 |
139 | We need to install GraphViz to be able to use **TorchViz**, a neat package that allows us to visualize a model's structure. Please check the [**installation instructions**](https://www.graphviz.org/download/) for your OS.
140 |
141 | > If you are using **Windows**, please use the installer at [GraphViz's Windows Package](https://graphviz.gitlab.io/_pages/Download/windows/graphviz-2.38.msi). You also need to add GraphViz to the PATH (environment variable) in Windows. Most likely, you can find the GraphViz executable file at `C:\Program Files (x86)\Graphviz2.38\bin`. Once you find it, you need to set or change the PATH accordingly, adding GraphViz's location to it. For more details on how to do that, please refer to [How to Add to Windows PATH Environment Variable](https://bit.ly/3fIwYA5).
142 |
143 | For additional information, you can also check the [How to Install Graphviz Software](https://bit.ly/30Ayct3) guide.
144 |
145 | If you installed GraphViz successfully, you can install the [torchviz](https://github.com/szagoruyko/pytorchviz) package. This package is not part of the Anaconda Distribution Repository and is only available at PyPI, the Python Package Index, so we need to install it using pip.
146 |
147 | Once again, open a **terminal** or **Anaconda Prompt** and run this command (just once
148 | more: after activating the environment):
149 |
150 | `(pytorchbook)C:\> pip install torchviz`
151 |
152 | **6 - Git**
153 |
154 | It is way beyond the scope of this guide to introduce you to version control and its most popular tool: `git`. If you are familiar with it already, great, you can skip this section altogether!
155 |
156 | Otherwise, I'd recommend you learn more about it; it will **definitely** be useful for you later down the line. In the meantime, I will show you the bare minimum, so you can use `git` to **clone this repository** containing all the code used in this book - so you have your own, local copy of it and can modify and experiment with it as you please.
157 |
158 | First, you need to install it. So, head to its [downloads](https://git-scm.com/downloads) page and follow the instructions for your OS. Once the installation is complete, please open a **new terminal** or **Anaconda Prompt** (it's OK to close the previous one). In the new terminal or Anaconda Prompt, you should be able to **run `git` commands**. To clone this repository, you only need to run:
159 |
160 | `(pytorchbook)C:\> git clone https://github.com/dvgodoy/PyTorchStepByStep.git`
161 |
162 | The command above will create a `PyTorchStepByStep` folder which contains a **local copy** of everything available in this GitHub repository.
163 |
164 | **7 - Jupyter**
165 |
166 | After cloning the repository, navigate to the `PyTorchStepByStep` folder and, **once inside it**, you only need to **start Jupyter** on your terminal or Anaconda Prompt:
167 |
168 | `(pytorchbook)C:\> jupyter notebook`
169 |
170 | This will open up your browser, and you will see **Jupyter's Home Page** containing this repository's notebooks and code.
171 |
172 | Congratulations! You are ready to go through the chapters' notebooks!
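By the way, if you completed the optional Step 5, you can verify GraphViz and TorchViz with a minimal sketch like this one (the tiny model is just a placeholder):

```python
import torch
import torch.nn as nn
from torchviz import make_dot

model = nn.Sequential(nn.Linear(1, 1))
yhat = model(torch.randn(8, 1))
# writes the computation graph to graph.png
make_dot(yhat, params=dict(model.named_parameters())).render('graph', format='png')
```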
173 |
174 |

-------------------------------------------------------------------------------- /apt.txt: --------------------------------------------------------------------------------
1 | graphviz
2 |

-------------------------------------------------------------------------------- /config.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import errno
4 | import requests
5 | import subprocess
6 | import shutil
7 | from IPython.display import HTML, display
8 | from tensorboard import manager
9 |
10 | def tensorboard_cleanup():
11 |     info_dir = manager._get_info_dir()
12 |     shutil.rmtree(info_dir)
13 |
14 | FOLDERS = {
15 |     0: ['plots'],
16 |     1: ['plots'],
17 |     2: ['plots', 'data_generation', 'data_preparation', 'model_configuration', 'model_training'],
18 |     21: ['plots', 'data_generation', 'data_preparation', 'model_configuration', 'stepbystep'],
19 |     3: ['plots', 'stepbystep'],
20 |     4: ['plots', 'stepbystep', 'data_generation'],
21 |     5: ['plots', 'stepbystep', 'data_generation', ''],
22 |     6: ['plots', 'stepbystep', 'stepbystep', 'data_generation', 'data_generation', 'data_preparation'],
23 |     7: ['plots', 'stepbystep', 'data_generation'],
24 |     71: ['plots', 'stepbystep', 'data_generation'],
25 |     8: ['plots', 'plots', 'stepbystep', 'data_generation'],
26 |     9: ['plots', 'plots', 'plots', 'stepbystep', 'data_generation'],
27 |     10: ['plots', 'plots', 'plots', 'plots', 'stepbystep', 'data_generation', 'data_generation', '', ''],
28 |     11: ['plots', 'stepbystep', 'data_generation', ''],
29 | }
30 | FILENAMES = {
31 |     0: ['chapter0.py'],
32 |     1: ['chapter1.py'],
33 |     2: ['chapter2.py', 'simple_linear_regression.py', 'v0.py', 'v0.py', 'v0.py'],
34 |     21: ['chapter2_1.py', 'simple_linear_regression.py', 'v2.py', '', 'v0.py'],
35 |     3: ['chapter3.py', 'v0.py'],
36 |     4: ['chapter4.py', 'v0.py', 'image_classification.py'],
37 |     5: ['chapter5.py', 'v1.py', 'image_classification.py', 'helpers.py'],
38 |     6: ['chapter6.py', 'v2.py', 'v3.py', 'rps.py', 'simple_linear_regression.py', 'v2.py'],
39 |     7: ['chapter7.py', 'v3.py', 'rps.py'],
40 |     71: ['chapterextra.py', 'v3.py', 'ball.py'],
41 |     8: ['chapter8.py', 'replay.py', 'v4.py', 'square_sequences.py'],
42 |     9: ['chapter8.py', 'chapter9.py', 'replay.py', 'v4.py', 'square_sequences.py'],
43 |     10: ['chapter8.py', 'chapter9.py', 'chapter10.py', 'replay.py', 'v4.py', 'square_sequences.py', 'image_classification.py', 'helpers.py', 'seq2seq.py'],
44 |     11: ['chapter11.py', 'v4.py', 'nlp.py', 'seq2seq.py'],
45 | }
46 |
47 | try:
48 |     host = os.environ['BINDER_SERVICE_HOST']
49 |     IS_BINDER = True
50 | except KeyError:
51 |     IS_BINDER = False
52 |
53 | try:
54 |     import google.colab
55 |     IS_COLAB = True
56 | except ModuleNotFoundError:
57 |     IS_COLAB = False
58 |
59 | IS_LOCAL = (not IS_BINDER) and (not IS_COLAB)
60 |
61 | def download_to_colab(chapter, branch='master'):
62 |     base_url = 'https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/{}/'.format(branch)
63 |
64 |     folders = FOLDERS[chapter]
65 |     filenames = FILENAMES[chapter]
66 |     for folder, filename in zip(folders, filenames):
67 |         if len(folder):
68 |             try:
69 |                 os.mkdir(folder)
70 |             except OSError as e:
71 |                 if e.errno != errno.EEXIST:
72 |                     raise
73 |
74 |         if len(filename):
75 |             path = os.path.join(folder, filename)
76 |             url = '{}{}'.format(base_url, path)
77 |             r = requests.get(url, allow_redirects=True)
78 |             open(path, 'wb').write(r.content)
79 |
80 |     try:
81 |         os.mkdir('runs')
82 |     except OSError as e:
83 |         if e.errno != errno.EEXIST:
84 |             raise
85 |
86 | TB_LINK = ''
87 | if IS_BINDER:
88 |     TB_LINK = HTML('''
89 |     Click here to open TensorBoard
90 |
95 |     ''')
96 |
97 | def config_chapter0(branch='master'):
98 |     if IS_COLAB:
99 |         print('Downloading files from GitHub repo to Colab...')
100 |         download_to_colab(0, branch)
101 |         print('Finished!')
102 |
103 | def config_chapter1(branch='master'):
104 |     if IS_COLAB:
105 |         print('Installing torchviz...')
106 |         subprocess.run([sys.executable, '-m', 'pip', 'install', 'torchviz'])
107 |         print('Downloading files from GitHub repo to Colab...')
108 |         download_to_colab(1, branch)
109 |         print('Creating folders...')
110 |         folders = ['data_preparation', 'model_configuration', 'model_training']
111 |
112 |         for folder in folders:
113 |             try:
114 |                 os.mkdir(folder)
115 |             except OSError as e:
117 |                 if e.errno != errno.EEXIST:
118 |                     raise
119 |         print('Finished!')
120 |
121 | def config_chapter2(branch='master'):
122 |     if IS_COLAB:
123 |         print('Downloading files from GitHub repo to Colab...')
124 |         download_to_colab(2, branch)
125 |         print('Finished!')
126 |
127 | def config_chapter2_1(branch='master'):
128 |     if IS_COLAB:
129 |         print('Downloading files from GitHub repo to Colab...')
130 |         download_to_colab(21, branch)
131 |         print('Finished!')
132 |
133 | def config_chapter3(branch='master'):
134 |     if IS_COLAB:
135 |         print('Downloading files from GitHub repo to Colab...')
136 |         download_to_colab(3, branch)
137 |         print('Finished!')
138 |
139 | def config_chapter4(branch='master'):
140 |     if IS_COLAB:
141 |         print('Downloading files from GitHub repo to Colab...')
142 |         download_to_colab(4, branch)
143 |         print('Finished!')
144 |
145 | def config_chapter5(branch='master'):
146 |     if IS_COLAB:
147 |         print('Downloading files from GitHub repo to Colab...')
148 |         download_to_colab(5, branch)
149 |         print('Finished!')
150 |
151 | def config_chapter6(branch='master'):
152 |     if IS_COLAB:
153 |         print('Downloading files from GitHub repo to Colab...')
154 |         download_to_colab(6, branch)
155 |         print('Finished!')
156 |
157 | def config_chapter7(branch='master'):
158 |     if IS_COLAB:
159 |         print('Downloading files from GitHub repo to Colab...')
160 |         download_to_colab(7, branch)
161 |         print('Finished!')
162 |
163 | def config_chapterextra(branch='master'):
164 |     if IS_COLAB:
165 |         print('Downloading files from GitHub repo to Colab...')
166 |         download_to_colab(71, branch)
167 |         print('Finished!')
168 |
169 | def config_chapter8(branch='master'):
170 |     if IS_COLAB:
171 |         print('Downloading files from GitHub repo to Colab...')
172 |         download_to_colab(8, branch)
173 |         print('Finished!')
174 |
175 | def config_chapter9(branch='master'):
176 |     if IS_COLAB:
177 |         print('Downloading files from GitHub repo to Colab...')
178 |         download_to_colab(9, branch)
179 |         print('Finished!')
180 |
181 | def config_chapter10(branch='master'):
182 |     if IS_COLAB:
183 |         print('Downloading files from GitHub repo to Colab...')
184 |         download_to_colab(10, branch)
185 |         print('Finished!')
186 |
187 | def config_chapter11(branch='master'):
188 |     if IS_COLAB:
189 |         print('Downloading files from GitHub repo to Colab...')
190 |         download_to_colab(11, branch)
191 |         print('Finished!')
192 |
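The `config_chapterN` functions above are the per-chapter bootstrap helpers used by the notebooks. A sketch of how one of them might be invoked from the first cell of a notebook running on Colab (assuming `config.py` itself has already been fetched into the working directory):

```python
# e.g., at the top of Chapter01.ipynb when running on Google Colab
from config import config_chapter1

config_chapter1()  # installs torchviz, downloads plots/chapter1.py, creates folders
```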
-------------------------------------------------------------------------------- /data_generation/ball.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def load_data(n_dims=10, n_points=1000, classif_radius_fraction=0.5, only_sphere=False, shuffle=True, seed=13):
4 |     """
5 |     Parameters
6 |     ----------
7 |     n_dims: int, optional
8 |         Number of dimensions of the n-ball. Default is 10.
9 |     n_points: int, optional
10 |         Number of points in the n-ball. Default is 1,000.
11 |     classif_radius_fraction: float, optional
12 |         Points farther away from the center than
13 |         `classif_radius_fraction * ball radius` are
14 |         considered to be positive cases. The remaining
15 |         points are the negative cases.
16 |     only_sphere: boolean
17 |         If True, generates an n-sphere, that is, a hollow n-ball.
18 |         Default is False.
19 |     shuffle: boolean, optional
20 |         If True, the points are shuffled. Default is True.
21 |     seed: int, optional
22 |         Random seed. Default is 13.
23 |     Returns
24 |     -------
25 |     X, y: tuple of ndarray
26 |         X is an array of shape (n_points, n_dims) containing the
27 |         points in the n-ball.
28 |         y is an array of shape (n_points, 1) containing the
29 |         classes of the samples.
30 |     """
31 |     np.random.seed(seed)
32 |     radius = np.sqrt(n_dims)
33 |     points = np.random.normal(size=(n_points, n_dims))
34 |     sphere = radius * points / np.linalg.norm(points, axis=1).reshape(-1, 1)
35 |     if only_sphere:
36 |         X = sphere
37 |     else:
38 |         X = sphere * np.random.uniform(size=(n_points, 1))**(1 / n_dims)
39 |
40 |     adjustment = 1 / np.std(X)
41 |     radius *= adjustment
42 |     X *= adjustment
43 |
44 |     y = (np.abs(np.sum(X, axis=1)) > (radius * classif_radius_fraction)).astype(int)
45 |
46 |     # But we must not feed the network with neatly organized inputs...
47 |     # so let's randomize them
48 |     if shuffle:
49 |         np.random.seed(seed)
50 |         shuffled = np.random.permutation(range(X.shape[0]))
51 |         X = X[shuffled]
52 |         y = y[shuffled].reshape(-1, 1)
53 |
54 |     return (X, y)
55 |

-------------------------------------------------------------------------------- /data_generation/image_classification.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def gen_img(start, target, fill=1, img_size=10):
5 |     # Generates empty image
6 |     img = np.zeros((img_size, img_size), dtype=float)
7 |
8 |     start_row, start_col = None, None
9 |
10 |     if start > 0:
11 |         start_row = start
12 |     else:
13 |         start_col = np.abs(start)
14 |
15 |     if target == 0:
16 |         if start_row is None:
17 |             img[:, start_col] = fill
18 |         else:
19 |             img[start_row, :] = fill
20 |     else:
21 |         if start_col == 0:
22 |             start_col = 1
23 |
24 |         if target == 1:
25 |             if start_row is not None:
26 |                 up = (range(start_row, -1, -1),
27 |                       range(0, start_row + 1))
28 |             else:
29 |                 up = (range(img_size - 1, start_col - 1, -1),
30 |                       range(start_col, img_size))
31 |             img[up] = fill
32 |         else:
33 |             if start_row is not None:
34 |                 down = (range(start_row, img_size, 1),
35 |                         range(0, img_size - start_row))
36 |             else:
37 |                 down = (range(0, img_size - 1 - start_col + 1),
38 |                         range(start_col, img_size))
39 |             img[down] = fill
40 |
41 |     return 255 * img.reshape(1, img_size, img_size)
42 |
43 |
44 | def generate_dataset(img_size=10, n_images=100, binary=True, seed=17):
45 |     np.random.seed(seed)
46 |
47 |     starts = np.random.randint(-(img_size - 1), img_size, size=(n_images,))
48 |     targets = np.random.randint(0, 3, size=(n_images,))
49 |
50 |     images = np.array([gen_img(s, t, img_size=img_size)
51 |                        for s, t in zip(starts, targets)], dtype=np.uint8)
52 |
53 |     if binary:
54 |         targets = (targets > 0).astype(int)
55 |
56 |     return images, targets
57 |
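Both generators above are used in the notebooks; a short usage sketch (the shapes follow directly from the source above):

```python
from data_generation.ball import load_data
from data_generation.image_classification import generate_dataset

X, y = load_data(n_dims=10, n_points=1000)  # X: (1000, 10), y: (1000, 1)
images, targets = generate_dataset(img_size=10, n_images=300, binary=True)
# images: (300, 1, 10, 10) uint8 array of line drawings
# targets: 0 = horizontal/vertical line, 1 = diagonal (when binary=True)
```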
-------------------------------------------------------------------------------- /data_generation/nlp.py: --------------------------------------------------------------------------------
1 | import requests
2 | import zipfile
3 | import os
4 | import errno
5 | import nltk
6 | from nltk.tokenize import sent_tokenize
7 |
8 | #ALICE_URL = 'https://ota.bodleian.ox.ac.uk/repository/xmlui/bitstream/handle/20.500.12024/1476/alice28-1476.txt'
9 | #WIZARD_URL = 'https://ota.bodleian.ox.ac.uk/repository/xmlui/bitstream/handle/20.500.12024/1740/wizoz10-1740.txt'
10 | ALICE_URL = 'https://llds.ling-phil.ox.ac.uk/llds/xmlui/bitstream/handle/20.500.14106/1476/alice28-1476.txt'
11 | WIZARD_URL = 'https://llds.ling-phil.ox.ac.uk/llds/xmlui/bitstream/handle/20.500.14106/1740/wizoz10-1740.txt'
12 |
13 | def download_text(url, localfolder='texts'):
14 |     localfile = os.path.split(url)[-1]
15 |     try:
16 |         os.mkdir(f'{localfolder}')
17 |     except OSError as e:
18 |         if e.errno != errno.EEXIST:
19 |             raise
20 |     try:
21 |         r = requests.get(url, allow_redirects=True)
22 |         open(os.path.join(localfolder, localfile), 'wb').write(r.content)
23 |     except Exception as e:
24 |         print(f'Error downloading file: {str(e)}')
25 |
26 | def sentence_tokenize(source, quote_char='\\', sep_char=',',
27 |                       include_header=True, include_source=True,
28 |                       extensions=('txt',), **kwargs):
29 |     nltk.download('punkt')
30 |     # If source is a folder, goes through all files inside it
31 |     # that match the desired extensions ('txt' by default)
32 |     if os.path.isdir(source):
33 |         filenames = [f for f in os.listdir(source)
34 |                      if os.path.isfile(os.path.join(source, f)) and
35 |                         os.path.splitext(f)[1][1:] in extensions]
36 |     elif isinstance(source, str):
37 |         filenames = [source]
38 |
39 |     # If there is a configuration file, builds a dictionary with
40 |     # the corresponding start and end lines of each text file
41 |     config_file = os.path.join(source, 'lines.cfg')
42 |     config = {}
43 |     if os.path.exists(config_file):
44 |         with open(config_file, 'r') as f:
45 |             rows = f.readlines()
46 |
47 |         for r in rows[1:]:
48 |             fname, start, end = r.strip().split(',')
49 |             config.update({fname: (int(start), int(end))})
50 |
51 |     new_fnames = []
52 |     # For each file of text
53 |     for fname in filenames:
54 |         # If there's a start and end line for that file, use it
55 |         try:
56 |             start, end = config[fname]
57 |         except KeyError:
58 |             start = None
59 |             end = None
60 |
61 |         # Opens the file, slices the configured lines (if any),
62 |         # cleans line breaks and uses the sentence tokenizer
63 |         with open(os.path.join(source, fname), 'r') as f:
64 |             contents = (''.join(f.readlines()[slice(start, end, None)])
65 |                         .replace('\n', ' ').replace('\r', ''))
66 |         corpus = sent_tokenize(contents, **kwargs)
67 |
68 |         # Builds a CSV file containing tokenized sentences
69 |         base = os.path.splitext(fname)[0]
70 |         new_fname = f'{base}.sent.csv'
71 |         new_fname = os.path.join(source, new_fname)
72 |         with open(new_fname, 'w') as f:
73 |             # Header of the file
74 |             if include_header:
75 |                 if include_source:
76 |                     f.write('sentence,source\n')
77 |                 else:
78 |                     f.write('sentence\n')
79 |             # Writes one line for each sentence
80 |             for sentence in corpus:
81 |                 if include_source:
82 |                     f.write(f'{quote_char}{sentence}{quote_char}{sep_char}{fname}\n')
83 |                 else:
84 |                     f.write(f'{quote_char}{sentence}{quote_char}\n')
85 |         new_fnames.append(new_fname)
86 |
87 |     # Returns list of the newly generated CSV files
88 |     return sorted(new_fnames)
89 |
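A sketch of how these two helpers chain together (the `texts` folder is `download_text`'s default destination):

```python
from data_generation.nlp import ALICE_URL, WIZARD_URL, download_text, sentence_tokenize

download_text(ALICE_URL, 'texts')   # saves alice28-1476.txt under texts/
download_text(WIZARD_URL, 'texts')  # saves wizoz10-1740.txt under texts/
csv_files = sentence_tokenize('texts')  # writes one .sent.csv file per text
```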
-------------------------------------------------------------------------------- /data_generation/rps.py: --------------------------------------------------------------------------------
1 | import requests
2 | import zipfile
3 | import os
4 | import errno
5 |
6 | def download_rps(localfolder=''):
7 |     filenames = ['rps.zip', 'rps-test-set.zip']
8 |     for filename in filenames:
9 |         try:
10 |             os.mkdir(f'{localfolder}{filename[:-4]}')
11 |
12 |             localfile = f'{localfolder}{filename}'
13 |             # url = 'https://storage.googleapis.com/laurencemoroney-blog.appspot.com/{}'
14 |             # Updated from TFDS URL at
15 |             # https://github.com/tensorflow/datasets/blob/master/tensorflow_datasets/datasets/rock_paper_scissors/rock_paper_scissors_dataset_builder.py
16 |             url = 'https://storage.googleapis.com/download.tensorflow.org/data/{}'
17 |             r = requests.get(url.format(filename), allow_redirects=True)
18 |             open(localfile, 'wb').write(r.content)
19 |             with zipfile.ZipFile(localfile, 'r') as zip_ref:
20 |                 zip_ref.extractall(localfolder)
21 |         except OSError as e:
22 |             if e.errno != errno.EEXIST:
23 |                 raise
24 |             else:
25 |                 print(f'{filename[:-4]} folder already exists!')
26 |

-------------------------------------------------------------------------------- /data_generation/simple_linear_regression.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | true_b = 1
4 | true_w = 2
5 | N = 100
6 |
7 | # Data Generation
8 | np.random.seed(42)
9 | x = np.random.rand(N, 1)
10 | y = true_b + true_w * x + (.1 * np.random.randn(N, 1))
11 |
12 | # Shuffles the indices
13 | idx = np.arange(N)
14 | np.random.shuffle(idx)
15 |
16 | # Uses first 80 random indices for train
17 | train_idx = idx[:int(N*.8)]
18 | # Uses the remaining indices for validation
19 | val_idx = idx[int(N*.8):]
20 |
21 | # Generates train and validation sets
22 | x_train, y_train = x[train_idx], y[train_idx]
23 | x_val, y_val = x[val_idx], y[val_idx]

-------------------------------------------------------------------------------- /data_generation/square_sequences.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def generate_sequences(n=128, variable_len=False, seed=13):
4 |     basic_corners = np.array([[-1, -1], [-1, 1], [1, 1], [1, -1]])
5 |     np.random.seed(seed)
6 |     bases = np.random.randint(4, size=n)
7 |     if variable_len:
8 |         lengths = np.random.randint(3, size=n) + 2
9 |     else:
10 |         lengths = [4] * n
11 |     directions = np.random.randint(2, size=n)
12 |     points = [basic_corners[[(b + i) % 4 for i in range(4)]][slice(None, None, d*2-1)][:l] + np.random.randn(l, 2) * 0.1 for b, d, l in zip(bases, directions, lengths)]
13 |     return points, directions

-------------------------------------------------------------------------------- /data_preparation/v0.py: --------------------------------------------------------------------------------
1 |
2 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
3 |
4 | # Our data was in Numpy arrays, but we need to transform them
5 | # into PyTorch's Tensors and then we send them to the
6 | # chosen device
7 | x_train_tensor = torch.as_tensor(x_train).float().to(device)
8 | y_train_tensor = torch.as_tensor(y_train).float().to(device)
9 |

-------------------------------------------------------------------------------- /data_preparation/v1.py: --------------------------------------------------------------------------------
1 |
2 | # Our data was in Numpy arrays, but we need to transform them into PyTorch's Tensors
3 | x_train_tensor = torch.from_numpy(x_train).float()
4 | y_train_tensor = torch.from_numpy(y_train).float()
5 |
6 | # Builds Dataset
7 | train_data = TensorDataset(x_train_tensor, y_train_tensor)
8 |
9 | # Builds DataLoader
10 | train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
11 |

-------------------------------------------------------------------------------- /data_preparation/v2.py: --------------------------------------------------------------------------------
1 |
2 | torch.manual_seed(13)
3 |
4 | # Builds tensors from numpy arrays BEFORE split
5 | x_tensor = torch.as_tensor(x).float()
6 | y_tensor = torch.as_tensor(y).float()
7 |
8 | # Builds dataset containing ALL data points
9 | dataset = TensorDataset(x_tensor, y_tensor)
10 |
11 | # Performs the split
12 | ratio = .8
13 | n_total = len(dataset)
14 | n_train = int(n_total * ratio)
15 | n_val = n_total - n_train
16 |
17 | train_data, val_data = random_split(dataset, [n_train, n_val])
18 |
19 | # Builds a loader for each set
20 | train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
21 | val_loader = DataLoader(dataset=val_data, batch_size=16)
22 |

-------------------------------------------------------------------------------- /environment.yml: --------------------------------------------------------------------------------
1 | name: pytorchbook
2 | channels:
3 |   - conda-forge
4 |   - pytorch
5 |   - dglteam
6 | dependencies:
7 |   - python=3.10
8 |   - pip
9 |   - numpy
10 |   - matplotlib
11 |   - scikit-learn
12 |   - jupyter
13 |   - jupyterlab
14 |   - pytorch>=2.2.1
15 |   - torchvision>=0.17.1
16 |   - torchtext
17 |   - opencv
18 |   - librosa
19 |   - nb_conda_kernels
20 |   - pip:
21 |     - torchviz
22 |     - tensorboard
23 |     - jupyter-server-proxy

-------------------------------------------------------------------------------- /helpers.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from torch.utils.data import random_split, WeightedRandomSampler, TensorDataset
5 | def make_train_step_fn(model, loss_fn, optimizer):
6 |     # Builds function that performs a step in the train loop
7 |     def perform_train_step_fn(x, y):
8 |         # Sets model to TRAIN mode
9 |         model.train()
10 |
11 |         # Step 1 - Computes our model's predicted output - forward pass
12 |         yhat = model(x)
13 |         # Step 2 - Computes the loss
14 |         loss = loss_fn(yhat, y)
15 |         # Step 3 - Computes gradients for both "a" and "b" parameters
16 |         loss.backward()
17 |         # Step 4 - Updates parameters using gradients and the learning rate
18 |         optimizer.step()
19 |         optimizer.zero_grad()
20 |
21 |         # Returns the loss
22 |         return loss.item()
23 |
24 |     # Returns the function that will be called inside the train loop
25 |     return perform_train_step_fn
26 |
27 | def mini_batch(device, data_loader, step_fn):
28 |     mini_batch_losses = []
29 |     for x_batch, y_batch in data_loader:
30 |         x_batch = x_batch.to(device)
31 |         y_batch = y_batch.to(device)
32 |
33 |         mini_batch_loss = step_fn(x_batch, y_batch)
34 |         mini_batch_losses.append(mini_batch_loss)
35 |
36 |     loss = np.mean(mini_batch_losses)
37 |     return loss
38 |
39 | def make_val_step_fn(model, loss_fn):
40 |     # Builds function that performs a step in the validation loop
41 |     def perform_val_step_fn(x, y):
42 |         # Sets model to EVAL mode
43 |         model.eval()
44 |
45 |         # Step 1 - Computes our model's predicted output - forward pass
46 |         yhat = model(x)
47 |         # Step 2 - Computes the loss
48 |         loss = loss_fn(yhat, y)
49 |         # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation
50 |         return loss.item()
51 |
52 |     return perform_val_step_fn
53 |
54 | def index_splitter(n, splits, seed=13):
55 |     idx = torch.arange(n)
56 |     # Makes the split argument a tensor
57 |     splits_tensor = torch.as_tensor(splits)
58 |     total = splits_tensor.sum().float()
59 |     # If the total does not add up to one
60 |     # divide every number by the total
61 |     if not total.isclose(torch.ones(1)[0]):
62 |         splits_tensor = splits_tensor / total
63 |     # Uses PyTorch random_split to split the indices
64 |     torch.manual_seed(seed)
65 |     return random_split(idx, splits_tensor)
66 |
67 | # def index_splitter(n, splits, seed=13):
68 | #     idx = torch.arange(n)
69 | #     # Makes the split argument a tensor
70 | #     splits_tensor = torch.as_tensor(splits)
71 | #     # Finds the correct multiplier, so we don't have
72 | #     # to worry about summing up to N (or one)
73 | #     multiplier = n / splits_tensor.sum()
74 | #     splits_tensor = (multiplier * splits_tensor).long()
75 | #     # If there is a difference, throws at the first split
76 | #     # so random_split does not complain
77 | #     diff = n - splits_tensor.sum()
78 | #     splits_tensor[0] += diff
79 | #     # Uses PyTorch random_split to split the indices
80 | #     torch.manual_seed(seed)
81 | #     return random_split(idx, splits_tensor)
82 |
83 | def make_balanced_sampler(y):
84 |     # Computes weights for compensating imbalanced classes
85 |     classes, counts = y.unique(return_counts=True)
86 |     weights = 1.0 / counts.float()
87 |     sample_weights = weights[y.squeeze().long()]
88 |     # Builds sampler with the computed weights
89 |     generator = torch.Generator()
90 |     sampler = WeightedRandomSampler(
91 |         weights=sample_weights,
92 |         num_samples=len(sample_weights),
93 |         generator=generator,
94 |         replacement=True
95 |     )
96 |     return sampler
97 |
98 | def freeze_model(model):
99 |     for parameter in model.parameters():
100 |         parameter.requires_grad = False
101 |
102 | def preprocessed_dataset(model, loader, device=None):
103 |     if device is None:
104 |         device = next(model.parameters()).device
105 |
106 |     features = None
107 |     labels = None
108 |
109 |     for i, (x, y) in enumerate(loader):
110 |         model.eval()
111 |         x = x.to(device)
112 |         output = model(x)
113 |         if i == 0:
114 |             features = output.detach().cpu()
115 |             labels = y.cpu()
116 |         else:
117 |             features = torch.cat([features, output.detach().cpu()])
118 |             labels = torch.cat([labels, y.cpu()])
119 |
120 |     dataset = TensorDataset(features, labels)
121 |     return dataset
122 |
123 | def inception_loss(outputs, labels):
124 |     try:
125 |         main, aux = outputs
126 |     except ValueError:
127 |         main = outputs
128 |         aux = None
129 |         loss_aux = 0
130 |
131 |     multi_loss_fn = nn.CrossEntropyLoss(reduction='mean')
132 |     loss_main = multi_loss_fn(main, labels)
133 |     if aux is not None:
134 |         loss_aux = multi_loss_fn(aux, labels)
135 |     return loss_main + 0.4 * loss_aux
136 |
137 |

-------------------------------------------------------------------------------- /images/1conv1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/1conv1.png

-------------------------------------------------------------------------------- /images/1conv2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/1conv2.png
-------------------------------------------------------------------------------- /images/3channel1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/3channel1.png

-------------------------------------------------------------------------------- /images/3channel2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/3channel2.png

-------------------------------------------------------------------------------- /images/3channel_filters1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/3channel_filters1.png

-------------------------------------------------------------------------------- /images/aiayn_dot.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/aiayn_dot.png

-------------------------------------------------------------------------------- /images/aiayn_multihead.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/aiayn_multihead.png

-------------------------------------------------------------------------------- /images/alexnet.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/alexnet.png

-------------------------------------------------------------------------------- /images/alice_dorothy.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/alice_dorothy.png

-------------------------------------------------------------------------------- /images/architecture_lenet.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/architecture_lenet.png

-------------------------------------------------------------------------------- /images/attention.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attention.png

-------------------------------------------------------------------------------- /images/attn_narrow_2heads.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attn_narrow_2heads.png

-------------------------------------------------------------------------------- /images/attn_narrow_first_head.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attn_narrow_first_head.png
-------------------------------------------------------------------------------- /images/attn_narrow_transf.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/attn_narrow_transf.png

-------------------------------------------------------------------------------- /images/basic_attention.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/basic_attention.png

-------------------------------------------------------------------------------- /images/bert_embeddings.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_embeddings.png

-------------------------------------------------------------------------------- /images/bert_input_embed.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_input_embed.png

-------------------------------------------------------------------------------- /images/bert_mlm.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_mlm.png

-------------------------------------------------------------------------------- /images/bert_nsp.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bert_nsp.png

-------------------------------------------------------------------------------- /images/bidirect_rnn.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bidirect_rnn.png

-------------------------------------------------------------------------------- /images/blank1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank1.png

-------------------------------------------------------------------------------- /images/blank2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank2.png

-------------------------------------------------------------------------------- /images/blank_center.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank_center.png

-------------------------------------------------------------------------------- /images/blank_end.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/blank_end.png
-------------------------------------------------------------------------------- /images/block_tokens.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/block_tokens.png

-------------------------------------------------------------------------------- /images/book10.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/book10.png

-------------------------------------------------------------------------------- /images/book9.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/book9.png

-------------------------------------------------------------------------------- /images/bow.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/bow.png

-------------------------------------------------------------------------------- /images/cbow.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/cbow.png

-------------------------------------------------------------------------------- /images/classification.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification.png

-------------------------------------------------------------------------------- /images/classification_equiv.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification_equiv.png

-------------------------------------------------------------------------------- /images/classification_relu2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification_relu2.png

-------------------------------------------------------------------------------- /images/classification_softmax.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/classification_softmax.png

-------------------------------------------------------------------------------- /images/cls_hidden_state.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/cls_hidden_state.png

-------------------------------------------------------------------------------- /images/context_translate.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/context_translate.png
-------------------------------------------------------------------------------- /images/context_vector.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/context_vector.png

-------------------------------------------------------------------------------- /images/conv1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1.png

-------------------------------------------------------------------------------- /images/conv1_ma.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1_ma.png

-------------------------------------------------------------------------------- /images/conv1d.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1d.png

-------------------------------------------------------------------------------- /images/conv1d_dilated.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1d_dilated.png

-------------------------------------------------------------------------------- /images/conv1d_edges.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv1d_edges.png

-------------------------------------------------------------------------------- /images/conv2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv2.png

-------------------------------------------------------------------------------- /images/conv3.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv3.png

-------------------------------------------------------------------------------- /images/conv5.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv5.png

-------------------------------------------------------------------------------- /images/conv6.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv6.png

-------------------------------------------------------------------------------- /images/conv7.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv7.png

-------------------------------------------------------------------------------- /images/conv8.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/conv8.png
/images/cross_attn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/cross_attn.png -------------------------------------------------------------------------------- /images/dec_both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/dec_both.png -------------------------------------------------------------------------------- /images/decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/decoder.png -------------------------------------------------------------------------------- /images/decoder_self.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/decoder_self.png -------------------------------------------------------------------------------- /images/decoder_self_simplified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/decoder_self_simplified.png -------------------------------------------------------------------------------- /images/dropout_paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/dropout_paper.png -------------------------------------------------------------------------------- /images/elmo_embed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/elmo_embed.png -------------------------------------------------------------------------------- /images/elmo_lstm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/elmo_lstm.png -------------------------------------------------------------------------------- /images/embed_arithmetic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/embed_arithmetic.png -------------------------------------------------------------------------------- /images/enc_both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/enc_both.png -------------------------------------------------------------------------------- /images/enc_dec_attn_translate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/enc_dec_attn_translate.png -------------------------------------------------------------------------------- /images/encdec_attn.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encdec_attn.png -------------------------------------------------------------------------------- /images/encdec_self_simplified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encdec_self_simplified.png -------------------------------------------------------------------------------- /images/encoded_distances.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoded_distances.png -------------------------------------------------------------------------------- /images/encoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder.png -------------------------------------------------------------------------------- /images/encoder_decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_decoder.png -------------------------------------------------------------------------------- /images/encoder_lost_seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_lost_seq.png -------------------------------------------------------------------------------- /images/encoder_self.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_self.png -------------------------------------------------------------------------------- /images/encoder_self_detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_self_detail.png -------------------------------------------------------------------------------- /images/encoder_self_simplified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/encoder_self_simplified.png -------------------------------------------------------------------------------- /images/fill1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/fill1.png -------------------------------------------------------------------------------- /images/fill2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/fill2.png -------------------------------------------------------------------------------- /images/full_transformer.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/full_transformer.png -------------------------------------------------------------------------------- /images/full_transformer_and_class.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/full_transformer_and_class.png -------------------------------------------------------------------------------- /images/gru_cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/gru_cell.png -------------------------------------------------------------------------------- /images/inception_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/inception_model.png -------------------------------------------------------------------------------- /images/inception_modules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/inception_modules.png -------------------------------------------------------------------------------- /images/kq_matches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/kq_matches.png -------------------------------------------------------------------------------- /images/layer_vs_batch_norm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/layer_vs_batch_norm.png -------------------------------------------------------------------------------- /images/logistic_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/logistic_model.png -------------------------------------------------------------------------------- /images/lstm_cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/lstm_cell.png -------------------------------------------------------------------------------- /images/multiattn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/multiattn.png -------------------------------------------------------------------------------- /images/multihead_chunking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/multihead_chunking.png -------------------------------------------------------------------------------- /images/new_books.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/new_books.png -------------------------------------------------------------------------------- /images/ngrams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ngrams.png -------------------------------------------------------------------------------- /images/norm_first.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/norm_first.png -------------------------------------------------------------------------------- /images/ohe1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ohe1.png -------------------------------------------------------------------------------- /images/ohe2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ohe2.png -------------------------------------------------------------------------------- /images/ohe3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/ohe3.png -------------------------------------------------------------------------------- /images/packed_seq_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/packed_seq_data.png -------------------------------------------------------------------------------- /images/packed_seq_inddata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/packed_seq_inddata.png -------------------------------------------------------------------------------- /images/padding1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/padding1.png -------------------------------------------------------------------------------- /images/padding2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/padding2.png -------------------------------------------------------------------------------- /images/padding3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/padding3.png -------------------------------------------------------------------------------- /images/paddings.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/paddings.png -------------------------------------------------------------------------------- /images/paths.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/paths.png -------------------------------------------------------------------------------- /images/pooling1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/pooling1.png -------------------------------------------------------------------------------- /images/posenc_mod4mod8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_mod4mod8.png -------------------------------------------------------------------------------- /images/posenc_modnorm4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm4.png -------------------------------------------------------------------------------- /images/posenc_modnorm_deg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm_deg.png -------------------------------------------------------------------------------- /images/posenc_modnorm_mult.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm_mult.png -------------------------------------------------------------------------------- /images/posenc_modnorm_sincos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_modnorm_sincos.png -------------------------------------------------------------------------------- /images/posenc_norm1k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_norm1k.png -------------------------------------------------------------------------------- /images/posenc_norm4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_norm4.png -------------------------------------------------------------------------------- /images/posenc_norm4_long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/posenc_norm4_long.png -------------------------------------------------------------------------------- /images/residual.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/residual.png -------------------------------------------------------------------------------- /images/rest_continuous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rest_continuous.png -------------------------------------------------------------------------------- /images/rest_discrete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rest_discrete.png -------------------------------------------------------------------------------- /images/rnn1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn1.png -------------------------------------------------------------------------------- /images/rnn2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn2.png -------------------------------------------------------------------------------- /images/rnn_cell_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn_cell_diagram.png -------------------------------------------------------------------------------- /images/rnn_cell_diagram_seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/rnn_cell_diagram_seq.png -------------------------------------------------------------------------------- /images/score_alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/score_alignment.png -------------------------------------------------------------------------------- /images/score_alignment_translate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/score_alignment_translate.png -------------------------------------------------------------------------------- /images/shifted_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/shifted_target.png -------------------------------------------------------------------------------- /images/sincos_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/sincos_distance.png -------------------------------------------------------------------------------- /images/stacked_encdec.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stacked_encdec.png -------------------------------------------------------------------------------- /images/stacked_layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stacked_layers.png -------------------------------------------------------------------------------- /images/stacked_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stacked_rnn.png -------------------------------------------------------------------------------- /images/stride1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/stride1.png -------------------------------------------------------------------------------- /images/strider2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/strider2.png -------------------------------------------------------------------------------- /images/strider3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/strider3.png -------------------------------------------------------------------------------- /images/sublayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/sublayer.png -------------------------------------------------------------------------------- /images/transf_classes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_classes.png -------------------------------------------------------------------------------- /images/transf_decself.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_decself.png -------------------------------------------------------------------------------- /images/transf_encdecself.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_encdecself.png -------------------------------------------------------------------------------- /images/transf_encself.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/images/transf_encself.png -------------------------------------------------------------------------------- /images/translation_att.png: -------------------------------------------------------------------------------- 
/model_configuration/v0.py:
--------------------------------------------------------------------------------

# This is redundant now, but it won't be when we introduce
# Datasets...
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters
# (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')
--------------------------------------------------------------------------------
/model_configuration/v1.py:
--------------------------------------------------------------------------------

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')

# Creates the train_step function for our model, loss function and optimizer
train_step_fn = make_train_step_fn(model, loss_fn, optimizer)
--------------------------------------------------------------------------------
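[Note: make_train_step_fn is not defined in these snippets -- it comes from
the book's notebooks. A minimal sketch, consistent with how it is called
above: a higher-order function that captures the model, loss function, and
optimizer, and returns a function performing one training step:]

def make_train_step_fn(model, loss_fn, optimizer):
    # Builds a function that performs a step in the train loop
    def perform_train_step_fn(x, y):
        # Sets model to TRAIN mode
        model.train()
        # Step 1 - Computes the model's predictions - forward pass
        yhat = model(x)
        # Step 2 - Computes the loss
        loss = loss_fn(yhat, y)
        # Step 3 - Computes gradients
        loss.backward()
        # Step 4 - Updates parameters and zeroes gradients for the next step
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss as a plain Python float
        return loss.item()
    return perform_train_step_fn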
/model_configuration/v2.py:
--------------------------------------------------------------------------------

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')

# Creates the train_step function for our model, loss function and optimizer
train_step_fn = make_train_step_fn(model, loss_fn, optimizer)

# Creates the val_step function for our model and loss function
val_step_fn = make_val_step_fn(model, loss_fn)
--------------------------------------------------------------------------------
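[Note: make_val_step_fn is also defined in the book's notebooks. A minimal
sketch, mirroring make_train_step_fn but without the backward pass or the
optimizer, since validation must never update parameters:]

def make_val_step_fn(model, loss_fn):
    # Builds a function that performs a step in the validation loop
    def perform_val_step_fn(x, y):
        # Sets model to EVAL mode
        model.eval()
        # Computes predictions and loss -- no gradients, no updates
        yhat = model(x)
        loss = loss_fn(yhat, y)
        return loss.item()
    return perform_val_step_fn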
/model_configuration/v3.py:
--------------------------------------------------------------------------------

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model and send it at once to the device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Defines an SGD optimizer to update the parameters (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')

# Creates the train_step function for our model, loss function and optimizer
train_step_fn = make_train_step_fn(model, loss_fn, optimizer)

# Creates the val_step function for our model and loss function
val_step_fn = make_val_step_fn(model, loss_fn)

# Creates a Summary Writer to interface with TensorBoard
writer = SummaryWriter('runs/simple_linear_regression')

# Fetches a single mini-batch so we can use add_graph
x_sample, y_sample = next(iter(train_loader))
writer.add_graph(model, x_sample.to(device))
--------------------------------------------------------------------------------
/model_configuration/v4.py:
--------------------------------------------------------------------------------

# Sets learning rate - this is "eta" ~ the "n"-like Greek letter
lr = 0.1

torch.manual_seed(42)
# Now we can create a model (no .to(device) here anymore --
# device handling is delegated to the StepByStep class)
model = nn.Sequential(nn.Linear(1, 1))

# Defines an SGD optimizer to update the parameters
# (now retrieved directly from the model)
optimizer = optim.SGD(model.parameters(), lr=lr)

# Defines an MSE loss function
loss_fn = nn.MSELoss(reduction='mean')
--------------------------------------------------------------------------------
/model_training/v0.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 1000

for epoch in range(n_epochs):
    # Sets model to TRAIN mode
    model.train()

    # Step 1 - Computes model's predicted output - forward pass
    yhat = model(x_train_tensor)

    # Step 2 - Computes the loss
    loss = loss_fn(yhat, y_train_tensor)

    # Step 3 - Computes gradients for both "b" and "w" parameters
    loss.backward()

    # Step 4 - Updates parameters using gradients and
    # the learning rate
    optimizer.step()
    optimizer.zero_grad()
--------------------------------------------------------------------------------
/model_training/v1.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 1000

losses = []

# For each epoch...
for epoch in range(n_epochs):
    # Performs one train step and returns the corresponding loss
    loss = train_step_fn(x_train_tensor, y_train_tensor)
    losses.append(loss)
--------------------------------------------------------------------------------
/model_training/v2.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 1000

losses = []

# For each epoch...
for epoch in range(n_epochs):
    # inner loop
    mini_batch_losses = []
    for x_batch, y_batch in train_loader:
        # The dataset "lives" in the CPU, and so do our mini-batches;
        # therefore, we need to send those mini-batches to the
        # device where the model "lives"
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # Performs one train step and returns the corresponding loss
        # for this mini-batch
        mini_batch_loss = train_step_fn(x_batch, y_batch)
        mini_batch_losses.append(mini_batch_loss)

    # Computes average loss over all mini-batches - that's the epoch loss
    loss = np.mean(mini_batch_losses)

    losses.append(loss)
--------------------------------------------------------------------------------
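[Note: from v3 on, the inner mini-batch loop is factored out into a
mini_batch helper defined in the book's notebooks. A minimal sketch that
matches v2's inner loop and the way it is called below -- it runs step_fn
over every mini-batch of a loader and returns the average loss:]

def mini_batch(device, data_loader, step_fn):
    mini_batch_losses = []
    for x_batch, y_batch in data_loader:
        # Sends the mini-batch to the same device as the model
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # One step (train or validation) for this mini-batch
        mini_batch_loss = step_fn(x_batch, y_batch)
        mini_batch_losses.append(mini_batch_loss)
    # The average over all mini-batches is the epoch loss
    return np.mean(mini_batch_losses)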
/model_training/v3.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 200

losses = []

for epoch in range(n_epochs):
    # inner loop
    loss = mini_batch(device, train_loader, train_step_fn)
    losses.append(loss)
--------------------------------------------------------------------------------
/model_training/v4.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 200

losses = []
val_losses = []

for epoch in range(n_epochs):
    # inner loop
    loss = mini_batch(device, train_loader, train_step_fn)
    losses.append(loss)

    # VALIDATION
    # no gradients in validation!
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step_fn)
        val_losses.append(val_loss)
--------------------------------------------------------------------------------
/model_training/v5.py:
--------------------------------------------------------------------------------

# Defines number of epochs
n_epochs = 200

losses = []
val_losses = []

for epoch in range(n_epochs):
    # inner loop
    loss = mini_batch(device, train_loader, train_step_fn)
    losses.append(loss)

    # VALIDATION
    # no gradients in validation!
    with torch.no_grad():
        val_loss = mini_batch(device, val_loader, val_step_fn)
        val_losses.append(val_loss)

    # Records both losses for each epoch under the main tag "loss"
    writer.add_scalars(main_tag='loss',
                       tag_scalar_dict={'training': loss, 'validation': val_loss},
                       global_step=epoch)

# Closes the writer
writer.close()
--------------------------------------------------------------------------------
/plots/chapter1.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
plt.style.use('fivethirtyeight')

def fit_model(x_train, y_train):
    # Fits a linear regression to find the actual b and w that minimize the loss
    regression = LinearRegression()
    regression.fit(x_train, y_train)
    b_minimum, w_minimum = regression.intercept_[0], regression.coef_[0][0]
    return b_minimum, w_minimum

def figure1(x_train, y_train, x_val, y_val):
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    ax[0].scatter(x_train, y_train)
    ax[0].set_xlabel('x')
    ax[0].set_ylabel('y')
    ax[0].set_ylim([0, 3.1])
    ax[0].set_title('Generated Data - Train')

    ax[1].scatter(x_val, y_val, c='r')
    ax[1].set_xlabel('x')
    ax[1].set_ylabel('y')
    ax[1].set_ylim([0, 3.1])
    ax[1].set_title('Generated Data - Validation')
    fig.tight_layout()

    return fig, ax

def figure3(x_train, y_train):
    b_minimum, w_minimum = fit_model(x_train, y_train)
    # Generates evenly spaced x feature
    x_range = np.linspace(0, 1, 101)
    # Computes yhat
    yhat_range = b_minimum + w_minimum * x_range

    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_ylim([0, 3.1])

    # Dataset
    ax.scatter(x_train, y_train)
    # Predictions
    ax.plot(x_range, yhat_range, label='Final model\'s predictions', c='k', linestyle='--')

    # Annotations
    ax.annotate('b = {:.4f} w = {:.4f}'.format(b_minimum, w_minimum), xy=(.4, 1.5), c='k', rotation=34)
    ax.legend(loc=0)
    fig.tight_layout()
    return fig, ax
--------------------------------------------------------------------------------
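[Note: a hypothetical usage sketch for the helpers above. The book generates
its Chapter 1 data in data_generation/simple_linear_regression.py; the
parameters below (true_b, true_w, noise scale) are illustrative assumptions,
not necessarily the book's exact values:]

import numpy as np
from plots.chapter1 import figure1, figure3

true_b, true_w, N = 1, 2, 100
np.random.seed(42)
x = np.random.rand(N, 1)
y = true_b + true_w * x + .1 * np.random.randn(N, 1)

# Simple 80/20 shuffle split
idx = np.arange(N)
np.random.shuffle(idx)
train_idx, val_idx = idx[:int(N * .8)], idx[int(N * .8):]

fig, ax = figure1(x[train_idx], y[train_idx], x[val_idx], y[val_idx])
fig, ax = figure3(x[train_idx], y[train_idx])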
/plots/chapter10.py:
--------------------------------------------------------------------------------
import numpy as np
import torch
from matplotlib import pyplot as plt

def hist_encoding(encoding):
    encoding = encoding.cpu().detach().numpy()
    fig, axs = plt.subplots(1, 4, figsize=(15, 4))
    axs = axs.flatten()
    for i in range(4):
        data_point = encoding[i][0]
        axs[i].hist(data_point, bins=np.linspace(-3, 3, 15), alpha=.5)
        axs[i].set_xlabel(f'Data Point #{i}')
        axs[i].set_ylabel('# of features')
        axs[i].set_title(f'mean={data_point.mean():.4f}\n var={data_point.var():.4f}', fontsize=16)
        axs[i].set_ylim([0, 10])
        axs[i].label_outer()
    fig.tight_layout()
    return fig

def hist_layer_normed(encoding, normed):
    encoding = encoding.cpu().detach().numpy()
    normed = normed.cpu().detach()
    fig, axs = plt.subplots(1, 4, figsize=(15, 4))
    for i in range(4):
        data_point = encoding[i][0]
        normed_point = normed.detach()[i][0]
        axs[i].hist(data_point, bins=np.linspace(-3, 3, 15), alpha=.5, label='Original')
        axs[i].hist(normed_point.numpy(), bins=np.linspace(-3, 3, 15), alpha=.5, label='Standardized')
        axs[i].set_xlabel(f'Data Point #{i}')
        axs[i].set_ylabel('# of features')
        axs[i].set_title(f'mean={normed.mean().numpy():.4f}\n std={normed.std(unbiased=False).numpy():.4f}', fontsize=16)
        axs[i].legend()
        axs[i].set_ylim([0, 80])
        axs[i].label_outer()
    fig.tight_layout()
    return fig

def plot_patches(patches, kernel_size=3):
    n, p1, p2, v = patches.shape
    fig, axs = plt.subplots(p1, p2, figsize=(3, 3))
    for i in range(p1):
        for j in range(p2):
            axs[i, j].imshow(patches.squeeze()[i, j].view(kernel_size, -1).cpu().detach().numpy(), cmap=plt.cm.gray)
            axs[i, j].grid(False)
            axs[i, j].set_xticklabels([])
            axs[i, j].set_yticklabels([])
    return fig

def plot_seq_patches(seq_patches):
    seq_patches = seq_patches.cpu().detach().numpy()
    fig, axs = plt.subplots(1, seq_patches.shape[0], figsize=(3.5, 4))
    for i in range(seq_patches.shape[0]):
        axs[i].imshow(seq_patches[i].reshape(-1, 1), cmap=plt.cm.gray)
        axs[i].grid(False)
        axs[i].set_xticklabels([])
        axs[i].set_xlabel(i)
        axs[i].set_ylabel('Features')
        axs[i].label_outer()
    fig.suptitle('Sequence')
    fig.tight_layout(pad=0.3)
    fig.subplots_adjust(top=0.9)
    return fig

def plot_seq_patches_transp(seq_patches, add_cls=False, title=None):
    seq_patches = seq_patches.cpu().detach().numpy()
    seq_patches = np.atleast_3d(seq_patches)
    n, l, d = seq_patches.shape
    fig, saxs = plt.subplots(1+seq_patches.shape[1]+add_cls, n, figsize=(n*6, 6), sharex=True)

    if title is None:
        title = 'Sequence'
    for seq_n in range(n):
        axs = saxs[:, seq_n]
        if add_cls:
            sub_patches = np.concatenate([np.zeros_like(seq_patches[seq_n, :1]), seq_patches[seq_n]])
        else:
            sub_patches = seq_patches[seq_n]
        axs[0].text(4, 1, f'{title} #{seq_n}', fontsize=16)
        axs[0].grid(False)
        axs[0].set_yticks([])
        for i in range(sub_patches.shape[0]):
            label = i
            if add_cls:
                label = i-1 if i > 0 else '[CLS]'
            axs[i+1].imshow(sub_patches[i].reshape(1, -1), cmap=plt.cm.gray)
            axs[i+1].set_yticklabels([label], rotation=0)
            axs[i+1].grid(False)
            axs[i+1].set_yticks([0])
            #axs[i+1].set_ylabel(i, rotation=0)
            #axs[i+1].label_outer()
        axs[-1].set_xlabel('Features')
    fig.tight_layout()
    return fig

def plot_images(imgs, title=True):
    imgs = imgs.squeeze(1).cpu().detach().numpy()
    imgs = np.atleast_3d(imgs)
    fig, axs = plt.subplots(1, imgs.shape[0], figsize=(6, 3))
    if imgs.shape[0] == 1:
        axs = [axs]
    for i in range(imgs.shape[0]):
        axs[i].imshow(imgs[i], cmap=plt.cm.gray)
        axs[i].grid(False)
        axs[i].set_xticks([])
        axs[i].set_yticks([])
        if title:
            axs[i].set_title(f'Image #{i}')

    return fig
--------------------------------------------------------------------------------
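[Note: a hypothetical usage sketch. hist_encoding indexes encoding[i][0] for
i in 0..3, so it expects at least four data points shaped (1, n_features),
e.g. a (N, 1, F) tensor; the sizes below are made up:]

import torch
from plots.chapter10 import hist_encoding

torch.manual_seed(42)
dummy_encoding = torch.randn(4, 1, 128)  # 4 data points, 128 features each
fig = hist_encoding(dummy_encoding)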
/plots/chapter11.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib
from matplotlib import pyplot as plt

def plot_word_vectors(wv, words, other=None):
    vectors = []
    for word in words:
        try:
            vectors.append(wv[word])
        except KeyError:
            if other is not None:
                vectors.append(other[word])

    vectors = np.array(vectors)

    fig, axs = plt.subplots(len(words), 1, figsize=(18, len(words)*.7))
    if len(words) == 1:
        axs = [axs]

    for i, word in enumerate(words):
        axs[i].imshow(vectors[i].reshape(1, -1), cmap=plt.cm.RdBu, vmin=vectors.min(), vmax=vectors.max())
        axs[i].set_xticklabels([])
        axs[i].set_yticklabels(['', word, ''])
        axs[i].grid(False)

    fig.tight_layout()
    return fig

def plot_attention(tokens, alphas):
    n_tokens = max(list(map(len, tokens)))
    batch_size, n_heads, _ = alphas[:, :, 0, :].shape
    alphas = alphas.detach().cpu().numpy()[:, :, 0, :n_tokens]
    fig, axs = plt.subplots(n_heads, batch_size, figsize=(n_tokens * batch_size, n_heads))

    textcolors = ["white", "black"]
    kw = dict(horizontalalignment="center", verticalalignment="center")
    valfmt = matplotlib.ticker.StrMethodFormatter("{x:.2f}")

    for i, axr in enumerate(axs):  # row
        for j, ax in enumerate(axr):  # col
            data = alphas[j, i]
            im = ax.imshow(np.array(data.tolist()).reshape(1, -1), vmin=0, vmax=1, cmap=plt.cm.gray)
            ax.grid(False)
            if i == 0:
                ax.set_xticks(np.arange(len(tokens[j])))
                ax.set_xticklabels(tokens[j])
            else:
                ax.set_xticks([])
            ax.set_yticks([-.5, 0, .5], minor=True)
            ax.set_yticklabels(['', f'Head #{i}', ''])
            ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)

            for jp in range(data.shape[0]):
                kw.update(color=textcolors[int(im.norm(data[jp]) > .5)])
                text = im.axes.text(jp, 0, valfmt(data[jp], None), **kw)
    return fig
--------------------------------------------------------------------------------
/plots/chapter2.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

def plot_losses(losses, val_losses):
    fig = plt.figure(figsize=(10, 4))
    plt.plot(losses, label='Training Loss', c='b')
    plt.plot(val_losses, label='Validation Loss', c='r')
    plt.yscale('log')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.tight_layout()
    return fig


def plot_resumed_losses(saved_epoch, saved_losses, saved_val_losses, n_epochs, losses, val_losses):
    range_before = range(0, saved_epoch)
    range_after = range(saved_epoch, saved_epoch + n_epochs)

    fig = plt.figure(figsize=(10, 4))
    # Checkpointed loss
    plt.plot(range_before, saved_losses,
             label='Checkpointed Training Loss', c='b', linestyle='--')
    plt.plot(range_before, saved_val_losses,
             label='Checkpointed Validation Loss', c='r', linestyle='--')
    # Losses after resuming
    plt.plot(range_after, losses, label='Training Loss', c='b')
    plt.plot(range_after, val_losses, label='Validation Loss', c='r')
    # Divider
    plt.plot([saved_epoch, saved_epoch],
             [np.min(saved_losses + losses), np.max(saved_losses + losses)],
             c='k', linewidth=1, linestyle='--', label='Checkpoint')
    plt.yscale('log')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.tight_layout()
    return fig
--------------------------------------------------------------------------------
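[Note: plot_losses consumes exactly the losses and val_losses lists built by
model_training/v4.py and v5.py above:]

from plots.chapter2 import plot_losses
fig = plot_losses(losses, val_losses)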
/plots/chapter2_1.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

def figure1(x, y):
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))

    ax.scatter(x, y)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_ylim([0, 3.1])
    ax.set_title('Generated Data - Full Dataset')
    fig.tight_layout()
    return fig
--------------------------------------------------------------------------------
/plots/chapter3.py:
--------------------------------------------------------------------------------
import torch
import numpy as np
import matplotlib.pyplot as plt
from operator import itemgetter
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap
from sklearn.metrics import confusion_matrix, roc_curve, precision_recall_curve
plt.style.use('fivethirtyeight')

def odds(prob):
    return prob / (1 - prob)

def log_odds(prob):
    return np.log(odds(prob))

def sigmoid(z):
    return 1 / (1 + np.exp(-z))
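
# Added example: a quick sanity check of the three helpers above, with values
# computed by hand. For p = 0.75:
#   odds(0.75)              = 0.75 / 0.25 = 3.0
#   log_odds(0.75)          = ln(3) ~= 1.0986
#   sigmoid(log_odds(0.75)) = 0.75, since the sigmoid inverts the log-odds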

def split_cm(cm):
    # Actual negatives go in the top row,
    # above the probability line
    actual_negative = cm[0]
    # Predicted negatives go in the first column
    tn = actual_negative[0]
    # Predicted positives go in the second column
    fp = actual_negative[1]

    # Actual positives go in the bottom row,
    # below the probability line
    actual_positive = cm[1]
    # Predicted negatives go in the first column
    fn = actual_positive[0]
    # Predicted positives go in the second column
    tp = actual_positive[1]

    return tn, fp, fn, tp

def tpr_fpr(cm):
    tn, fp, fn, tp = split_cm(cm)

    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)

    return tpr, fpr

def precision_recall(cm):
    tn, fp, fn, tp = split_cm(cm)

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    return precision, recall

def probability_line(ax, y, probs, threshold, shift=0.0, annot=False, colors=None):
    if colors is None:
        colors = ['r', 'b']
    ax.grid(False)
    ax.set_ylim([-.1, .1])
    ax.axes.get_yaxis().set_visible(False)
    ax.plot([0, 1], [0, 0], linewidth=2, c='k', zorder=1)
    ax.plot([0, 0], [-.1, .1], c='k', zorder=1)
    ax.plot([1, 1], [-.1, .1], c='k', zorder=1)

    # Masks for the four outcomes (fill color encodes the actual class,
    # edge color the predicted class)
    tn = (y == 0) & (probs < threshold)
    fp = (y == 0) & (probs >= threshold)
    tp = (y == 1) & (probs >= threshold)
    fn = (y == 1) & (probs < threshold)

    ax.plot([threshold, threshold], [-.1, .1], c='k', zorder=1, linestyle='--')
    # Actual negatives (top row)
    ax.scatter(probs[tn], np.zeros(tn.sum()) + shift, c=colors[0], s=150, zorder=2, edgecolor=colors[0], linewidth=3)
    ax.scatter(probs[fp], np.zeros(fp.sum()) + shift, c=colors[0], s=150, zorder=2, edgecolor=colors[1], linewidth=3)
    # Actual positives (bottom row)
    ax.scatter(probs[tp], np.zeros(tp.sum()) - shift, c=colors[1], s=150, zorder=2, edgecolor=colors[1], linewidth=3)
    ax.scatter(probs[fn], np.zeros(fn.sum()) - shift, c=colors[1], s=150, zorder=2, edgecolor=colors[0], linewidth=3)

    ax.set_xlabel(r'$\sigma(z) = P(y=1)$')
    ax.set_title('Threshold = {}'.format(threshold))

    if annot:
        ax.annotate('TN', xy=(.20, .03), c='k', weight='bold', fontsize=20)
        ax.annotate('FN', xy=(.20, -.08), c='k', weight='bold', fontsize=20)
        ax.annotate('FP', xy=(.70, .03), c='k', weight='bold', fontsize=20)
        ax.annotate('TP', xy=(.70, -.08), c='k', weight='bold', fontsize=20)
    return ax

def probability_contour(ax, model, device, X, y, threshold, cm=None, cm_bright=None):
    if cm is None:
        cm = plt.cm.RdBu
    if cm_bright is None:
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])

    h = .02  # step size in the mesh

    x_min, x_max = -2.25, 2.25
    y_min, y_max = -2.25, 2.25

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    logits = model(torch.as_tensor(np.c_[xx.ravel(), yy.ravel()]).float().to(device))
    logits = logits.detach().cpu().numpy().reshape(xx.shape)

    yhat = sigmoid(logits)

    ax.contour(xx, yy, yhat, levels=[threshold], cmap="Greys", vmin=0, vmax=1)
    contour = ax.contourf(xx, yy, yhat, 25, cmap=cm, alpha=.8, vmin=0, vmax=1)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright, edgecolors='k')
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$\sigma(z) = P(y=1)$')
    ax.grid(False)

    ax_c = plt.colorbar(contour)
    ax_c.set_ticks([0, .25, .5, .75, 1])
    return ax

def eval_curves_from_probs(y, probabilities, threshs, line=False, annot=False):
    cms = [confusion_matrix(y, (probabilities >= threshold)) for threshold in threshs]
    rates = np.array(list(map(tpr_fpr, cms)))
    precrec = np.array(list(map(precision_recall, cms)))
    return eval_curves(rates[:, 1], rates[:, 0], precrec[:, 1], precrec[:, 0], threshs, line=line, annot=annot)

def eval_curves(fprs, tprs, recalls, precisions, thresholds, thresholds2=None, line=False, annot=False):
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))

    if thresholds2 is None:
        thresholds2 = thresholds[:]

    marker = '.r-' if line else '.r'

    axs[0].plot(fprs, tprs, marker, markersize=12, linewidth=2)
    axs[0].set_xlim([-.05, 1.05])
    axs[0].set_ylim([-.05, 1.05])
    axs[0].set_xlabel('False Positive Rate')
    axs[0].set_ylabel('True Positive Rate')
    axs[0].set_title('ROC Curve')

    axs[1].plot(recalls, precisions, marker, markersize=12, linewidth=2)
    axs[1].set_xlim([-.05, 1.05])
    axs[1].set_ylim([-.05, 1.05])
    axs[1].set_xlabel('Recall')
    axs[1].set_ylabel('Precision')
    axs[1].set_title('Precision-Recall Curve')

    if annot:
        for thresh, fpr, tpr, prec, rec in zip(thresholds, fprs, tprs, precisions, recalls):
            axs[0].annotate(str(thresh), xy=(fpr - .03, tpr - .07))

        for thresh, fpr, tpr, prec, rec in zip(thresholds2, fprs, tprs, precisions, recalls):
            axs[1].annotate(str(thresh), xy=(rec - .03, prec - .07))

    fig.tight_layout()
    return fig


def figure1(X_train, y_train, X_val, y_val, cm_bright=None):
    if cm_bright is None:
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    ax[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)  #, edgecolors='k')
    ax[0].set_xlabel(r'$X_1$')
    ax[0].set_ylabel(r'$X_2$')
    ax[0].set_xlim([-2.3, 2.3])
    ax[0].set_ylim([-2.3, 2.3])
    ax[0].set_title('Generated Data - Train')

    ax[1].scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright)  #, edgecolors='k')
    ax[1].set_xlabel(r'$X_1$')
    ax[1].set_ylabel(r'$X_2$')
    ax[1].set_xlim([-2.3, 2.3])
    ax[1].set_ylim([-2.3, 2.3])
    ax[1].set_title('Generated Data - Validation')
    fig.tight_layout()

    return fig
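
# Hypothetical usage for figure1 above (the chapter works with a two-feature
# binary classification dataset; sklearn's make_moons is one way to generate
# comparable data -- the parameters here are illustrative):
# from sklearn.datasets import make_moons
# from sklearn.model_selection import train_test_split
# X, y = make_moons(n_samples=100, noise=0.3, random_state=0)
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.2, random_state=13)
# fig = figure1(X_train, y_train, X_val, y_val)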

def figure2(prob1):
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    prob = np.linspace(.01, .99, 99)

    for i in [0, 1]:
        ax[i].plot(prob, odds(prob), linewidth=2)
        ax[i].set_xlabel('Probability')
        if i:
            ax[i].set_yscale('log')
            ax[i].set_ylabel('Odds Ratio (log scale)')
            ax[i].set_title('Odds Ratio (log scale)')
        else:
            ax[i].set_ylabel('Odds Ratio')
            ax[i].set_title('Odds Ratio')
        ax[i].scatter([prob1, .5, (1-prob1)], [odds(prob1), odds(.5), odds(1-prob1)], c='r')

    fig.tight_layout()

    return fig

def figure3(prob1):
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    prob = np.linspace(.01, .99, 99)

    ax[0].plot(prob, log_odds(prob), linewidth=2)
    ax[0].set_xlabel('Probability')
    ax[0].set_ylabel('Log Odds Ratio')
    ax[0].set_title('Log Odds Ratio')
    ax[0].scatter([prob1, .5, (1-prob1)], [log_odds(prob1), log_odds(.5), log_odds(1-prob1)], c='r')

    ax[1].plot(log_odds(prob), prob, linewidth=2)
    ax[1].set_ylabel('Probability')
    ax[1].set_xlabel('Log Odds Ratio')
    ax[1].set_title('Probability')
    ax[1].scatter([log_odds(prob1), log_odds(.5), log_odds(1-prob1)], [prob1, .5, (1-prob1)], c='r')
    fig.tight_layout()

    return fig

def figure4(prob1):
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    prob = np.linspace(.01, .99, 99)

    ax.plot(log_odds(prob), prob, linewidth=2, c='r')
    ax.set_ylabel('Probability')
    ax.set_xlabel('Log Odds Ratio')
    ax.set_title('Sigmoid')
    ax.scatter([log_odds(prob1), log_odds(.5), log_odds(1-prob1)], [prob1, .5, (1-prob1)], c='r')
    fig.tight_layout()

    return fig

def figure7(X, y, model, device, cm=None, cm_bright=None):
    if cm is None:
        cm = plt.cm.RdBu
    if cm_bright is None:
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    fig = plt.figure(figsize=(15, 4.5))

    h = .02  # step size in the mesh

    # x_min, x_max = X_train[:, 0].min() - .5, X_train[:, 0].max() + .5
    # y_min, y_max = X_train[:, 1].min() - .5, X_train[:, 1].max() + .5

    x_min, x_max = -2.25, 2.25
    y_min, y_max = -2.25, 2.25

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    logits = model(torch.as_tensor(np.c_[xx.ravel(), yy.ravel()]).float().to(device))
    logits = logits.detach().cpu().numpy().reshape(xx.shape)

    yhat = sigmoid(logits)

    # 1st plot
    ax = plt.subplot(1, 3, 1)

    contour = ax.contourf(xx, yy, logits, 25, cmap=cm, alpha=.8)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$z = b + w_1x_1 + w_2x_2$')
    ax.grid(False)
    ax_c = plt.colorbar(contour)
    ax_c.set_label("$z$", rotation=0)

    # 2nd plot
    ax = fig.add_subplot(1, 3, 2, projection='3d')

    surf = ax.plot_surface(xx, yy, yhat, rstride=1, cstride=1, alpha=.5, cmap=cm, linewidth=0, antialiased=True, vmin=0, vmax=1)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$\sigma(z) = P(y=1)$')

    ax_c = plt.colorbar(surf)
    ax_c.set_ticks([0, .25, .5, .75, 1])
    ax.view_init(30, 220)

    # 3rd plot
    ax = plt.subplot(1, 3, 3)

    ax.contour(xx, yy, yhat, levels=[.5], cmap="Greys", vmin=0, vmax=1)
    contour = ax.contourf(xx, yy, yhat, 25, cmap=cm, alpha=.8, vmin=0, vmax=1)
    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm_bright)
    # Plot the testing points
    #ax.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=cm_bright, edgecolors='k', alpha=0.6)

    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2$')
    ax.set_title(r'$\sigma(z) = P(y=1)$')
    ax.grid(False)

    ax_c = plt.colorbar(contour)
    ax_c.set_ticks([0, .25, .5, .75, 1])

    plt.tight_layout()

    return fig

def one_dimension(x, y, colors=None):
    if colors is None:
        colors = ['r', 'b']
    fig, ax = plt.subplots(1, 1, figsize=(10, 2))

    ax.grid(False)
    ax.set_ylim([-.1, .1])
    ax.axes.get_yaxis().set_visible(False)
    ax.plot([-3, 3], [0, 0], linewidth=2, c='k', zorder=1)
    ax.plot([0, 0], [-.03, .03], c='k', zorder=1)

    ax.scatter(x[y==1], np.zeros_like(x[y==1]), c=colors[1], s=150, zorder=2, linewidth=3)
    ax.scatter(x[y==0], np.zeros_like(x[y==0]), c=colors[0], s=150, zorder=2, linewidth=3)
    ax.set_xlabel(r'$X_1$')
    ax.set_title('One Dimension')

    fig.tight_layout()

    return fig

def two_dimensions(x, y, colors=None):
    if colors is None:
        colors = ['r', 'b']

    x2 = np.concatenate([x.reshape(-1, 1), (x ** 2).reshape(-1, 1)], axis=1)

    fig = plt.figure(figsize=(10, 4.5))
    gs = fig.add_gridspec(3, 2)

    ax = fig.add_subplot(gs[2, 0])

    ax.grid(False)
    ax.set_ylim([-.1, .1])
    ax.axes.get_yaxis().set_visible(False)
    ax.plot([-3, 3], [0, 0], linewidth=2, c='k', zorder=1)
    ax.plot([0, 0], [-.03, .03], c='k', zorder=1)

    ax.scatter(x[y==1], np.zeros_like(x[y==1]), c=colors[1], s=150, zorder=2, linewidth=3)
    ax.scatter(x[y==0], np.zeros_like(x[y==0]), c=colors[0], s=150, zorder=2, linewidth=3)
    ax.set_xlabel(r'$X_1$')
    ax.set_title('One Dimension')

    ax = fig.add_subplot(gs[:, 1])

    ax.scatter(*x2[y==1, :].T, c='b', s=150, zorder=2, linewidth=3)
    ax.scatter(*x2[y==0, :].T, c='r', s=150, zorder=2, linewidth=3)
    ax.plot([-2, 2], [1, 1], 'k--', linewidth=2)
    ax.set_xlabel(r'$X_1$')
    ax.set_ylabel(r'$X_2=X_1^2$')
    ax.set_title('Two Dimensions')

    fig.tight_layout()
    return fig

def figure9(x, y, model, device, probabilities, threshold, shift=0.0, annot=False, cm=None, cm_bright=None):
    fig = plt.figure(figsize=(15, 5))
    gs = fig.add_gridspec(3, 3)

    ax = fig.add_subplot(gs[:, 0])
    probability_contour(ax, model, device, x, y, threshold, cm, cm_bright)

    if cm_bright is None:
        colors = ['r', 'b']
    else:
        colors = cm_bright.colors

    ax = fig.add_subplot(gs[1, 1:])
    probability_line(ax, y, probabilities, threshold, shift, annot, colors)

    fig.tight_layout()
    return fig
395 | fig, ax = plt.subplots(1, 1, figsize=(10, 2)) 396 | probability_line(ax, y, probabilities, threshold, shift, annot, colors) 397 | fig.tight_layout() 398 | return fig 399 | 400 | def figure17(y, probabilities, threshs): 401 | cms = [confusion_matrix(y, (probabilities >= threshold)) for threshold in threshs] 402 | rates = np.array(list(map(tpr_fpr, cms))) 403 | precrec = np.array(list(map(precision_recall, cms))) 404 | precrec = np.nan_to_num(precrec, nan=1.) 405 | fig = eval_curves(rates[:, 1], rates[:, 0], precrec[:, 1], precrec[:, 0], threshs, line=True, annot=False) 406 | return fig 407 | 408 | def figure19(y, probabilities, threshs=(.4, .5, .57), colors=None): 409 | fig, axs = plt.subplots(3, 1, figsize=(10, 6)) 410 | probability_line(axs[0], y, probabilities, threshs[0], 0.0, False, colors) 411 | probability_line(axs[1], y, probabilities, threshs[1], 0.0, False, colors) 412 | probability_line(axs[2], y, probabilities, threshs[2], 0.0, False, colors) 413 | fig.tight_layout() 414 | return fig 415 | 416 | def figure20(y): 417 | fpr_perfect, tpr_perfect, thresholds1_perfect = roc_curve(y, y) 418 | prec_perfect, rec_perfect, thresholds2_perfect = precision_recall_curve(y, y) 419 | fig = eval_curves(fpr_perfect, tpr_perfect, rec_perfect, prec_perfect, thresholds1_perfect, thresholds2_perfect, line=True) 420 | return fig 421 | 422 | def figure21(y, probabilities): 423 | fpr_random, tpr_random, thresholds1_random = roc_curve(y, probabilities) 424 | prec_random, rec_random, thresholds2_random = precision_recall_curve(y, probabilities) 425 | fig = eval_curves(fpr_random, tpr_random, rec_random, prec_random, thresholds1_random, thresholds2_random, line=True) 426 | axs = fig.axes 427 | axs[0].plot([0, 1], [0, 1], 'k--', linewidth=2) 428 | axs[1].plot([0, 1], [y.mean(), y.mean()], 'k--', linewidth=2) 429 | return fig -------------------------------------------------------------------------------- /plots/chapter4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | plt.style.use('fivethirtyeight') 6 | 7 | def plot_images(images, targets, n_plot=30): 8 | n_rows = n_plot // 6 + ((n_plot % 6) > 0) 9 | fig, axes = plt.subplots(n_rows, 6, figsize=(9, 1.5 * n_rows)) 10 | axes = np.atleast_2d(axes) 11 | 12 | for i, (image, target) in enumerate(zip(images[:n_plot], targets[:n_plot])): 13 | row, col = i // 6, i % 6 14 | ax = axes[row, col] 15 | ax.set_title('#{} - Label:{}'.format(i, target), {'size': 12}) 16 | # plot filter channel in grayscale 17 | ax.imshow(image.squeeze(), cmap='gray', vmin=0, vmax=1) 18 | 19 | for ax in axes.flat: 20 | ax.set_xticks([]) 21 | ax.set_yticks([]) 22 | ax.label_outer() 23 | 24 | plt.tight_layout() 25 | return fig 26 | 27 | def image_channels(red, green, blue, rgb, gray, rows=(0, 1, 2)): 28 | fig, axs = plt.subplots(len(rows), 4, figsize=(15, 5.5)) 29 | 30 | zeros = np.zeros((5, 5), dtype=np.uint8) 31 | 32 | titles1 = ['Red', 'Green', 'Blue', 'Grayscale Image'] 33 | titles0 = ['image_r', 'image_g', 'image_b', 'image_gray'] 34 | titles2 = ['as first channel', 'as second channel', 'as third channel', 'RGB Image'] 35 | 36 | idx0 = np.argmax(np.array(rows) == 0) 37 | idx1 = np.argmax(np.array(rows) == 1) 38 | idx2 = np.argmax(np.array(rows) == 2) 39 | 40 | for i, m in enumerate([red, green, blue, gray]): 41 | if 0 in rows: 42 | axs[idx0, i].axis('off') 43 | axs[idx0, i].invert_yaxis() 44 | if (1 in rows) or (i < 3): 45 | axs[idx0, 
i].text(0.15, 0.25, str(m.astype(np.uint8)), verticalalignment='top') 46 | axs[idx0, i].set_title(titles0[i], fontsize=16) 47 | 48 | if 1 in rows: 49 | axs[idx1, i].set_title(titles1[i], fontsize=16) 50 | axs[idx1, i].set_xlabel('5x5', fontsize=14) 51 | axs[idx1, i].imshow(m, cmap=plt.cm.gray) 52 | 53 | if 2 in rows: 54 | axs[idx2, i].set_title(titles2[i], fontsize=16) 55 | axs[idx2, i].set_xlabel(f'5x5x3 - {titles1[i][0]} only', fontsize=14) 56 | if i < 3: 57 | stacked = [zeros] * 3 58 | stacked[i] = m 59 | axs[idx2, i].imshow(np.stack(stacked, axis=2)) 60 | else: 61 | axs[idx2, i].imshow(rgb) 62 | 63 | for r in [1, 2]: 64 | if r in rows: 65 | idx = idx1 if r == 1 else idx2 66 | axs[idx, i].set_xticks([]) 67 | axs[idx, i].set_yticks([]) 68 | for k, v in axs[idx, i].spines.items(): 69 | v.set_color('black') 70 | v.set_linewidth(.8) 71 | 72 | if 1 in rows: 73 | axs[idx1, 0].set_ylabel('Single\nChannel\n(grayscale)', rotation=0, labelpad=40, fontsize=12) 74 | axs[idx1, 3].set_xlabel('5x5 = 0.21R + 0.72G + 0.07B') 75 | if 2 in rows: 76 | axs[idx2, 0].set_ylabel('Three\nChannels\n(color)', rotation=0, labelpad=40, fontsize=12) 77 | axs[idx2, 3].set_xlabel('5x5x3 = (R, G, B) stacked') 78 | fig.tight_layout() 79 | return fig 80 | 81 | def figure5(sbs_logistic, sbs_nn): 82 | fig, axs = plt.subplots(1, 2, figsize=(15, 6)) 83 | axs[0].plot(sbs_logistic.losses, 'b--', label='Logistic - Training') 84 | axs[1].plot(sbs_logistic.val_losses, 'r--', label='Logistic - Validation') 85 | axs[0].plot(sbs_nn.losses, 'b', label='3-layer Network - Training', alpha=.5) 86 | axs[1].plot(sbs_nn.val_losses, 'r', label='3-layer Network - Validation', alpha=.5) 87 | axs[0].set_xlabel('Epochs') 88 | axs[0].set_ylabel('Losses') 89 | axs[0].set_ylim([0.45, 0.75]) 90 | axs[0].legend() 91 | axs[1].set_xlabel('Epochs') 92 | axs[1].set_ylabel('Losses') 93 | axs[1].set_ylim([0.45, 0.75]) 94 | axs[1].legend() 95 | fig.tight_layout() 96 | return fig 97 | 98 | def figure7(weights): 99 | fig, axs = plt.subplots(1, 5, figsize=(15, 4)) 100 | 101 | for i, m in enumerate(weights): 102 | axs[i].imshow(m.reshape(-1, 5).tolist(), cmap='gray') 103 | axs[i].grid(False) 104 | axs[i].set_xticks([]) 105 | axs[i].set_yticks([]) 106 | axs[i].set_title(r'$w_{0' + str(i) + '}$') 107 | 108 | fig.suptitle('Hidden Layer #0') 109 | fig.subplots_adjust(top=0.6) 110 | fig.tight_layout() 111 | return fig 112 | 113 | def figure5b(sbs_logistic, sbs_nn, sbs_relu): 114 | fig, axs = plt.subplots(1, 2, figsize=(15, 6)) 115 | axs[0].plot(sbs_logistic.losses, 'b--', label='Logistic - Training') 116 | axs[1].plot(sbs_logistic.val_losses, 'r--', label='Logistic - Validation') 117 | 118 | axs[0].plot(sbs_nn.losses, 'b', label='3-layer Network - Training', alpha=.5) 119 | axs[1].plot(sbs_nn.val_losses, 'r', label='3-layer Network - Validation', alpha=.5) 120 | 121 | axs[0].plot(sbs_relu.losses, 'b', label='ReLU Network - Training', alpha=.8) 122 | axs[1].plot(sbs_relu.val_losses, 'r', label='ReLU Network - Validation', alpha=.8) 123 | 124 | axs[0].set_xlabel('Epochs') 125 | axs[0].set_ylabel('Losses') 126 | axs[0].legend() 127 | axs[1].set_xlabel('Epochs') 128 | axs[1].set_ylabel('Losses') 129 | axs[1].legend() 130 | fig.tight_layout() 131 | return fig 132 | 133 | def plot_activation(func, name=None): 134 | z = torch.linspace(-5, 5, 1000) 135 | z.requires_grad_(True) 136 | func(z).sum().backward() 137 | sig = func(z).detach() 138 | 139 | fig, ax = plt.subplots(1, 1, figsize=(8, 5)) 140 | 141 | # Move left y-axis and bottom x-axis to centre, passing through (0,0)
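# Note: the gradient curve plotted in red below comes from autograd; since z
# requires gradients, calling func(z).sum().backward() above fills z.grad with
# the elementwise derivative of the activation evaluated at every point of z.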
142 | if name is None: 143 | try: 144 | name = func.__name__ 145 | except AttributeError: 146 | name = '' 147 | 148 | if name == 'sigmoid': 149 | ax.set_ylim([0, 1.1]) 150 | elif name == 'tanh': 151 | ax.set_ylim([-1.1, 1.1]) 152 | elif name == 'relu': 153 | ax.set_ylim([-.1, 5.01]) 154 | else: 155 | ax.set_ylim([-1.1, 5.01]) 156 | 157 | ax.set_xticks(np.arange(-5, 6, 1)) 158 | ax.set_xlabel('z') 159 | ax.set_ylabel(r'$\sigma(z)$') 160 | 161 | # Eliminate upper and right axes 162 | ax.spines['right'].set_color('none') 163 | ax.spines['top'].set_color('none') 164 | 165 | # Show ticks in the left and lower axes only 166 | ax.xaxis.set_ticks_position('bottom') 167 | ax.yaxis.set_ticks_position('left') 168 | 169 | ax.set_title(name, fontsize=16) 170 | ax.plot(z.detach().numpy(), sig.numpy(), c='k', label='Activation') 171 | ax.plot(z.detach().numpy(), z.grad.numpy(), c='r', label='Gradient') 172 | ax.legend(loc=2) 173 | 174 | fig.tight_layout() 175 | fig.show() 176 | return fig 177 | 178 | def weights_comparison(w_logistic_output, w_nn_equiv): 179 | fig = plt.figure(figsize=(15, 6)) 180 | ax0 = plt.subplot2grid((1, 3), (0, 0), colspan=2) 181 | ax1 = plt.subplot2grid((1, 3), (0, 2)) 182 | 183 | ax0.bar(np.arange(25), w_logistic_output.cpu().numpy().squeeze(), alpha=1, label='Logistic') 184 | ax0.bar(np.arange(25), w_nn_equiv.cpu().numpy().squeeze(), alpha=.5, label='3-layer Network (Composed)') 185 | ax0.set_title('Weights') 186 | ax0.set_xlabel('Parameters') 187 | ax0.set_ylabel('Value') 188 | ax0.legend() 189 | 190 | ax1.scatter(w_logistic_output.cpu().numpy(), w_nn_equiv.cpu().numpy(), alpha=.5) 191 | ax1.set_xlabel('Logistic') 192 | ax1.set_ylabel('3-layer network (Composed)') 193 | ax1.set_title('Weights') 194 | ax1.set_xlim([-2, 2]) 195 | ax1.set_ylim([-2, 2]) 196 | 197 | fig.tight_layout() 198 | return fig 199 | -------------------------------------------------------------------------------- /plots/chapter5.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | plt.style.use('fivethirtyeight') 4 | 5 | def plot_images(images, targets, n_plot=30): 6 | n_rows = n_plot // 10 + ((n_plot % 10) > 0) 7 | fig, axes = plt.subplots(n_rows, 10, figsize=(15, 1.5 * n_rows)) 8 | axes = np.atleast_2d(axes) 9 | 10 | for i, (image, target) in enumerate(zip(images[:n_plot], targets[:n_plot])): 11 | row, col = i // 10, i % 10 12 | ax = axes[row, col] 13 | ax.set_title('#{} - Label:{}'.format(i, target), {'size': 12}) 14 | # plot filter channel in grayscale 15 | ax.imshow(image.squeeze(), cmap='gray', vmin=0, vmax=1) 16 | 17 | for ax in axes.flat: 18 | ax.set_xticks([]) 19 | ax.set_yticks([]) 20 | ax.label_outer() 21 | 22 | plt.tight_layout() 23 | return fig -------------------------------------------------------------------------------- /plots/chapter6.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | from copy import deepcopy 7 | from PIL import Image 8 | from stepbystep.v2 import StepByStep 9 | from torchvision.transforms import ToPILImage 10 | from sklearn.linear_model import LinearRegression 11 | from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, MultiStepLR, CyclicLR, LambdaLR 12 | 13 | def EWMA(past_value, current_value, alpha): 14 | return (1 - alpha) * past_value + alpha * current_value 15 | 16 | def calc_ewma(values, 
period): 17 | alpha = 2 / (period + 1) 18 | result = [] 19 | for v in values: 20 | try: 21 | prev_value = result[-1] 22 | except IndexError: 23 | prev_value = 0 24 | 25 | new_value = EWMA(prev_value, v, alpha) 26 | result.append(new_value) 27 | return np.array(result) 28 | 29 | def correction(averaged_value, beta, steps): 30 | return averaged_value / (1 - (beta ** steps)) 31 | 32 | def figure1(folder='rps'): 33 | paper = Image.open(f'{folder}/paper/paper02-089.png') 34 | rock = Image.open(f'{folder}/rock/rock06ck02-100.png') 35 | scissors = Image.open(f'{folder}/scissors/testscissors02-006.png') 36 | 37 | images = [rock, paper, scissors] 38 | titles = ['Rock', 'Paper', 'Scissors'] 39 | 40 | fig, axs = plt.subplots(1, 3, figsize=(12, 5)) 41 | for ax, image, title in zip(axs, images, titles): 42 | ax.imshow(image) 43 | ax.set_xticks([]) 44 | ax.set_yticks([]) 45 | ax.set_title(title) 46 | 47 | return fig 48 | 49 | def calc_corrected_ewma(values, period): 50 | ewma = calc_ewma(values, period) 51 | 52 | alpha = 2 / (period + 1) 53 | beta = 1 - alpha 54 | 55 | result = [] 56 | for step, v in enumerate(ewma): 57 | adj_value = correction(v, beta, step + 1) 58 | result.append(adj_value) 59 | 60 | return np.array(result) 61 | 62 | def figure2(first_images, first_labels): 63 | fig, axs = plt.subplots(1, 6, figsize=(12, 4)) 64 | titles = ['Paper', 'Rock', 'Scissors'] 65 | for i in range(6): 66 | image, label = ToPILImage()(first_images[i]), first_labels[i] 67 | axs[i].imshow(image) 68 | axs[i].set_xticks([]) 69 | axs[i].set_yticks([]) 70 | axs[i].set_title(titles[label], fontsize=12) 71 | fig.tight_layout() 72 | return fig 73 | 74 | def plot_dist(ax, distrib_outputs, p): 75 | ax.hist(distrib_outputs, bins=np.linspace(0, 20, 21)) 76 | ax.set_xlabel('Sum of Adjusted Outputs') 77 | ax.set_ylabel('# of Scenarios') 78 | ax.set_title('p = {:.2f}'.format(p)) 79 | ax.set_ylim([0, 500]) 80 | mean_value = distrib_outputs.mean() 81 | ax.plot([mean_value, mean_value], [0, 500], c='r', linestyle='--', label='Mean = {:.2f}'.format(mean_value)) 82 | ax.legend() 83 | 84 | def figure7(p, distrib_outputs): 85 | fig, ax = plt.subplots(1, 1, figsize=(6, 4)) 86 | plot_dist(ax, distrib_outputs, p) 87 | fig.tight_layout() 88 | return fig 89 | 90 | def figure8(ps=(0.1, 0.3, 0.5, 0.9)): 91 | spaced_points = torch.linspace(.1, 1.1, 11) 92 | fig, axs = plt.subplots(1, 4, figsize=(15, 4)) 93 | for ax, p in zip(axs.flat, ps): 94 | torch.manual_seed(17) 95 | distrib_outputs = torch.tensor([F.linear(F.dropout(spaced_points, p=p), 96 | weight=torch.ones(11), bias=torch.tensor(0)) 97 | for _ in range(1000)]) 98 | plot_dist(ax, distrib_outputs, p) 99 | ax.label_outer() 100 | fig.tight_layout() 101 | return fig 102 | 103 | def figure9(first_images, seed=17, p=.33): 104 | torch.manual_seed(seed) 105 | fig, axs = plt.subplots(1, 3, figsize=(12, 4)) 106 | axs[0].imshow(ToPILImage()(first_images[0])) 107 | axs[0].set_title('Original Image') 108 | axs[0].grid(False) 109 | axs[0].set_xticks([]) 110 | axs[0].set_yticks([]) 111 | axs[1].imshow(ToPILImage()(F.dropout(first_images[:1], p=p)[0])) 112 | axs[1].set_title('Regular Dropout') 113 | axs[1].grid(False) 114 | axs[1].set_xticks([]) 115 | axs[1].set_yticks([]) 116 | axs[2].imshow(ToPILImage()(F.dropout2d(first_images[:1], p=p)[0])) 117 | axs[2].set_title('Two-Dimensional Dropout') 118 | axs[2].grid(False) 119 | axs[2].set_xticks([]) 120 | axs[2].set_yticks([]) 121 | fig.tight_layout() 122 | return fig 123 | 124 | def figure11(losses, val_losses, losses_nodrop, val_losses_nodrop): 125 | 
fig, axs = plt.subplots(1, 1, figsize=(10, 5)) 126 | axs.plot(losses, 'b', label='Training Losses - Dropout') 127 | axs.plot(val_losses, 'r', label='Validation Losses - Dropout') 128 | axs.plot(losses_nodrop, 'b--', label='Training Losses - No Dropout') 129 | axs.plot(val_losses_nodrop, 'r--', label='Validation Losses - No Dropout') 130 | plt.yscale('log') 131 | plt.xlabel('Epochs') 132 | plt.ylabel('Loss') 133 | plt.title('Regularizing Effect') 134 | fig.legend(loc='lower left') 135 | fig.tight_layout() 136 | return fig 137 | 138 | def figure15(alpha=1/3, periods=5, steps=10): 139 | t = np.arange(1, steps+1) 140 | fig, ax = plt.subplots(1, 1, figsize=(6, 4)) 141 | ax.bar(t-1, alpha*(1-alpha)**(t-1), label='EWMA') 142 | ax.bar(t-1, [1/periods]*periods + [0]*(steps-periods), color='r', alpha=.3, label='MA') 143 | ax.set_xticks(t-1) 144 | ax.grid(False) 145 | ax.set_xlabel('Lag') 146 | ax.set_ylabel('Weight') 147 | ax.set_title(r'$EWMA\ \alpha=\frac{1}{3}$ vs MA (5 periods)') 148 | ax.legend() 149 | fig.tight_layout() 150 | return fig 151 | 152 | def ma_vs_ewma(values, periods=19): 153 | ma19 = pd.Series(values).rolling(min_periods=0, window=periods).mean() 154 | fig, ax = plt.subplots(1, 1, figsize=(6, 4)) 155 | ax.plot(values, c='k', label='Temperatures') 156 | ax.plot(ma19, c='k', linestyle='--', label='MA') 157 | ax.plot(calc_ewma(values, periods), c='r', linestyle='--', label='EWMA') 158 | ax.plot(calc_corrected_ewma(values, periods), c='r', linestyle='-', label='Bias-corrected EWMA') 159 | ax.set_title('MA vs EWMA') 160 | ax.set_ylabel('Temperature') 161 | ax.set_xlabel('Days') 162 | ax.legend(fontsize=12) 163 | fig.tight_layout() 164 | return fig 165 | 166 | def figure17(gradients, corrected_gradients, corrected_sq_gradients, adapted_gradients): 167 | fig, axs = plt.subplots(1, 3, figsize=(15, 5)) 168 | ax = axs[0] 169 | ax.plot(gradients, c='k', label=r'$Gradients$') 170 | ax.plot(corrected_gradients, c='r', linestyle='-', label=r'$Bias-corrected\ EWMA(grad)$') 171 | ax.set_title('EWMA for Smoothing') 172 | ax.set_ylabel('Gradient') 173 | ax.set_xlabel('Mini-batches') 174 | ax.set_ylim([-1.5, 1.5]) 175 | ax.legend(fontsize=12) 176 | 177 | ax = axs[1] 178 | ax.plot(1/(np.sqrt(corrected_sq_gradients)+1e-8), c='b', linestyle='-', label=r'$\frac{1}{\sqrt{Bias-corrected\ EWMA(grad^2)}}$') 179 | ax.set_title('EWMA for Scaling') 180 | ax.set_ylabel('Factor') 181 | ax.set_xlabel('Mini-batches') 182 | ax.set_ylim([0, 5]) 183 | ax.legend(fontsize=12) 184 | 185 | ax = axs[2] 186 | ax.plot(gradients, c='k', label='Gradients') 187 | ax.plot(adapted_gradients, c='g', label='Adapted Gradients') 188 | ax.set_title('Gradients') 189 | ax.set_ylabel('Gradient') 190 | ax.set_xlabel('Mini-batches') 191 | ax.set_ylim([-1.5, 1.5]) 192 | ax.legend(fontsize=12) 193 | fig.tight_layout() 194 | return fig 195 | 196 | def contour_data(x_tensor, y_tensor): 197 | linr = LinearRegression() 198 | linr.fit(x_tensor, y_tensor) 199 | b, w = linr.intercept_, linr.coef_[0] 200 | 201 | # we have to split the ranges in 100 evenly spaced intervals each 202 | b_range = np.linspace(.7, 2.3, 101) 203 | w_range = np.linspace(.7, 2.3, 101) 204 | # meshgrid is a handy function that generates a grid of b and w 205 | # values for all combinations 206 | bs, ws = np.meshgrid(b_range, w_range) 207 | all_predictions = np.apply_along_axis( 208 | func1d=lambda x: bs + ws * x, 209 | axis=1, 210 | arr=x_tensor.numpy() 211 | ) 212 | all_labels = y_tensor.numpy().reshape(-1, 1, 1) 213 | all_errors = (all_predictions - all_labels) 214 |
all_losses = (all_errors ** 2).mean(axis=0) 215 | return b, w, bs, ws, all_losses 216 | 217 | def plot_paths(results, b, w, bs, ws, all_losses, axs=None): 218 | if axs is None: 219 | fig, axs = plt.subplots(1, len(results), figsize=(5 * len(results), 5)) 220 | axs = np.atleast_2d(axs) 221 | axs = [ax for row in axs for ax in row] 222 | for i, (ax, desc) in enumerate(zip(axs, results.keys())): 223 | biases = np.array(results[desc]['parms']['']['linear.bias']).squeeze() 224 | weights = np.array(results[desc]['parms']['']['linear.weight']).squeeze() 225 | ax.plot(biases, weights, '-o', linewidth=1, zorder=1, c='k', markersize=4) 226 | # Loss surface 227 | CS = ax.contour(bs[0, :], ws[:, 0], all_losses, cmap=plt.cm.jet, levels=12) 228 | ax.clabel(CS, inline=1, fontsize=10) 229 | ax.scatter(b, w, c='r', zorder=2, s=40) 230 | ax.set_xlim([.7, 2.3]) 231 | ax.set_ylim([.7, 2.3]) 232 | ax.set_xlabel('Bias') 233 | ax.set_ylabel('Weight') 234 | ax.set_title(desc) 235 | ax.label_outer() 236 | fig = ax.get_figure() 237 | fig.tight_layout() 238 | return fig 239 | 240 | def plot_losses(results, axs=None): 241 | n = len(results.keys()) 242 | if axs is None: 243 | fig, axs = plt.subplots(1, n, figsize=(5*n, 4)) 244 | else: 245 | fig = axs[0].get_figure() 246 | for ax, k in zip(axs, results.keys()): 247 | ax.plot(results[k]['losses'], label='Training Loss', c='b') 248 | ax.plot(results[k]['val_losses'], label='Validation Loss', c='r') 249 | ax.set_yscale('log') 250 | ax.set_xlabel('Epochs') 251 | ax.set_ylabel('Loss') 252 | ax.set_ylim([1e-3, 1]) 253 | ax.set_title(k) 254 | ax.legend() 255 | fig.tight_layout() 256 | return fig 257 | 258 | def momentum(past_value, current_value, beta): 259 | return beta * past_value + current_value 260 | 261 | def calc_momentum(values, beta): 262 | result = [] 263 | for v in values: 264 | try: 265 | prev_value = result[-1] 266 | except IndexError: 267 | prev_value = 0 268 | 269 | new_value = momentum(prev_value, v, beta) 270 | result.append(new_value) 271 | return np.array(result) 272 | 273 | def calc_nesterov(values, beta): 274 | result = calc_momentum(values, beta) 275 | return beta * result + values 276 | 277 | def figure21(results): 278 | parm = 'linear.weight' 279 | 280 | fig, axs = plt.subplots(1, 3, figsize=(15, 5)) 281 | 282 | for i, ax in enumerate(axs): 283 | desc = list(results.keys())[i] 284 | gradients = np.array(results[desc]['grads'][''][parm]).squeeze() 285 | momentums = calc_momentum(gradients, 0.9) 286 | nesterovs = calc_nesterov(gradients, 0.9) 287 | ax.plot(gradients, c='k', label='Gradients') 288 | if i > 0: 289 | ax.plot(momentums, c='r', label='Momentums') 290 | if i > 1: 291 | ax.plot(nesterovs, c='b', label='Nesterov Momentums') 292 | ax.set_title(desc) 293 | ax.set_ylabel('Gradient') 294 | ax.set_xlabel('Mini-batches') 295 | ax.set_ylim([-2, 1.5]) 296 | ax.legend(fontsize=12) 297 | 298 | fig.tight_layout() 299 | return fig 300 | 301 | def plot_scheduler(dummy_optimizer, dummy_scheduler, logscale=True, ax=None): 302 | learning_rates = [] 303 | for i in range(12): 304 | current_lr = list(map(lambda d: d['lr'], dummy_scheduler.optimizer.state_dict()['param_groups'])) 305 | learning_rates.append(current_lr) 306 | dummy_optimizer.step() 307 | if isinstance(dummy_scheduler, ReduceLROnPlateau): 308 | dummy_loss = 0.1 309 | dummy_scheduler.step(dummy_loss) 310 | else: 311 | dummy_scheduler.step() 312 | 313 | if ax is None: 314 | fig, ax = plt.subplots(1, 1, figsize=(5, 4)) 315 | 316 | ax.plot(learning_rates) 317 | if logscale: 318 | ax.set_yscale('log') 319 
| ax.set_xlabel('Steps') 320 | ax.set_ylabel('Learning Rate') 321 | ax.set_title(type(dummy_scheduler).__name__) 322 | fig = ax.get_figure() 323 | fig.tight_layout() 324 | return fig 325 | 326 | def figure26(dummy_optimizer, dummy_schedulers): 327 | fig, axs = plt.subplots(1, 3, figsize=(15, 4)) 328 | fig = plot_scheduler(dummy_optimizer, dummy_schedulers[0], ax=axs[0], logscale=False) 329 | fig = plot_scheduler(dummy_optimizer, dummy_schedulers[1], ax=axs[1], logscale=False) 330 | fig = plot_scheduler(dummy_optimizer, dummy_schedulers[2], ax=axs[2], logscale=False) 331 | axs[0].set_ylim([9e-5, 1e-3]) 332 | axs[1].set_ylim([9e-5, 1e-3]) 333 | axs[2].set_ylim([9e-5, 1e-3]) 334 | axs[0].set_title('CyclicLR - mode=triangular') 335 | axs[1].set_title('CyclicLR - mode=triangular2') 336 | axs[2].set_title('CyclicLR - mode=exp_range') 337 | fig.tight_layout() 338 | return fig 339 | 340 | def compare_optimizers(model, loss_fn, optimizers, train_loader, val_loader=None, schedulers=None, layers_to_hook='', n_epochs=50): 341 | from stepbystep.v3 import StepByStep 342 | results = {} 343 | model_state = deepcopy(model).state_dict() 344 | 345 | for desc, opt in optimizers.items(): 346 | model.load_state_dict(model_state) 347 | 348 | optimizer = opt['class'](model.parameters(), **opt['parms']) 349 | 350 | sbs = StepByStep(model, loss_fn, optimizer) 351 | sbs.set_loaders(train_loader, val_loader) 352 | 353 | try: 354 | if schedulers is not None: 355 | sched = schedulers[desc] 356 | scheduler = sched['class'](optimizer, **sched['parms']) 357 | sbs.set_lr_scheduler(scheduler) 358 | except KeyError: 359 | pass 360 | 361 | sbs.capture_parameters(layers_to_hook) 362 | sbs.capture_gradients(layers_to_hook) 363 | sbs.train(n_epochs) 364 | sbs.remove_hooks() 365 | 366 | parms = deepcopy(sbs._parameters) 367 | grads = deepcopy(sbs._gradients) 368 | 369 | lrs = sbs.learning_rates[:] 370 | if not len(lrs): 371 | lrs = [list(map(lambda p: p['lr'], optimizer.state_dict()['param_groups']))] * n_epochs 372 | 373 | results.update({desc: {'parms': parms, 374 | 'grads': grads, 375 | 'losses': np.array(sbs.losses), 376 | 'val_losses': np.array(sbs.val_losses), 377 | 'state': optimizer.state_dict(), 378 | 'lrs': lrs}}) 379 | 380 | return results 381 | 382 | def figure28(results, b, w, bs, ws, all_losses): 383 | axs = [] 384 | fig = plt.figure(figsize=(15, 12)) 385 | for i in range(3): 386 | axs.append(plt.subplot2grid((5, 3), (0, i), rowspan=2)) 387 | for i in range(3): 388 | axs.append(plt.subplot2grid((5, 3), (3, i), rowspan=2)) 389 | for i in range(3): 390 | axs.append(plt.subplot2grid((5, 3), (2, i))) 391 | 392 | lrs = [results[k]['lrs'] for k in ['SGD + Momentum', 'SGD + Momentum + Step', 'SGD + Momentum + Cycle']] 393 | for ax, l, title in zip(axs[6:], lrs, ['No Scheduler', 'StepLR', 'CyclicLR']): 394 | ax.plot(l) 395 | ax.set_title(title) 396 | if title == 'CyclicLR': 397 | ax.set_xlabel('Mini-batches') 398 | else: 399 | ax.set_xlabel('Epochs') 400 | ax.set_ylabel('Learning Rate') 401 | ax.set_ylim([0.0, .11]) 402 | 403 | fig = plot_paths(results, b, w, bs, ws, all_losses, axs=axs[:6]) 404 | for ax in axs[:6]: 405 | ax.set_xlabel('Bias') 406 | fig.tight_layout() 407 | return fig -------------------------------------------------------------------------------- /plots/chapter7.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | def figure1(): 5 | data = {'AlexNet': (61, .727, 41.8), 6 | 'ResNet-18': (12, 2, 30.24 ), 7 | 'ResNet-34':
(22, 4, 26.7), 8 | 'ResNet-50': (26, 4, 24.6), 9 | 'ResNet-101': (45, 8, 23.4), 10 | 'ResNet-152': (60, 11, 23), 11 | 'VGG-16': (138, 16, 28.5), 12 | 'VGG-19': (144, 20, 28.7), 13 | 'Inception-V3': (27, 6, 22.5), 14 | 'GoogLeNet': (13, 2, 34.2),} 15 | 16 | names = list(data.keys()) 17 | stats = np.array(list(data.values())) 18 | xoff = [0, 0, 0, -.5, 0, 0, 0, 0, -.7, 0] 19 | yoff = [1.5, 0, -5., .5, 1.3, 1.5, 3.5, 3.5, .6, 0] 20 | 21 | fig, ax = plt.subplots(1, 1, figsize=(10, 6)) 22 | ax.scatter(stats[:, 1], 100-stats[:, 2], s=50*stats[:, 0], c=np.arange(12,2,-1), cmap=plt.cm.jet) 23 | ax.scatter(stats[:, 1], 100-stats[:, 2], c='w', s=4) 24 | for i, txt in enumerate(names): 25 | ax.annotate(txt, (stats[i, 1]-.65+xoff[i], 100-stats[i, 2]+1.7+yoff[i]), fontsize=12) 26 | ax.set_xlim([0, 22]) 27 | ax.set_ylim([50, 85]) 28 | ax.set_xlabel('Number of Operations - GFLOPS') 29 | ax.set_ylabel('Top-1 Accuracy (%)') 30 | ax.set_title('Comparing Architectures') 31 | return fig 32 | 33 | def compare_grayscale(converted, grayscale): 34 | fig, axs = plt.subplots(1, 2, figsize=(8, 4)) 35 | for img, ax, title in zip([converted, grayscale], axs, ['Converted', 'Grayscale']): 36 | ax.imshow(img, cmap=plt.cm.gray) 37 | ax.grid(False) 38 | ax.set_title(title) 39 | ax.set_xticks([]) 40 | ax.set_yticks([]) 41 | fig.tight_layout() 42 | return fig 43 | 44 | def before_batchnorm(batch): 45 | fig, axs = plt.subplots(1, 2, figsize=(12, 4)) 46 | for i in range(2): 47 | feature = batch[0][:, i] 48 | axs[i].hist(feature, bins=np.linspace(-3, 3, 15), alpha=.5) 49 | axs[i].set_xlabel(f'Feature #{i}') 50 | axs[i].set_ylabel('# of points') 51 | axs[i].set_title(f'mean={feature.mean():.4f} var={feature.var():.4f}') 52 | axs[i].set_ylim([0, 13]) 53 | axs[i].label_outer() 54 | fig.tight_layout() 55 | return fig 56 | 57 | def after_batchnorm(batch, normalized): 58 | fig, axs = plt.subplots(1, 2, figsize=(12, 4)) 59 | for i in range(2): 60 | feature = batch[0][:, i] 61 | normed = normalized[:, i] 62 | axs[i].hist(feature, bins=np.linspace(-3, 3, 15), alpha=.5, label='Original') 63 | axs[i].hist(normed, bins=np.linspace(-3, 3, 15), alpha=.5, label='Standardized') 64 | axs[i].set_xlabel(f'Feature #{i}') 65 | axs[i].set_ylabel('# of points') 66 | axs[i].set_title(f'mean={normed.mean():.4f} std={normed.std(unbiased=False):.4f}') 67 | axs[i].legend() 68 | axs[i].set_ylim([0, 13]) 69 | axs[i].label_outer() 70 | fig.tight_layout() 71 | return fig 72 | 73 | def compare_skip(image, noskip_image, skip_image): 74 | fig, axs = plt.subplots(1, 3, figsize=(12, 4)) 75 | for img, ax, title in zip([image, noskip_image, skip_image], axs, ['Original', 'No Skip', 'Skip']): 76 | ax.imshow(img, cmap=plt.cm.gray) 77 | ax.grid(False) 78 | ax.set_title(title) 79 | ax.set_xticks([]) 80 | ax.set_yticks([]) 81 | fig.tight_layout() 82 | return fig 83 | -------------------------------------------------------------------------------- /plots/chapterextra.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | from collections import namedtuple 7 | from matplotlib import animation 8 | from matplotlib import pyplot as plt 9 | import seaborn as sns 10 | import matplotlib 11 | matplotlib.rcParams['animation.writer'] = 'ffmpeg' 12 | 13 | class Basic(object): 14 | """Basic plot class, NOT to be instantiated directly. 
15 | """ 16 | def __init__(self, ax): 17 | self._title = '' 18 | self._custom_title = '' 19 | self.n_epochs = 0 20 | 21 | self.ax = ax 22 | self.ax.clear() 23 | self.fig = ax.get_figure() 24 | 25 | @property 26 | def title(self): 27 | title = self._title 28 | if not isinstance(title, tuple): 29 | title = (self._title,) 30 | title = tuple([' '.join([self._custom_title, t]) for t in title]) 31 | return title 32 | 33 | @property 34 | def axes(self): 35 | return (self.ax,) 36 | 37 | def load_data(self, **kwargs): 38 | self._prepare_plot() 39 | return self 40 | 41 | def _prepare_plot(self): 42 | pass 43 | 44 | @staticmethod 45 | def _update(i, object, epoch_start=0): 46 | pass 47 | 48 | def set_title(self, title): 49 | """Prepends a custom title to the plot. 50 | Parameters 51 | ---------- 52 | title: String 53 | Custom title to prepend. 54 | Returns 55 | ------- 56 | None 57 | """ 58 | self._custom_title = title 59 | 60 | def plot(self, epoch): 61 | """Plots data at a given epoch. 62 | Parameters 63 | ---------- 64 | epoch: int 65 | Epoch to use for the plotting. 66 | Returns 67 | ------- 68 | fig: figure 69 | Figure containing the plot. 70 | """ 71 | self.__class__._update(epoch, self) 72 | self.fig.tight_layout() 73 | return self.fig 74 | 75 | def animate(self, epoch_start=0, epoch_end=-1): 76 | """Animates plotted data from `epoch_start` to `epoch_end`. 77 | Parameters 78 | ---------- 79 | epoch_start: int, optional 80 | Epoch to start the animation from. 81 | epoch_end: int, optional 82 | Epoch to end the animation. 83 | Returns 84 | ------- 85 | anim: FuncAnimation 86 | Animation function for the data. 87 | """ 88 | if epoch_end == -1: 89 | epoch_end = self.n_epochs 90 | 91 | anim = animation.FuncAnimation(self.fig, self.__class__._update, 92 | fargs=(self, epoch_start), 93 | frames=(epoch_end - epoch_start), 94 | blit=True) 95 | return anim 96 | 97 | class LayerViolins(Basic): 98 | def __init__(self, ax, title=None): 99 | super(LayerViolins, self).__init__(ax) 100 | self.values = None 101 | self.names = None 102 | self._title = title 103 | 104 | def load_data(self, layer_violins_data): 105 | self.values = layer_violins_data.values 106 | self.names = layer_violins_data.names 107 | self.palette = dict(zip(self.names, sns.palettes.husl_palette(len(self.names), .7))) 108 | self.n_epochs = len(self.values) 109 | self._prepare_plot() 110 | self._update(0, self) 111 | return self 112 | 113 | def _prepare_plot(self): 114 | self.line = self.ax.plot([], []) 115 | 116 | @staticmethod 117 | def _update(i, lv, epoch_start=0): 118 | assert len(lv.names) == len(lv.values[i]), "Layer names and values have different lengths!" 
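# Each frame redraws the violin plot from scratch: the values of every layer at
# epoch i (weights, gradients, or activations) are flattened and melted into a
# long-format dataframe, one violin per layer.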
119 | epoch = i + epoch_start 120 | 121 | df = pd.concat([pd.DataFrame(layer_values.ravel(), 122 | columns=[layer_name]).melt(var_name='layers', value_name='values') 123 | for layer_name, layer_values in zip(lv.names, lv.values[i])]) 124 | 125 | lv.ax.clear() 126 | sns.violinplot(data=df, x='layers', y='values', ax=lv.ax, cut=0, palette=lv.palette, density_norm='width', linewidth=1.5, hue='layers') 127 | lv.ax.set_xticklabels(df.layers.unique()) 128 | lv.ax.set_xlabel('Layers') 129 | if lv._title is not None: 130 | lv.ax.set_ylabel(lv._title) 131 | lv.ax.set_ylim([df['values'].min(), df['values'].max()]) 132 | lv.ax.set_title('{} - Epoch: {}'.format(lv.title[0], epoch)) 133 | 134 | return lv.line 135 | 136 | LayerViolinsData = namedtuple('LayerViolinsData', ['names', 'values']) 137 | 138 | def build_model(input_dim, n_layers, units, activation, use_bn=False): 139 | if isinstance(units, list): 140 | assert len(units) == n_layers 141 | else: 142 | units = [units] * n_layers 143 | 144 | model = nn.Sequential() 145 | # Adds first hidden layer with input_dim parameter 146 | model.add_module('h1', nn.Linear(input_dim, units[0], bias=not use_bn)) 147 | model.add_module('a1', activation()) 148 | if use_bn: 149 | model.add_module('bn1', nn.BatchNorm1d(units[0], affine=False)) 150 | 151 | # Adds remaining hidden layers 152 | for i in range(2, n_layers + 1): 153 | model.add_module('h{}'.format(i), nn.Linear(units[i-2], units[i-1], bias=not use_bn)) 154 | model.add_module('a{}'.format(i), activation()) 155 | if use_bn: 156 | model.add_module('bn{}'.format(i), nn.BatchNorm1d(units[i-1], affine=False)) 157 | 158 | # Adds output layer 159 | model.add_module('o', nn.Linear(units[n_layers-1], 1)) 160 | return model 161 | 162 | def get_plot_data(train_loader, n_layers=5, hidden_units=100, activation_fn=None, use_bn=False, before=True, model=None): 163 | import sys 164 | sys.path.append('..') 165 | from stepbystep.v3 import StepByStep 166 | 167 | if model is None: 168 | n_features = train_loader.dataset.tensors[0].shape[1] 169 | if activation_fn is None: 170 | activation_fn = nn.ReLU 171 | model = build_model(n_features, n_layers, hidden_units, activation_fn, use_bn)  # input_dim comes first in build_model's signature 172 | 173 | loss_fn = nn.BCEWithLogitsLoss() 174 | optimizer = optim.SGD(model.parameters(), lr=1e-2) 175 | 176 | n_layers = len(list(filter(lambda c: c[0][0] == 'h', model.named_children()))) 177 | 178 | sbs = StepByStep(model, loss_fn, optimizer) 179 | sbs.set_loaders(train_loader) 180 | sbs.capture_parameters([f'h{i}' for i in range(1, n_layers + 1)]) 181 | sbs.capture_gradients([f'h{i}' for i in range(1, n_layers + 1)]) 182 | sbs.attach_hooks([f'a{i}' for i in range(1, n_layers + 1)]) 183 | sbs.train(1) 184 | 185 | names = [f'h{i}' for i in range(1, n_layers + 1)] 186 | 187 | parameters = [[np.array(sbs._parameters[f'h{i}']['weight']).reshape(-1,) for i in range(1, n_layers + 1)]] 188 | parms_data = LayerViolinsData(names=names, values=parameters) 189 | 190 | gradients = [[np.array(sbs._gradients[f'h{i}']['weight']).reshape(-1,) for i in range(1, n_layers + 1)]] 191 | gradients_data = LayerViolinsData(names=names, values=gradients) 192 | 193 | activations = [[np.array(sbs.visualization[f'a{i}']).reshape(-1,) for i in range(1, n_layers + 1)]] 194 | activations_data = LayerViolinsData(names=names, values=activations) 195 | 196 | return parms_data, gradients_data, activations_data 197 | 198 | def plot_violins(parms, gradients, activations): 199 | fig, axs = plt.subplots(1, 3, figsize=(15, 5)) 200 | titles = ['Weights', 'Activations',
'Gradients'] 201 | parms_plot = LayerViolins(axs[0], 'Weights').load_data(parms) 202 | act_plot = LayerViolins(axs[1], 'Activations').load_data(activations) 203 | grad_plot = LayerViolins(axs[2], 'Gradients').load_data(gradients) 204 | axs[0].set_ylim(np.array(axs[0].axes.get_ylim()) * 1.1) 205 | axs[1].set_ylim(np.array(axs[1].axes.get_ylim()) + np.array([-.2, .2])) 206 | for i in range(3): axs[i].set_title(titles[i]) 207 | fig.tight_layout() 208 | return fig 209 | 210 | def make_init_fn(config): 211 | def weights_init(m): 212 | for c in config.keys(): 213 | if isinstance(m, c): 214 | try: 215 | weight_init_fn = config[c]['w'] 216 | weight_init_fn(m.weight) 217 | except KeyError: 218 | pass 219 | 220 | if m.bias is not None: 221 | try: 222 | bias_init_fn = config[c]['b'] 223 | bias_init_fn(m.bias) 224 | except KeyError: 225 | pass 226 | return weights_init 227 | 228 | def plot_schemes(n_features, n_layers, hidden_units, loader): 229 | fig, axs = plt.subplots(2, 3, figsize=(15, 5)) 230 | act_fns = [nn.Sigmoid, nn.Tanh, nn.ReLU] 231 | winits = [lambda m: nn.init.normal_(m, mean=0.0, std=0.1), 232 | lambda m: nn.init.xavier_uniform_(m), 233 | lambda m: nn.init.kaiming_uniform_(m, nonlinearity='relu')] 234 | 235 | for i in range(3): 236 | model = build_model(n_features, n_layers, hidden_units, act_fns[i], use_bn=False) 237 | 238 | torch.manual_seed(13) 239 | weights_init = make_init_fn({nn.Linear: {'w': winits[i], 'b': nn.init.zeros_}}) 240 | with torch.no_grad(): 241 | model.apply(weights_init) 242 | 243 | parms, gradients, activations = get_plot_data(loader, model=model) 244 | act_plot = LayerViolins(axs[0, i], 'Activations').load_data(activations) 245 | grad_plot = LayerViolins(axs[1, i], 'Gradients').load_data(gradients) 246 | 247 | names = [r'$Sigmoid + N(0,\sigma=0.1)$', r'$Tanh + Xavier$', r'$ReLU + Kaiming$'] 248 | for j in range(2): 249 | ylims = [] 250 | for i in range(3): 251 | ylims.append(np.array(axs[j, i].axes.get_ylim())) 252 | axs[0, i].set_title(names[i]) 253 | axs[1, i].set_title('') 254 | axs[j, i].label_outer() 255 | for i in range(3): 256 | axs[j, i].set_ylim([1.1 * np.array(ylims).min(), 1.1 * np.array(ylims).max()]) 257 | 258 | for i in range(3): 259 | axs[0, i].set_ylim([-1.1, 8]) 260 | axs[1, i].set_ylim([-0.05, 0.05]) 261 | 262 | fig.tight_layout() 263 | return fig 264 | 265 | def plot_scheme_bn(n_features, n_layers, hidden_units, loader): 266 | fig, axs = plt.subplots(2, 3, figsize=(15, 5)) 267 | 268 | winits = [lambda m: nn.init.normal_(m, mean=0.0, std=0.1), 269 | lambda m: nn.init.kaiming_uniform_(m, nonlinearity='relu'), 270 | lambda m: nn.init.normal_(m, mean=0.0, std=0.1),] 271 | 272 | for i in range(3): 273 | model = build_model(n_features, n_layers, hidden_units, nn.ReLU, use_bn=(i==2)) 274 | 275 | torch.manual_seed(13) 276 | weights_init = make_init_fn({nn.Linear: {'w': winits[i], 'b': nn.init.zeros_}}) 277 | with torch.no_grad(): 278 | model.apply(weights_init) 279 | 280 | parms, gradients, activations = get_plot_data(loader, model=model) 281 | act_plot = LayerViolins(axs[0, i], 'Activations').load_data(activations) 282 | grad_plot = LayerViolins(axs[1, i], 'Gradients').load_data(gradients) 283 | 284 | names = [r'$ReLU + N(0,\sigma=0.1)$', r'$ReLU + Kaiming$', r'$ReLU + N(0,\sigma=0.1) + BN$'] 285 | for j in range(2): 286 | ylims = [] 287 | for i in range(3): 288 | ylims.append(np.array(axs[j, i].axes.get_ylim())) 289 | axs[0, i].set_title(names[i]) 290 | axs[1, i].set_title('') 291 | axs[j, i].label_outer() 292 | for i in range(3): 293 | axs[j, 
i].set_ylim([1.1 * np.array(ylims).min(), 1.1 * np.array(ylims).max()]) 294 | 295 | for i in range(3): 296 | axs[0, i].set_ylim([-0.5, 8]) 297 | axs[1, i].set_ylim([-0.05, 0.05]) 298 | 299 | fig.tight_layout() 300 | return fig 301 | 302 | def distributions(X_reg, y_reg): 303 | fig, axs = plt.subplots(1, 2, figsize=(10, 4)) 304 | axs[0].hist(X_reg.view(-1,).numpy()) 305 | axs[0].set_xlabel('Feature Values') 306 | axs[0].set_ylabel('Count') 307 | axs[0].set_title('Distribution of X') 308 | axs[1].hist(y_reg.view(-1,).numpy()) 309 | axs[1].set_xlabel('Target Values') 310 | axs[1].set_ylabel('Count') 311 | axs[1].set_title('Distribution of y') 312 | fig.tight_layout() 313 | return fig 314 | 315 | # https://stackoverflow.com/questions/34017866/arrow-on-a-line-plot-with-matplotlib 316 | def add_arrow(line, position=None, direction='right', size=15, color=None, lw=2, alpha=1.0, text=None, text_offset=(0 , 0)): 317 | """ 318 | add an arrow to a line. 319 | 320 | line: Line2D object 321 | position: x-position of the arrow. If None, mean of xdata is taken 322 | direction: 'left' or 'right' 323 | size: size of the arrow in fontsize points 324 | color: if None, line color is taken. 325 | """ 326 | if color is None: 327 | color = line.get_color() 328 | 329 | xdata = line.get_xdata() 330 | ydata = line.get_ydata() 331 | 332 | if position is None: 333 | position = xdata.mean() 334 | # find closest index 335 | start_ind = np.argmin(np.absolute(xdata - position)) 336 | if direction == 'right': 337 | end_ind = start_ind + 1 338 | else: 339 | end_ind = start_ind - 1 340 | 341 | line.axes.annotate('', 342 | xytext=(xdata[start_ind], ydata[start_ind]), 343 | xy=(xdata[end_ind], ydata[end_ind]), 344 | arrowprops=dict(arrowstyle="->", color=color, lw=lw, linestyle='--' if alpha < 1 else '-', alpha=alpha), 345 | size=size, 346 | ) 347 | if text is not None: 348 | line.axes.annotate(text, color=color, 349 | xytext=(xdata[end_ind] + text_offset[0], ydata[end_ind] + text_offset[1]), 350 | xy=(xdata[end_ind], ydata[end_ind]), 351 | size=size, 352 | ) 353 | 354 | def make_line(ax, point): 355 | point = np.vstack([[0., 0.], np.array(point.squeeze().tolist())]) 356 | line = ax.plot(*point.T, lw=0)[0] 357 | return line 358 | 359 | def compare_grads(grads_before, grads_after): 360 | fig, ax = plt.subplots(1, 1, figsize=(5, 3)) 361 | ax.set_xlim([0, 3]) 362 | ax.set_ylim([0, 1.5]) 363 | ax.set_xlabel('Parameter 0') 364 | ax.set_ylabel('Parameter 1') 365 | ax.set_title('Gradients') 366 | add_arrow(make_line(ax, grads_before), lw=2, color='k', text=r'$grad$', 367 | size=12, alpha=1.0, text_offset=(-.13, .03)) 368 | add_arrow(make_line(ax, grads_after), lw=2, color='r', text=r'$clipped\ grad$', 369 | size=12, alpha=1.0, text_offset=(-.33, .03)) 370 | fig.tight_layout() 371 | return fig 372 | 373 | def gradient_distrib(sbs1, layer1, sbs2, layer2): 374 | fig, axs = plt.subplots(1, 2, figsize=(10, 4)) 375 | axs[0].hist(np.array(sbs1._gradients[layer1]['weight']).reshape(-1,), bins=np.linspace(-10, 10, 41)) 376 | axs[0].set_ylim([0, 4000]) 377 | axs[0].set_xlabel('Gradients') 378 | axs[0].set_ylabel('# Updates') 379 | axs[0].set_title('Using clip_grad_value_') 380 | axs[1].hist(np.array(sbs2._gradients[layer2]['weight']).reshape(-1,), bins=np.linspace(-10, 10, 41)) 381 | axs[1].set_ylim([0, 4000]) 382 | axs[1].set_xlabel('Gradients') 383 | axs[1].label_outer() 384 | axs[1].set_title('Using hooks') 385 | fig.tight_layout() 386 | return fig 387 | -------------------------------------------------------------------------------- 
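The helpers in plots/chapterextra.py above can be composed end to end. Here is a minimal, hypothetical usage sketch (not part of the repository; the random dataset and every argument value are assumptions made for illustration only):

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# Made-up binary classification data: 128 points with 10 features
torch.manual_seed(13)
X = torch.randn(128, 10)
y = (X.sum(dim=1, keepdim=True) > 0).float()
loader = DataLoader(TensorDataset(X, y), batch_size=32)

# Five hidden layers of 100 ReLU units each (see build_model above)
model = build_model(input_dim=10, n_layers=5, units=100, activation=nn.ReLU)

# Kaiming-initialized weights and zeroed biases via make_init_fn
init_fn = make_init_fn({nn.Linear: {'w': lambda w: nn.init.kaiming_uniform_(w, nonlinearity='relu'),
                                    'b': nn.init.zeros_}})
with torch.no_grad():
    model.apply(init_fn)

# get_plot_data trains for a single epoch and captures weights, gradients,
# and activations; plot_violins shows their per-layer distributions
parms, grads, acts = get_plot_data(loader, model=model)
fig = plot_violins(parms, grads, acts)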
/plots/replay.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.ticker as ticker 3 | from matplotlib import animation, pyplot as plt  # animation is used by Basic.animate below 4 | from collections import namedtuple 5 | from copy import deepcopy 6 | from operator import itemgetter 7 | import torch.nn as nn 8 | import torch 9 | 10 | def build_2d_grid(xlim, ylim, n_lines=11, n_points=1000): 11 | """Returns a 2D grid of boundaries given by `xlim` and `ylim`, 12 | composed of `n_lines` evenly spaced lines of `n_points` each. 13 | Parameters 14 | ---------- 15 | xlim : tuple of 2 ints 16 | Boundaries for the X axis of the grid. 17 | ylim : tuple of 2 ints 18 | Boundaries for the Y axis of the grid. 19 | n_lines : int, optional 20 | Number of grid lines. Default is 11. 21 | If n_lines equals n_points, the grid can be used as 22 | coordinates for the surface of a contourplot. 23 | n_points: int, optional 24 | Number of points in each grid line. Default is 1,000. 25 | Returns 26 | ------- 27 | lines : ndarray 28 | For the cases where n_lines is less than n_points, it 29 | returns an array of shape (2 * n_lines, n_points, 2) 30 | containing both vertical and horizontal lines of the grid. 31 | If n_lines equals n_points, it returns an array of shape 32 | (n_points, n_points, 2), containing all evenly spaced 33 | points inside the grid boundaries. 34 | """ 35 | xs = np.linspace(*xlim, num=n_lines) 36 | ys = np.linspace(*ylim, num=n_points) 37 | x0, y0 = np.meshgrid(xs, ys) 38 | lines_x0 = np.atleast_3d(x0.transpose()) 39 | lines_y0 = np.atleast_3d(y0.transpose()) 40 | 41 | xs = np.linspace(*xlim, num=n_points) 42 | ys = np.linspace(*ylim, num=n_lines) 43 | x1, y1 = np.meshgrid(xs, ys) 44 | lines_x1 = np.atleast_3d(x1) 45 | lines_y1 = np.atleast_3d(y1) 46 | 47 | vertical_lines = np.concatenate([lines_x0, lines_y0], axis=2) 48 | horizontal_lines = np.concatenate([lines_x1, lines_y1], axis=2) 49 | 50 | if n_lines != n_points: 51 | lines = np.concatenate([vertical_lines, horizontal_lines], axis=0) 52 | else: 53 | lines = vertical_lines 54 | 55 | return lines 56 | 57 | FeatureSpaceData = namedtuple('FeatureSpaceData', ['line', 'bent_line', 'prediction', 'target']) 58 | FeatureSpaceLines = namedtuple('FeatureSpaceLines', ['grid', 'input', 'contour']) 59 | 60 | class Basic(object): 61 | """Basic plot class, NOT to be instantiated directly. 62 | """ 63 | def __init__(self, ax): 64 | self._title = '' 65 | self._custom_title = '' 66 | self.n_epochs = 0 67 | 68 | self.ax = ax 69 | self.ax.clear() 70 | self.fig = ax.get_figure() 71 | 72 | @property 73 | def title(self): 74 | title = self._title 75 | if not isinstance(title, tuple): 76 | title = (self._title,) 77 | title = tuple([' '.join([self._custom_title, t]) for t in title]) 78 | return title 79 | 80 | @property 81 | def axes(self): 82 | return (self.ax,) 83 | 84 | def load_data(self, **kwargs): 85 | self._prepare_plot() 86 | return self 87 | 88 | def _prepare_plot(self): 89 | pass 90 | 91 | @staticmethod 92 | def _update(i, object, epoch_start=0): 93 | pass 94 | 95 | def set_title(self, title): 96 | """Prepends a custom title to the plot. 97 | Parameters 98 | ---------- 99 | title: String 100 | Custom title to prepend. 101 | Returns 102 | ------- 103 | None 104 | """ 105 | self._custom_title = title 106 | 107 | def plot(self, epoch): 108 | """Plots data at a given epoch. 109 | Parameters 110 | ---------- 111 | epoch: int 112 | Epoch to use for the plotting.
113 | Returns 114 | ------- 115 | fig: figure 116 | Figure containing the plot. 117 | """ 118 | self.__class__._update(epoch, self) 119 | self.fig.tight_layout() 120 | return self.fig 121 | 122 | def animate(self, epoch_start=0, epoch_end=-1): 123 | """Animates plotted data from `epoch_start` to `epoch_end`. 124 | Parameters 125 | ---------- 126 | epoch_start: int, optional 127 | Epoch to start the animation from. 128 | epoch_end: int, optional 129 | Epoch to end the animation. 130 | Returns 131 | ------- 132 | anim: FuncAnimation 133 | Animation function for the data. 134 | """ 135 | if epoch_end == -1: 136 | epoch_end = self.n_epochs 137 | 138 | anim = animation.FuncAnimation(self.fig, self.__class__._update, 139 | fargs=(self, epoch_start), 140 | frames=(epoch_end - epoch_start), 141 | blit=True) 142 | return anim 143 | 144 | class FeatureSpace(Basic): 145 | """Creates an instance of a FeatureSpace object to make plots 146 | and animations. 147 | Parameters 148 | ---------- 149 | ax: AxesSubplot 150 | Subplot of a Matplotlib figure. 151 | scale_fixed: boolean, optional 152 | If True, axis scales are fixed to the maximum from beginning. 153 | Default is True. 154 | """ 155 | def __init__(self, ax, scale_fixed=True, boundary=True, cmap=None, alpha=1.0): 156 | super(FeatureSpace, self).__init__(ax) 157 | self.ax.grid(False) 158 | self.scale_fixed = scale_fixed 159 | self.boundary = boundary 160 | self.contour = None 161 | self.bent_inputs = None 162 | self.bent_lines = None 163 | self.bent_contour_lines = None 164 | self.grid_lines = None 165 | self.contour_lines = None 166 | self.predictions = None 167 | self.targets = None 168 | 169 | if cmap is None: 170 | cmap = plt.cm.RdBu 171 | self.cmap = cmap 172 | self.alpha = alpha 173 | 174 | self.n_inputs = 0 175 | 176 | self.lines = [] 177 | self.points = [] 178 | 179 | def load_data(self, feature_space_data): 180 | """ Loads feature space data as computed in Replay class. 181 | Parameters 182 | ---------- 183 | feature_space_data: FeatureSpaceData 184 | Namedtuple containing information about original grid 185 | lines, data points and predictions. 186 | Returns 187 | ------- 188 | self: FeatureSpace 189 | Returns the FeatureSpace instance itself.
190 | """ 191 | self.predictions = feature_space_data.prediction 192 | self.targets = feature_space_data.target 193 | self.grid_lines, self.inputs, self.contour_lines = feature_space_data.line 194 | self.bent_lines, self.bent_inputs, self.bent_contour_lines = feature_space_data.bent_line 195 | 196 | self.n_epochs = self.bent_inputs.shape[0] 197 | self.n_inputs = self.bent_inputs.shape[-1] 198 | 199 | self.classes = np.unique(self.targets) 200 | self.bent_inputs = [self.bent_inputs[:, self.targets == target, :] for target in self.classes] 201 | 202 | self._prepare_plot() 203 | return self 204 | 205 | def _prepare_plot(self): 206 | if self.scale_fixed: 207 | xlim = [self.bent_contour_lines[:, :, :, 0].min() - .05, self.bent_contour_lines[:, :, :, 0].max() + .05] 208 | ylim = [self.bent_contour_lines[:, :, :, 1].min() - .05, self.bent_contour_lines[:, :, :, 1].max() + .05] 209 | self.ax.set_xlim(xlim) 210 | self.ax.set_ylim(ylim) 211 | 212 | self.ax.set_xlabel(r"$x_0$", fontsize=12) 213 | self.ax.set_ylabel(r"$x_1$", fontsize=12, rotation=0) 214 | 215 | self.lines = [] 216 | self.points = [] 217 | for c in range(self.grid_lines.shape[0]): 218 | line, = self.ax.plot([], [], linewidth=0.5, color='k') 219 | self.lines.append(line) 220 | for c in range(len(self.classes)): 221 | point = self.ax.scatter([], []) 222 | self.points.append(point) 223 | 224 | contour_x = self.bent_contour_lines[0, :, :, 0] 225 | contour_y = self.bent_contour_lines[0, :, :, 1] 226 | 227 | if self.boundary: 228 | self.contour = self.ax.contourf(contour_x, contour_y, np.zeros(shape=(len(contour_x), len(contour_y))), 229 | cmap=plt.cm.brg, alpha=self.alpha, levels=np.linspace(0, 1, 8)) 230 | 231 | @staticmethod 232 | def _update(i, fs, epoch_start=0, colors=None, **kwargs): 233 | epoch = i + epoch_start 234 | fs.ax.set_title('Epoch: {}'.format(epoch)) 235 | if not fs.scale_fixed: 236 | xlim = [fs.bent_contour_lines[epoch, :, :, 0].min() - .05, fs.bent_contour_lines[epoch, :, :, 0].max() + .05] 237 | ylim = [fs.bent_contour_lines[epoch, :, :, 1].min() - .05, fs.bent_contour_lines[epoch, :, :, 1].max() + .05] 238 | fs.ax.set_xlim(xlim) 239 | fs.ax.set_ylim(ylim) 240 | 241 | if len(fs.lines): 242 | line_coords = fs.bent_lines[epoch].transpose() 243 | 244 | for c, line in enumerate(fs.lines): 245 | line.set_data(*line_coords[:, :, c]) 246 | 247 | if colors is None: 248 | colors = ['r', 'b'] 249 | 250 | if 's' not in kwargs.keys(): 251 | kwargs.update({'s': 10}) 252 | 253 | if 'marker' not in kwargs.keys(): 254 | kwargs.update({'marker': 'o'}) 255 | 256 | input_coords = [coord[epoch].transpose() for coord in fs.bent_inputs] 257 | for c in range(len(fs.points)): 258 | fs.points[c].remove() 259 | fs.points[c] = fs.ax.scatter(*input_coords[c], color=colors[int(fs.classes[c])], **kwargs) 260 | 261 | if fs.boundary: 262 | for c in fs.contour.collections: 263 | c.remove() # removes only the contours, leaves the rest intact 264 | 265 | fs.contour = fs.ax.contourf(fs.bent_contour_lines[epoch, :, :, 0], 266 | fs.bent_contour_lines[epoch, :, :, 1], 267 | fs.predictions[epoch].squeeze(), 268 | cmap=fs.cmap, alpha=fs.alpha, levels=np.linspace(0, 1, 8)) 269 | 270 | fs.ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f')) 271 | fs.ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f')) 272 | fs.ax.locator_params(tight=True, nbins=7) 273 | 274 | #for tick in fs.ax.xaxis.get_major_ticks(): 275 | # tick.label.set_fontsize(10) 276 | #for tick in fs.ax.yaxis.get_major_ticks(): 277 | # tick.label.set_fontsize(10) 278 | 
fs.ax.yaxis.set_label_coords(-0.15,0.5) 279 | 280 | return fs.lines 281 | 282 | 283 | def build_feature_space(model, states, X, y, layer_name=None, contour_points=1000, xlim=(-1, 1), ylim=(-1, 1), 284 | display_grid=True, epoch_start=0, epoch_end=-1): 285 | """Builds a FeatureSpace object to be used for plotting and 286 | animating. 287 | The underlying data, that is, grid lines, inputs and contour 288 | lines, before and after the transformations, as well as the 289 | corresponding predictions for the contour lines, can be 290 | later accessed as the second element of the `feature_space` 291 | property. 292 | Only layers with 2 hidden units are supported! 293 | Parameters 294 | ---------- 295 | model: nn.Module 296 | Model to be used for computing the transformations. 297 | states: list of dicts 298 | Model state dicts captured during training, one per epoch. 299 | X, y: ndarrays 300 | Inputs and targets used for the plotted data points. 301 | layer_name: String, optional 302 | Layer to be used for building the space. 303 | contour_points: int, optional 304 | Number of points in each axis of the contour. 305 | Default is 1,000. 306 | xlim, ylim: tuples of ints, optional 307 | Boundaries for the X and Y axes of the grid. 308 | display_grid: boolean, optional 309 | If True, display grid lines (for 2-dimensional inputs). 310 | Default is True. 311 | epoch_start: int, optional 312 | First epoch to consider. 313 | epoch_end: int, optional 314 | Last epoch to consider. 315 | 316 | Returns 317 | ------- 318 | feature_space_plot: FeatureSpace 319 | An instance of a FeatureSpace object to make plots and 320 | animations. 321 | """ 322 | layers = list(model.named_modules()) 323 | last_layer_name, last_layer_class = layers[-1] 324 | is_logit = not isinstance(last_layer_class, nn.Sigmoid) 325 | if is_logit: 326 | activation_idx = -2 327 | func = lambda x: 1 / (1 + np.exp(-x)) 328 | else: 329 | activation_idx = -3 330 | func = lambda x: x 331 | 332 | names = np.array(list(map(itemgetter(0), layers))) 333 | if layer_name is None: 334 | layer_name = layers[activation_idx][0] 335 | else: 336 | matches = names == layer_name 337 | if np.any(matches): 338 | activation_idx = np.argmax(matches) 339 | else: 340 | raise AttributeError("No layer named {}".format(layer_name)) 341 | 342 | try: 343 | final_dims = layers[activation_idx][1].out_features 344 | except AttributeError: 345 | try: 346 | final_dims = layers[activation_idx + 1][1].in_features 347 | except AttributeError: 348 | final_dims = layers[activation_idx - 1][1].out_features 349 | 350 | assert final_dims == 2, 'Only layers with 2-dimensional outputs are supported!'
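# Sorts the data points by target class so that, from here on, each class
# occupies a contiguous block and can be plotted with its own color.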
351 | 352 | y_ind = np.atleast_1d(y.squeeze().argsort()) 353 | X = np.atleast_2d(X.squeeze())[y_ind].reshape(X.shape) 354 | y = np.atleast_1d(y.squeeze())[y_ind] 355 | 356 | if epoch_end == -1: 357 | epoch_end = len(states)-1 358 | epoch_end = min(epoch_end, len(states)-1) 359 | 360 | #input_dims = self.model.input_shape[-1] 361 | input_dims = X.shape[-1] 362 | n_classes = len(np.unique(y)) 363 | 364 | # Builds a 2D grid and the corresponding contour coordinates 365 | grid_lines = np.array([]) 366 | contour_lines = np.array([]) 367 | if input_dims == 2 and display_grid: 368 | grid_lines = build_2d_grid(xlim, ylim) 369 | contour_lines = build_2d_grid(xlim, ylim, contour_points, contour_points) 370 | 371 | # Initializes "bent" variables, that is, the results of the transformations 372 | bent_lines = [] 373 | bent_inputs = [] 374 | bent_contour_lines = [] 375 | bent_preds = [] 376 | 377 | # For each epoch, uses the corresponding weights 378 | for epoch in range(epoch_start, epoch_end + 1): 379 | X_values = get_values_for_epoch(model, states, epoch, X) 380 | bent_inputs.append(X_values[layer_name]) 381 | # Transforms the inputs 382 | #inputs = [TEST_MODE, X] + weights 383 | #bent_inputs.append(get_activations(inputs=inputs)[0]) 384 | 385 | if input_dims == 2 and display_grid: 386 | # Transforms the grid lines 387 | grid_values = get_values_for_epoch(model, states, epoch, grid_lines.reshape(-1, 2)) 388 | #inputs = [TEST_MODE, grid_lines.reshape(-1, 2)] + weights 389 | output_shape = (grid_lines.shape[:2]) + (-1,) 390 | #bent_lines.append(get_activations(inputs=inputs)[0].reshape(output_shape)) 391 | bent_lines.append(grid_values[layer_name].reshape(output_shape)) 392 | 393 | contour_values = get_values_for_epoch(model, states, epoch, contour_lines.reshape(-1, 2)) 394 | #inputs = [TEST_MODE, contour_lines.reshape(-1, 2)] + weights 395 | output_shape = (contour_lines.shape[:2]) + (-1,) 396 | #bent_contour_lines.append(get_activations(inputs=inputs)[0].reshape(output_shape)) 397 | bent_contour_lines.append(contour_values[layer_name].reshape(output_shape)) 398 | # Makes predictions for each point in the contour surface 399 | #bent_preds.append((get_predictions(inputs=inputs)[0].reshape(output_shape) > .5).astype(int)) 400 | bent_preds.append((func(contour_values[last_layer_name]).reshape(output_shape) > .5).astype(int)) 401 | 402 | 403 | bent_inputs = np.array(bent_inputs) 404 | 405 | # Makes lists into ndarrays and wrap them as namedtuples 406 | bent_lines = np.array(bent_lines) 407 | bent_contour_lines = np.array(bent_contour_lines) 408 | bent_preds = np.array(bent_preds) 409 | 410 | line_data = FeatureSpaceLines(grid=grid_lines, input=X, contour=contour_lines) 411 | bent_line_data = FeatureSpaceLines(grid=bent_lines, input=bent_inputs, contour=bent_contour_lines) 412 | _feature_space_data = FeatureSpaceData(line=line_data, bent_line=bent_line_data, 413 | prediction=bent_preds, target=y) 414 | 415 | return _feature_space_data 416 | 417 | def build_decision_boundary(model, states, X, y, layer_name=None, contour_points=1000, xlim=(-1, 1), ylim=(-1, 1), display_grid=True, 418 | epoch_start=0, epoch_end=-1): 419 | """Builds a FeatureSpace object to be used for plotting and 420 | animating the raw inputs and the decision boundary. 421 | The underlying data, that is, grid lines, inputs and contour 422 | lines, as well as the corresponding predictions for the 423 | contour lines, can be later accessed as the second element of 424 | the `decision_boundary` property. 
417 | def build_decision_boundary(model, states, X, y, layer_name=None, contour_points=1000, xlim=(-1, 1), ylim=(-1, 1), display_grid=True, 418 | epoch_start=0, epoch_end=-1): 419 | """Builds the data for plotting and animating the raw inputs and the decision boundary. 420 | The underlying data, that is, grid lines, inputs and contour 421 | lines, as well as the corresponding predictions for the 422 | contour lines, is returned as a FeatureSpaceData namedtuple. 423 | Only inputs with 2 dimensions are supported! 424 | Parameters 425 | ---------- 426 | model: nn.Module 427 | PyTorch model used to compute the predictions. 428 | states: list of dicts 429 | Model state dicts, one for each epoch. 430 | X: ndarray 431 | y: ndarray 432 | Inputs and their corresponding labels. 433 | layer_name: string, optional 434 | Layer to be used for making predictions. 435 | contour_points: int, optional 436 | Number of points in each axis of the contour. Default is 1,000. 437 | xlim: tuple of ints, optional 438 | ylim: tuple of ints, optional 439 | Boundaries for the X and Y axes of the grid. 440 | display_grid: boolean, optional 441 | If True, displays grid lines. Default is True. 442 | epoch_start: int, optional 443 | epoch_end: int, optional 444 | First and last epochs to consider. 445 | Returns 446 | ------- 447 | decision_boundary_data: FeatureSpaceData 448 | A namedtuple with the data to make plots and animations. 449 | """ 450 | layers = list(model.named_modules()) 451 | last_layer_name, last_layer_class = layers[-1] 452 | is_logit = not isinstance(last_layer_class, nn.Sigmoid) 453 | if is_logit: 454 | activation_idx = -2 455 | func = lambda x: 1 / (1 + np.exp(-x)) 456 | else: 457 | activation_idx = -3 458 | func = lambda x: x 459 | 460 | if layer_name is None: 461 | layer_name = layers[activation_idx][0] 462 | else: 463 | matches = np.array(list(map(itemgetter(0), layers))) == layer_name 464 | if np.any(matches): 465 | activation_idx = np.argmax(matches) 466 | else: 467 | raise AttributeError("No layer named {}".format(layer_name)) 468 | 469 | try: 470 | final_dims = layers[activation_idx][1].out_features 471 | except AttributeError: 472 | final_dims = layers[activation_idx + 1][1].in_features 473 | assert final_dims == 2, 'Only layers with 2-dimensional outputs are supported!' 474 | 475 | y_ind = y.squeeze().argsort() 476 | X = X.squeeze()[y_ind].reshape(X.shape) 477 | y = y.squeeze()[y_ind] 478 | 479 | if epoch_end == -1: 480 | epoch_end = len(states)-1 481 | epoch_end = min(epoch_end, len(states)-1) 482 | 483 | 484 | input_dims = X.shape[-1] 485 | n_classes = len(np.unique(y)) 486 | 487 | # Builds a 2D grid and the corresponding contour coordinates 488 | grid_lines = np.array([]) 489 | if display_grid: 490 | grid_lines = build_2d_grid(xlim, ylim) 491 | 492 | contour_lines = build_2d_grid(xlim, ylim, contour_points, contour_points) 493 | 494 | bent_lines = [] 495 | bent_inputs = [] 496 | bent_contour_lines = [] 497 | bent_preds = [] 498 | # For each epoch, uses the corresponding weights 499 | for epoch in range(epoch_start, epoch_end + 1): 500 | bent_lines.append(grid_lines) 501 | bent_inputs.append(X) 502 | bent_contour_lines.append(contour_lines) 503 | 504 | contour_values = get_values_for_epoch(model, states, epoch, contour_lines.reshape(-1, 2)) 505 | output_shape = (contour_lines.shape[:2]) + (-1,) 506 | # Makes predictions for each point in the contour surface 507 | bent_preds.append((func(contour_values[last_layer_name]).reshape(output_shape) > .5).astype(int)) 508 | 509 | # Makes lists into ndarrays and wraps them as namedtuples 510 | bent_inputs = np.array(bent_inputs) 511 | bent_lines = np.array(bent_lines) 512 | bent_contour_lines = np.array(bent_contour_lines) 513 | bent_preds = np.array(bent_preds) 514 | 515 | line_data = FeatureSpaceLines(grid=grid_lines, input=X, contour=contour_lines) 516 | bent_line_data = FeatureSpaceLines(grid=bent_lines, input=bent_inputs, contour=bent_contour_lines) 517 | _decision_boundary_data = FeatureSpaceData(line=line_data, bent_line=bent_line_data, 518 | prediction=bent_preds, target=y) 519 | 520 | return
_decision_boundary_data 521 | 522 | def get_intermediate_values(model, x): 523 | hooks = {} 524 | visualization = {} 525 | layer_names = {} 526 | 527 | def hook_fn(m, i, o): 528 | visualization[layer_names[m]] = o.cpu().detach().numpy() 529 | 530 | for name, layer in model.named_modules(): 531 | if name != '': 532 | layer_names[layer] = name 533 | hooks[name] = layer.register_forward_hook(hook_fn) 534 | 535 | device = list(model.parameters())[0].device.type 536 | # RNNs 537 | model(torch.as_tensor(x).float().unsqueeze(0).to(device)) 538 | # model(torch.as_tensor(x).float().to(device)) 539 | 540 | for hook in hooks.values(): 541 | hook.remove() 542 | 543 | return visualization 544 | 545 | def get_values_for_epoch(model, states, epoch, x): 546 | with torch.no_grad(): 547 | model.load_state_dict(states[epoch]) 548 | 549 | return get_intermediate_values(model, x) 550 | -------------------------------------------------------------------------------- /postBuild: -------------------------------------------------------------------------------- 1 | # jupyter serverextension enable --sys-prefix jupyter_server_proxy 2 | jupyter server extension enable --sys-prefix jupyter_server_proxy 3 | # tensorboard launches at startup 4 | mv tensorboardserverextension.py ${NB_PYTHON_PREFIX}/lib/python*/site-packages/ 5 | # enable tensorboard extension 6 | # jupyter serverextension enable --sys-prefix tensorboardserverextension 7 | jupyter server extension enable --sys-prefix tensorboardserverextension 8 | -------------------------------------------------------------------------------- /revision/v1.2/Revision_Volume1_v1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/revision/v1.2/Revision_Volume1_v1.2.pdf -------------------------------------------------------------------------------- /revision/v1.2/Revision_Volume2_v1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/revision/v1.2/Revision_Volume2_v1.2.pdf -------------------------------------------------------------------------------- /revision/v1.2/Revision_Volume3_v1.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/revision/v1.2/Revision_Volume3_v1.2.pdf -------------------------------------------------------------------------------- /runs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dvgodoy/PyTorchStepByStep/2a2201db6fc07549004c67613aeb7c8262b67a37/runs/.gitkeep -------------------------------------------------------------------------------- /stepbystep/v0.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import torch 4 | import matplotlib.pyplot as plt 5 | from torch.utils.tensorboard import SummaryWriter 6 | 7 | plt.style.use('fivethirtyeight') 8 | 9 | class StepByStep(object): 10 | def __init__(self, model, loss_fn, optimizer): 11 | # Here we define the attributes of our class 12 | 13 | # We start by storing the arguments as attributes 14 | # to use them later 15 | self.model = model 16 | self.loss_fn = loss_fn 17 | self.optimizer = optimizer 18 | self.device = 'cuda' if 
torch.cuda.is_available() else 'cpu' 19 | # Let's send the model to the specified device right away 20 | self.model.to(self.device) 21 | 22 | # These attributes are defined here, but since they are 23 | # not informed at the moment of creation, we keep them None 24 | self.train_loader = None 25 | self.val_loader = None 26 | self.writer = None 27 | 28 | # These attributes are going to be computed internally 29 | self.losses = [] 30 | self.val_losses = [] 31 | self.total_epochs = 0 32 | 33 | # Creates the train_step function for our model, 34 | # loss function and optimizer 35 | # Note: there are NO ARGS there! It makes use of the class 36 | # attributes directly 37 | self.train_step_fn = self._make_train_step_fn() 38 | # Creates the val_step function for our model and loss 39 | self.val_step_fn = self._make_val_step_fn() 40 | 41 | def to(self, device): 42 | # This method allows the user to specify a different device 43 | # It sets the corresponding attribute (to be used later in 44 | # the mini-batches) and sends the model to the device 45 | try: 46 | self.device = device 47 | self.model.to(self.device) 48 | except RuntimeError: 49 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 50 | print(f"Couldn't send it to {device}, sending it to {self.device} instead.") 51 | self.model.to(self.device) 52 | 53 | def set_loaders(self, train_loader, val_loader=None): 54 | # This method allows the user to define which train_loader (and val_loader, optionally) to use 55 | # Both loaders are then assigned to attributes of the class 56 | # So they can be referred to later 57 | self.train_loader = train_loader 58 | self.val_loader = val_loader 59 | 60 | def set_tensorboard(self, name, folder='runs'): 61 | # This method allows the user to define a SummaryWriter to interface with TensorBoard 62 | suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S') 63 | self.writer = SummaryWriter(f'{folder}/{name}_{suffix}') 64 | 65 | def _make_train_step_fn(self): 66 | # This method does not need ARGS... 
it can refer to 67 | # the attributes: self.model, self.loss_fn and self.optimizer 68 | 69 | # Builds function that performs a step in the train loop 70 | def perform_train_step_fn(x, y): 71 | # Sets model to TRAIN mode 72 | self.model.train() 73 | 74 | # Step 1 - Computes our model's predicted output - forward pass 75 | yhat = self.model(x) 76 | # Step 2 - Computes the loss 77 | loss = self.loss_fn(yhat, y) 78 | # Step 3 - Computes gradients for both "a" and "b" parameters 79 | loss.backward() 80 | # Step 4 - Updates parameters using gradients and the learning rate 81 | self.optimizer.step() 82 | self.optimizer.zero_grad() 83 | 84 | # Returns the loss 85 | return loss.item() 86 | 87 | # Returns the function that will be called inside the train loop 88 | return perform_train_step_fn 89 | 90 | def _make_val_step_fn(self): 91 | # Builds function that performs a step in the validation loop 92 | def perform_val_step_fn(x, y): 93 | # Sets model to EVAL mode 94 | self.model.eval() 95 | 96 | # Step 1 - Computes our model's predicted output - forward pass 97 | yhat = self.model(x) 98 | # Step 2 - Computes the loss 99 | loss = self.loss_fn(yhat, y) 100 | # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation 101 | return loss.item() 102 | 103 | return perform_val_step_fn 104 |
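# Both _make_*_step_fn methods above rely on the same higher-order function
# pattern: each returns a closure that captures self.model, self.loss_fn and
# self.optimizer, so the loop that calls it only has to supply data. A rough
# standalone sketch of the idea (illustrative, not part of this class):
#   def make_step_fn(model, loss_fn, optimizer):
#       def step_fn(x, y):
#           model.train()
#           loss = loss_fn(model(x), y)
#           loss.backward()
#           optimizer.step()
#           optimizer.zero_grad()
#           return loss.item()
#       return step_fn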
105 | def _mini_batch(self, validation=False): 106 | # The mini-batch can be used with both loaders 107 | # The argument `validation` defines which loader and 108 | # corresponding step function are going to be used 109 | if validation: 110 | data_loader = self.val_loader 111 | step_fn = self.val_step_fn 112 | else: 113 | data_loader = self.train_loader 114 | step_fn = self.train_step_fn 115 | 116 | if data_loader is None: 117 | return None 118 | 119 | # Once the data loader and step function are set, this is the same 120 | # mini-batch loop we had before 121 | mini_batch_losses = [] 122 | for x_batch, y_batch in data_loader: 123 | x_batch = x_batch.to(self.device) 124 | y_batch = y_batch.to(self.device) 125 | 126 | mini_batch_loss = step_fn(x_batch, y_batch) 127 | mini_batch_losses.append(mini_batch_loss) 128 | 129 | loss = np.mean(mini_batch_losses) 130 | return loss 131 | 132 | def set_seed(self, seed=42): 133 | torch.backends.cudnn.deterministic = True 134 | torch.backends.cudnn.benchmark = False 135 | torch.manual_seed(seed) 136 | np.random.seed(seed) 137 | 138 | def train(self, n_epochs, seed=42): 139 | # To ensure reproducibility of the training process 140 | self.set_seed(seed) 141 | 142 | for epoch in range(n_epochs): 143 | # Keeps track of the number of epochs 144 | # by updating the corresponding attribute 145 | self.total_epochs += 1 146 | 147 | # inner loop 148 | # Performs training using mini-batches 149 | loss = self._mini_batch(validation=False) 150 | self.losses.append(loss) 151 | 152 | # VALIDATION 153 | # no gradients in validation! 154 | with torch.no_grad(): 155 | # Performs evaluation using mini-batches 156 | val_loss = self._mini_batch(validation=True) 157 | self.val_losses.append(val_loss) 158 | 159 | # If a SummaryWriter has been set... 160 | if self.writer: 161 | scalars = {'training': loss} 162 | if val_loss is not None: 163 | scalars.update({'validation': val_loss}) 164 | # Records both losses for each epoch under the main tag "loss" 165 | self.writer.add_scalars(main_tag='loss', 166 | tag_scalar_dict=scalars, 167 | global_step=epoch) 168 | 169 | if self.writer: 170 | # Closes the writer 171 | self.writer.close() 172 | 173 | def save_checkpoint(self, filename): 174 | # Builds dictionary with all elements for resuming training 175 | checkpoint = {'epoch': self.total_epochs, 176 | 'model_state_dict': self.model.state_dict(), 177 | 'optimizer_state_dict': self.optimizer.state_dict(), 178 | 'loss': self.losses, 179 | 'val_loss': self.val_losses} 180 | 181 | torch.save(checkpoint, filename) 182 | 183 | def load_checkpoint(self, filename): 184 | # Loads dictionary 185 | checkpoint = torch.load(filename, weights_only=False) 186 | 187 | # Restores state for model and optimizer 188 | self.model.load_state_dict(checkpoint['model_state_dict']) 189 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 190 | 191 | self.total_epochs = checkpoint['epoch'] 192 | self.losses = checkpoint['loss'] 193 | self.val_losses = checkpoint['val_loss'] 194 | 195 | self.model.train() # always use TRAIN for resuming training 196 | 197 | def predict(self, x): 198 | # Sets it to evaluation mode for predictions 199 | self.model.eval() 200 | # Takes a Numpy input and makes it a float tensor 201 | x_tensor = torch.as_tensor(x).float() 202 | # Sends input to device and uses model for prediction 203 | y_hat_tensor = self.model(x_tensor.to(self.device)) 204 | # Sets it back to train mode 205 | self.model.train() 206 | # Detaches it, brings it to CPU and back to Numpy 207 | return y_hat_tensor.detach().cpu().numpy() 208 | 209 | def plot_losses(self): 210 | fig = plt.figure(figsize=(10, 4)) 211 | plt.plot(self.losses, label='Training Loss', c='b') 212 | plt.plot(self.val_losses, label='Validation Loss', c='r') 213 | plt.yscale('log') 214 | plt.xlabel('Epochs') 215 | plt.ylabel('Loss') 216 | plt.legend() 217 | plt.tight_layout() 218 | return fig 219 | 220 | def add_graph(self): 221 | # Fetches a single mini-batch so we can use add_graph 222 | if self.train_loader and self.writer: 223 | x_sample, y_sample = next(iter(self.train_loader)) 224 | self.writer.add_graph(self.model, x_sample.to(self.device)) 225 | --------------------------------------------------------------------------------
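# A minimal usage sketch for the StepByStep class above, assuming the usual
# torch.nn / torch.optim imports and existing DataLoaders (all of the names
# below are illustrative):
#   model = nn.Sequential(nn.Linear(1, 1))
#   loss_fn = nn.MSELoss(reduction='mean')
#   optimizer = optim.SGD(model.parameters(), lr=0.1)
#   sbs = StepByStep(model, loss_fn, optimizer)
#   sbs.set_loaders(train_loader, val_loader)
#   sbs.set_tensorboard('simple_regression')  # optional TensorBoard logging
#   sbs.train(n_epochs=200)
#   fig = sbs.plot_losses()
#   predictions = sbs.predict(new_data)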
/stepbystep/v1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import torch 4 | import random 5 | import matplotlib.pyplot as plt 6 | from torch.utils.tensorboard import SummaryWriter 7 | 8 | plt.style.use('fivethirtyeight') 9 | 10 | class StepByStep(object): 11 | def __init__(self, model, loss_fn, optimizer): 12 | # Here we define the attributes of our class 13 | 14 | # We start by storing the arguments as attributes 15 | # to use them later 16 | self.model = model 17 | self.loss_fn = loss_fn 18 | self.optimizer = optimizer 19 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 20 | # Let's send the model to the specified device right away 21 | self.model.to(self.device) 22 | 23 | # These attributes are defined here, but since they are 24 | # not informed at the moment of creation, we keep them None 25 | self.train_loader = None 26 | self.val_loader = None 27 | self.writer = None 28 | 29 | # These attributes are going to be computed internally 30 | self.losses = [] 31 | self.val_losses = [] 32 | self.total_epochs = 0 33 | 34 | # Creates the train_step function for our model, 35 | # loss function and optimizer 36 | # Note: there are NO ARGS there! It makes use of the class 37 | # attributes directly 38 | self.train_step_fn = self._make_train_step_fn() 39 | # Creates the val_step function for our model and loss 40 | self.val_step_fn = self._make_val_step_fn() 41 | 42 | def to(self, device): 43 | # This method allows the user to specify a different device 44 | # It sets the corresponding attribute (to be used later in 45 | # the mini-batches) and sends the model to the device 46 | try: 47 | self.device = device 48 | self.model.to(self.device) 49 | except RuntimeError: 50 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 51 | print(f"Couldn't send it to {device}, sending it to {self.device} instead.") 52 | self.model.to(self.device) 53 | 54 | def set_loaders(self, train_loader, val_loader=None): 55 | # This method allows the user to define which train_loader (and val_loader, optionally) to use 56 | # Both loaders are then assigned to attributes of the class 57 | # So they can be referred to later 58 | self.train_loader = train_loader 59 | self.val_loader = val_loader 60 | 61 | def set_tensorboard(self, name, folder='runs'): 62 | # This method allows the user to define a SummaryWriter to interface with TensorBoard 63 | suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S') 64 | self.writer = SummaryWriter(f'{folder}/{name}_{suffix}') 65 | 66 | def _make_train_step_fn(self): 67 | # This method does not need ARGS... it can refer to 68 | # the attributes: self.model, self.loss_fn and self.optimizer 69 | 70 | # Builds function that performs a step in the train loop 71 | def perform_train_step_fn(x, y): 72 | # Sets model to TRAIN mode 73 | self.model.train() 74 | 75 | # Step 1 - Computes our model's predicted output - forward pass 76 | yhat = self.model(x) 77 | # Step 2 - Computes the loss 78 | loss = self.loss_fn(yhat, y) 79 | # Step 3 - Computes gradients for both "a" and "b" parameters 80 | loss.backward() 81 | # Step 4 - Updates parameters using gradients and the learning rate 82 | self.optimizer.step() 83 | self.optimizer.zero_grad() 84 | 85 | # Returns the loss 86 | return loss.item() 87 | 88 | # Returns the function that will be called inside the train loop 89 | return perform_train_step_fn 90 | 91 | def _make_val_step_fn(self): 92 | # Builds function that performs a step in the validation loop 93 | def perform_val_step_fn(x, y): 94 | # Sets model to EVAL mode 95 | self.model.eval() 96 | 97 | # Step 1 - Computes our model's predicted output - forward pass 98 | yhat = self.model(x) 99 | # Step 2 - Computes the loss 100 | loss = self.loss_fn(yhat, y) 101 | # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation 102 | return loss.item() 103 | 104 | return perform_val_step_fn 105 | 106 | def _mini_batch(self, validation=False): 107 | # The mini-batch can be used with both loaders 108 | # The argument `validation` defines which loader and 109 | # corresponding step function are going to be used 110 | if validation: 111 | data_loader = self.val_loader 112 | step_fn = self.val_step_fn 113 | else: 114 | data_loader = self.train_loader 115 | step_fn = self.train_step_fn 116 | 117 | if data_loader is None: 118 | return None 119 | 120 | # Once the data loader and step function are set, this is the same 121 | # mini-batch loop we had before 122 | mini_batch_losses = [] 123 | for x_batch, y_batch in data_loader: 124 | x_batch = x_batch.to(self.device) 125 | y_batch = y_batch.to(self.device) 126 | 127 | mini_batch_loss = step_fn(x_batch, y_batch) 128 | mini_batch_losses.append(mini_batch_loss) 129 | 130 | loss = np.mean(mini_batch_losses) 131 | return loss 132 | 133 | def set_seed(self, seed=42): 134 | torch.backends.cudnn.deterministic = True 135 | torch.backends.cudnn.benchmark = False 136 | torch.manual_seed(seed) 137 | np.random.seed(seed) 138 | random.seed(seed) 139 | try: 140 | self.train_loader.sampler.generator.manual_seed(seed) 141 | except AttributeError: 142 | pass 143 |
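# set_seed (v1) additionally seeds Python's own `random` module and, when one
# exists, the generator used by the train loader's sampler. For that last line
# to have any effect, the DataLoader must have been created with a generator,
# e.g. (a sketch, not code from this repo):
#   g = torch.Generator()
#   train_loader = DataLoader(train_data, batch_size=16, shuffle=True, generator=g)
# which makes the shuffling order reproducible across runs as well.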
144 | def train(self, n_epochs, seed=42): 145 | # To ensure reproducibility of the training process 146 | self.set_seed(seed) 147 | 148 | for epoch in range(n_epochs): 149 | # Keeps track of the number of epochs 150 | # by updating the corresponding attribute 151 | self.total_epochs += 1 152 | 153 | # inner loop 154 | # Performs training using mini-batches 155 | loss = self._mini_batch(validation=False) 156 | self.losses.append(loss) 157 | 158 | # VALIDATION 159 | # no gradients in validation! 160 | with torch.no_grad(): 161 | # Performs evaluation using mini-batches 162 | val_loss = self._mini_batch(validation=True) 163 | self.val_losses.append(val_loss) 164 | 165 | # If a SummaryWriter has been set... 166 | if self.writer: 167 | scalars = {'training': loss} 168 | if val_loss is not None: 169 | scalars.update({'validation': val_loss}) 170 | # Records both losses for each epoch under the main tag "loss" 171 | self.writer.add_scalars(main_tag='loss', 172 | tag_scalar_dict=scalars, 173 | global_step=epoch) 174 | 175 | if self.writer: 176 | # Closes the writer 177 | self.writer.close() 178 | 179 | def save_checkpoint(self, filename): 180 | # Builds dictionary with all elements for resuming training 181 | checkpoint = {'epoch': self.total_epochs, 182 | 'model_state_dict': self.model.state_dict(), 183 | 'optimizer_state_dict': self.optimizer.state_dict(), 184 | 'loss': self.losses, 185 | 'val_loss': self.val_losses} 186 | 187 | torch.save(checkpoint, filename) 188 | 189 | def load_checkpoint(self, filename): 190 | # Loads dictionary 191 | checkpoint = torch.load(filename, weights_only=False) 192 | 193 | # Restores state for model and optimizer 194 | self.model.load_state_dict(checkpoint['model_state_dict']) 195 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 196 | 197 | self.total_epochs = checkpoint['epoch'] 198 | self.losses = checkpoint['loss'] 199 | self.val_losses = checkpoint['val_loss'] 200 | 201 | self.model.train() # always use TRAIN for resuming training 202 | 203 | def predict(self, x): 204 | # Sets it to evaluation mode for predictions 205 | self.model.eval() 206 | # Takes a Numpy input and makes it a float tensor 207 | x_tensor = torch.as_tensor(x).float() 208 | # Sends input to device and uses model for prediction 209 | y_hat_tensor = self.model(x_tensor.to(self.device)) 210 | # Sets it back to train mode 211 | self.model.train() 212 | # Detaches it, brings it to CPU and back to Numpy 213 | return y_hat_tensor.detach().cpu().numpy() 214 | 215 | def plot_losses(self): 216 | fig = plt.figure(figsize=(10, 4)) 217 | plt.plot(self.losses, label='Training Loss', c='b') 218 | plt.plot(self.val_losses, label='Validation Loss', c='r') 219 | plt.yscale('log') 220 | plt.xlabel('Epochs') 221 | plt.ylabel('Loss') 222 | plt.legend() 223 | plt.tight_layout() 224 | return fig 225 | 226 | def add_graph(self): 227 | # Fetches a single mini-batch so we can use add_graph 228 | if self.train_loader and self.writer: 229 | x_sample,
y_sample = next(iter(self.train_loader)) 230 | self.writer.add_graph(self.model, x_sample.to(self.device)) 231 | 232 | def count_parameters(self): 233 | return sum(p.numel() for p in self.model.parameters() if p.requires_grad) -------------------------------------------------------------------------------- /stepbystep/v2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import datetime 3 | import torch 4 | import torch.nn as nn 5 | import random 6 | import matplotlib.pyplot as plt 7 | from torch.utils.tensorboard import SummaryWriter 8 | 9 | plt.style.use('fivethirtyeight') 10 | 11 | class StepByStep(object): 12 | def __init__(self, model, loss_fn, optimizer): 13 | # Here we define the attributes of our class 14 | 15 | # We start by storing the arguments as attributes 16 | # to use them later 17 | self.model = model 18 | self.loss_fn = loss_fn 19 | self.optimizer = optimizer 20 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 21 | # Let's send the model to the specified device right away 22 | self.model.to(self.device) 23 | 24 | # These attributes are defined here, but since they are 25 | # not informed at the moment of creation, we keep them None 26 | self.train_loader = None 27 | self.val_loader = None 28 | self.writer = None 29 | 30 | # These attributes are going to be computed internally 31 | self.losses = [] 32 | self.val_losses = [] 33 | self.total_epochs = 0 34 | 35 | self.visualization = {} 36 | self.handles = {} 37 | 38 | # Creates the train_step function for our model, 39 | # loss function and optimizer 40 | # Note: there are NO ARGS there! It makes use of the class 41 | # attributes directly 42 | self.train_step_fn = self._make_train_step_fn() 43 | # Creates the val_step function for our model and loss 44 | self.val_step_fn = self._make_val_step_fn() 45 | 46 | def to(self, device): 47 | # This method allows the user to specify a different device 48 | # It sets the corresponding attribute (to be used later in 49 | # the mini-batches) and sends the model to the device 50 | try: 51 | self.device = device 52 | self.model.to(self.device) 53 | except RuntimeError: 54 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 55 | print(f"Couldn't send it to {device}, sending it to {self.device} instead.") 56 | self.model.to(self.device) 57 | 58 | def set_loaders(self, train_loader, val_loader=None): 59 | # This method allows the user to define which train_loader (and val_loader, optionally) to use 60 | # Both loaders are then assigned to attributes of the class 61 | # So they can be referred to later 62 | self.train_loader = train_loader 63 | self.val_loader = val_loader 64 | 65 | def set_tensorboard(self, name, folder='runs'): 66 | # This method allows the user to define a SummaryWriter to interface with TensorBoard 67 | suffix = datetime.datetime.now().strftime('%Y%m%d%H%M%S') 68 | self.writer = SummaryWriter(f'{folder}/{name}_{suffix}') 69 | 70 | def _make_train_step_fn(self): 71 | # This method does not need ARGS... 
it can refer to 72 | # the attributes: self.model, self.loss_fn and self.optimizer 73 | 74 | # Builds function that performs a step in the train loop 75 | def perform_train_step_fn(x, y): 76 | # Sets model to TRAIN mode 77 | self.model.train() 78 | 79 | # Step 1 - Computes our model's predicted output - forward pass 80 | yhat = self.model(x) 81 | # Step 2 - Computes the loss 82 | loss = self.loss_fn(yhat, y) 83 | # Step 3 - Computes gradients for both "a" and "b" parameters 84 | loss.backward() 85 | # Step 4 - Updates parameters using gradients and the learning rate 86 | self.optimizer.step() 87 | self.optimizer.zero_grad() 88 | 89 | # Returns the loss 90 | return loss.item() 91 | 92 | # Returns the function that will be called inside the train loop 93 | return perform_train_step_fn 94 | 95 | def _make_val_step_fn(self): 96 | # Builds function that performs a step in the validation loop 97 | def perform_val_step_fn(x, y): 98 | # Sets model to EVAL mode 99 | self.model.eval() 100 | 101 | # Step 1 - Computes our model's predicted output - forward pass 102 | yhat = self.model(x) 103 | # Step 2 - Computes the loss 104 | loss = self.loss_fn(yhat, y) 105 | # There is no need to compute Steps 3 and 4, since we don't update parameters during evaluation 106 | return loss.item() 107 | 108 | return perform_val_step_fn 109 | 110 | def _mini_batch(self, validation=False): 111 | # The mini-batch can be used with both loaders 112 | # The argument `validation` defines which loader and 113 | # corresponding step function are going to be used 114 | if validation: 115 | data_loader = self.val_loader 116 | step_fn = self.val_step_fn 117 | else: 118 | data_loader = self.train_loader 119 | step_fn = self.train_step_fn 120 | 121 | if data_loader is None: 122 | return None 123 | 124 | # Once the data loader and step function are set, this is the same 125 | # mini-batch loop we had before 126 | mini_batch_losses = [] 127 | for x_batch, y_batch in data_loader: 128 | x_batch = x_batch.to(self.device) 129 | y_batch = y_batch.to(self.device) 130 | 131 | mini_batch_loss = step_fn(x_batch, y_batch) 132 | mini_batch_losses.append(mini_batch_loss) 133 | 134 | loss = np.mean(mini_batch_losses) 135 | return loss 136 | 137 | def set_seed(self, seed=42): 138 | torch.backends.cudnn.deterministic = True 139 | torch.backends.cudnn.benchmark = False 140 | torch.manual_seed(seed) 141 | np.random.seed(seed) 142 | random.seed(seed) 143 | try: 144 | self.train_loader.sampler.generator.manual_seed(seed) 145 | except AttributeError: 146 | pass 147 | 148 | def train(self, n_epochs, seed=42): 149 | # To ensure reproducibility of the training process 150 | self.set_seed(seed) 151 | 152 | for epoch in range(n_epochs): 153 | # Keeps track of the number of epochs 154 | # by updating the corresponding attribute 155 | self.total_epochs += 1 156 | 157 | # inner loop 158 | # Performs training using mini-batches 159 | loss = self._mini_batch(validation=False) 160 | self.losses.append(loss) 161 | 162 | # VALIDATION 163 | # no gradients in validation! 164 | with torch.no_grad(): 165 | # Performs evaluation using mini-batches 166 | val_loss = self._mini_batch(validation=True) 167 | self.val_losses.append(val_loss) 168 | 169 | # If a SummaryWriter has been set... 170 | if self.writer: 171 | scalars = {'training': loss} 172 | if val_loss is not None: 173 | scalars.update({'validation': val_loss}) 174 | # Records both losses for each epoch under the main tag "loss" 175 | self.writer.add_scalars(main_tag='loss', 176 | tag_scalar_dict=scalars, 177 | global_step=epoch) 178 | 179 | if self.writer: 180 | # Closes the writer 181 | self.writer.close() 182 |
183 | def save_checkpoint(self, filename): 184 | # Builds dictionary with all elements for resuming training 185 | checkpoint = {'epoch': self.total_epochs, 186 | 'model_state_dict': self.model.state_dict(), 187 | 'optimizer_state_dict': self.optimizer.state_dict(), 188 | 'loss': self.losses, 189 | 'val_loss': self.val_losses} 190 | 191 | torch.save(checkpoint, filename) 192 | 193 | def load_checkpoint(self, filename): 194 | # Loads dictionary 195 | checkpoint = torch.load(filename, weights_only=False) 196 | 197 | # Restores state for model and optimizer 198 | self.model.load_state_dict(checkpoint['model_state_dict']) 199 | self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 200 | 201 | self.total_epochs = checkpoint['epoch'] 202 | self.losses = checkpoint['loss'] 203 | self.val_losses = checkpoint['val_loss'] 204 | 205 | self.model.train() # always use TRAIN for resuming training 206 | 207 | def predict(self, x): 208 | # Sets it to evaluation mode for predictions 209 | self.model.eval() 210 | # Takes a Numpy input and makes it a float tensor 211 | x_tensor = torch.as_tensor(x).float() 212 | # Sends input to device and uses model for prediction 213 | y_hat_tensor = self.model(x_tensor.to(self.device)) 214 | # Sets it back to train mode 215 | self.model.train() 216 | # Detaches it, brings it to CPU and back to Numpy 217 | return y_hat_tensor.detach().cpu().numpy() 218 | 219 | def plot_losses(self): 220 | fig = plt.figure(figsize=(10, 4)) 221 | plt.plot(self.losses, label='Training Loss', c='b') 222 | plt.plot(self.val_losses, label='Validation Loss', c='r') 223 | plt.yscale('log') 224 | plt.xlabel('Epochs') 225 | plt.ylabel('Loss') 226 | plt.legend() 227 | plt.tight_layout() 228 | return fig 229 | 230 | def add_graph(self): 231 | # Fetches a single mini-batch so we can use add_graph 232 | if self.train_loader and self.writer: 233 | x_sample, y_sample = next(iter(self.train_loader)) 234 | self.writer.add_graph(self.model, x_sample.to(self.device)) 235 | 236 | def count_parameters(self): 237 | return sum(p.numel() for p in self.model.parameters() if p.requires_grad) 238 | 239 | @staticmethod 240 | def _visualize_tensors(axs, x, y=None, yhat=None, layer_name='', title=None): 241 | # The number of images is the number of subplots in a row 242 | n_images = len(axs) 243 | # Gets max and min values for scaling the grayscale 244 | minv, maxv = np.min(x[:n_images]), np.max(x[:n_images]) 245 | # For each image 246 | for j, image in enumerate(x[:n_images]): 247 | ax = axs[j] 248 | # Sets title, labels, and removes ticks 249 | if title is not None: 250 | ax.set_title('{} #{}'.format(title, j), fontsize=12) 251 | ax.set_ylabel( 252 | '{}\n{}x{}'.format(layer_name, *np.atleast_2d(image).shape), 253 | rotation=0, labelpad=40 254 | ) 255 | xlabel1 = '' if y is None else '\nLabel: {}'.format(y[j]) 256 | xlabel2 = '' if yhat is None else '\nPredicted: {}'.format(yhat[j]) 257 | xlabel = '{}{}'.format(xlabel1, xlabel2) 258 | if len(xlabel): 259 | ax.set_xlabel(xlabel, fontsize=12) 260 | ax.set_xticks([]) 261 | ax.set_yticks([]) 262 | 263 | # Plots weight as an image 264 | ax.imshow( 265 |
np.atleast_2d(image.squeeze()), 266 | cmap='gray', 267 | vmin=minv, 268 | vmax=maxv 269 | ) 270 | return 271 | 272 | def visualize_filters(self, layer_name, **kwargs): 273 | try: 274 | # Gets the layer object from the model 275 | layer = self.model 276 | for name in layer_name.split('.'): 277 | layer = getattr(layer, name) 278 | # We are only looking at filters for 2D convolutions 279 | if isinstance(layer, nn.Conv2d): 280 | # Takes the weight information 281 | weights = layer.weight.data.cpu().numpy() 282 | # The weights have channels_out (filter), channels_in, H, W shape 283 | n_filters, n_channels, _, _ = weights.shape 284 | 285 | # Builds a figure 286 | size = (2 * n_channels + 2, 2 * n_filters) 287 | fig, axes = plt.subplots(n_filters, n_channels, figsize=size) 288 | axes = np.atleast_2d(axes).reshape(n_filters, n_channels) 289 | # For each channel_out (filter) 290 | for i in range(n_filters): 291 | StepByStep._visualize_tensors( 292 | axes[i, :], 293 | weights[i], 294 | layer_name='Filter #{}'.format(i), 295 | title='Channel' if (i == 0) else None 296 | ) 297 | 298 | for ax in axes.flat: 299 | ax.label_outer() 300 | 301 | fig.tight_layout() 302 | return fig 303 | except AttributeError: 304 | return 305 | 306 | def attach_hooks(self, layers_to_hook, hook_fn=None): 307 | # Clear any previous values 308 | self.visualization = {} 309 | # Creates the dictionary to map layer objects to their names 310 | modules = list(self.model.named_modules()) 311 | layer_names = {layer: name for name, layer in modules[1:]} 312 | 313 | if hook_fn is None: 314 | # Hook function to be attached to the forward pass 315 | def hook_fn(layer, inputs, outputs): 316 | # Gets the layer name 317 | name = layer_names[layer] 318 | # Detaches outputs 319 | values = outputs.detach().cpu().numpy() 320 | # Since the hook function may be called multiple times 321 | # for example, if we make predictions for multiple mini-batches 322 | # it concatenates the results 323 | if self.visualization[name] is None: 324 | self.visualization[name] = values 325 | else: 326 | self.visualization[name] = np.concatenate([self.visualization[name], values]) 327 | 328 | for name, layer in modules: 329 | # If the layer is in our list 330 | if name in layers_to_hook: 331 | # Initializes the corresponding key in the dictionary 332 | self.visualization[name] = None 333 | # Register the forward hook and keep the handle in another dict 334 | self.handles[name] = layer.register_forward_hook(hook_fn) 335 | 336 | def remove_hooks(self): 337 | # Loops through all hooks and removes them 338 | for handle in self.handles.values(): 339 | handle.remove() 340 | # Clear the dict, as all hooks have been removed 341 | self.handles = {} 342 | 343 | def visualize_outputs(self, layers, n_images=10, y=None, yhat=None): 344 | layers = list(filter(lambda l: l in self.visualization.keys(), layers)) 345 | shapes = [self.visualization[layer].shape for layer in layers] 346 | n_rows = [shape[1] if len(shape) == 4 else 1 for shape in shapes] 347 | total_rows = np.sum(n_rows) 348 | 349 | fig, axes = plt.subplots(total_rows, n_images, figsize=(1.5*n_images, 1.5*total_rows)) 350 | axes = np.atleast_2d(axes).reshape(total_rows, n_images) 351 | 352 | # Loops through the layers, one layer per row of subplots 353 | row = 0 354 | for i, layer in enumerate(layers): 355 | start_row = row 356 | # Takes the produced feature maps for that layer 357 | output = self.visualization[layer] 358 | 359 | is_vector = len(output.shape) == 2 360 | 361 | for j in range(n_rows[i]): 362 | 
StepByStep._visualize_tensors( 363 | axes[row, :], 364 | output if is_vector else output[:, j].squeeze(), 365 | y, 366 | yhat, 367 | layer_name=layers[i] if is_vector else '{}\nfil#{}'.format(layers[i], row-start_row), 368 | title='Image' if (row == 0) else None 369 | ) 370 | row += 1 371 | 372 | for ax in axes.flat: 373 | ax.label_outer() 374 | 375 | plt.tight_layout() 376 | return fig 377 | 378 | def correct(self, x, y, threshold=.5): 379 | self.model.eval() 380 | yhat = self.model(x.to(self.device)) 381 | y = y.to(self.device) 382 | self.model.train() 383 | 384 | # We get the size of the batch and the number of classes 385 | # (only 1, if it is binary) 386 | n_samples, n_dims = yhat.shape 387 | if n_dims > 1: 388 | # In a multiclass classification, the biggest logit 389 | # always wins, so we don't bother getting probabilities 390 | 391 | # This is PyTorch's version of argmax, 392 | # but it returns a tuple: (max value, index of max value) 393 | _, predicted = torch.max(yhat, 1) 394 | else: 395 | n_dims += 1 396 | # In binary classification, we NEED to check if the 397 | # last layer is a sigmoid (and then it produces probs) 398 | if isinstance(self.model, nn.Sequential) and \ 399 | isinstance(self.model[-1], nn.Sigmoid): 400 | predicted = (yhat > threshold).long() 401 | # or something else (logits), which we need to convert 402 | # using a sigmoid 403 | else: 404 | predicted = (torch.sigmoid(yhat) > threshold).long() 405 | 406 | # How many samples got classified correctly for each class 407 | result = [] 408 | for c in range(n_dims): 409 | n_class = (y == c).sum().item() 410 | n_correct = (predicted[y == c] == c).sum().item() 411 | result.append((n_correct, n_class)) 412 | return torch.tensor(result) 413 | 414 | @staticmethod 415 | def loader_apply(loader, func, reduce='sum'): 416 | results = [func(x, y) for x, y in loader] 417 | results = torch.stack(results, dim=0) 418 | 419 | if reduce == 'sum': 420 | results = results.sum(dim=0) 421 | elif reduce == 'mean': 422 | results = results.float().mean(dim=0) 423 | 424 | return results -------------------------------------------------------------------------------- /tensorboardserverextension.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen 2 | 3 | 4 | def load_jupyter_server_extension(nbapp): 5 | """Launches a TensorBoard server for the `runs` directory on port 6006""" 6 | Popen(["tensorboard", "--logdir", "runs", "--port", "6006"]) 7 | --------------------------------------------------------------------------------
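# A usage sketch for StepByStep.correct and StepByStep.loader_apply from
# stepbystep/v2.py above (sbs and val_loader are assumed to already exist):
#   per_class = StepByStep.loader_apply(val_loader, sbs.correct)
#   # per_class[c] holds (correctly classified, total) counts for class c
#   accuracy = per_class[:, 0].sum() / per_class[:, 1].sum()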