├── .gitignore
├── Makefile
├── README.md
├── data
    └── raw
    │   └── nietzsche.txt
├── notebooks
    ├── .gitignore
    ├── 0X_pytorch_in_googles_colab.ipynb
    ├── 0X_pytorch_in_googles_colab.py
    ├── 0X_teacher_forcing.ipynb
    ├── 0X_teacher_forcing.py
    ├── debugging.ipynb
    ├── debugging.py
    ├── foreword.ipynb
    ├── foreword.py
    ├── hooks.ipynb
    ├── hooks.py
    ├── img
    │   ├── common_mistakes.png
    │   ├── dynamic_graph.gif
    │   ├── ml_debt.jpg
    │   ├── pytorch-logo.png
    │   ├── pytorch_logo.png
    │   ├── pytorch_logo_flame.png
    │   ├── software_vs_ml.png
    │   ├── tensorboardx_demo.gif
    │   ├── tensorboardx_demo2.gif
    │   ├── the_real_reason.png
    │   └── visdom.png
    ├── lin_reg.ipynb
    ├── lin_reg.py
    ├── machine_learning_101.ipynb
    ├── machine_learning_101.py
    ├── mean_shift_clustering.ipynb
    ├── mean_shift_clustering.py
    ├── pytorch_basics.ipynb
    ├── pytorch_basics.py
    ├── rnn_from_scratch.ipynb
    ├── rnn_from_scratch.py
    ├── storing_and_loading_models.ipynb
    ├── storing_and_loading_models.py
    ├── the_end.ipynb
    ├── the_end.py
    ├── torch_jit.ipynb
    ├── torch_jit.py
    ├── transfer_learning.ipynb
    ├── transfer_learning.py
    ├── visualize_model_loss_optimizer.ipynb
    ├── visualize_model_loss_optimizer.py
    ├── working_with_data.ipynb
    └── working_with_data.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
103 | data/raw
104 | *.zip
105 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Regenerate every notebook from its paired jupytext script and execute it.
 2 | # Usage: `make run_notebooks` (also the default goal, as the first target).
 3 | 
 4 | # Expand the file lists once, at parse time.
 5 | PY := $(wildcard notebooks/*.py)
 6 | IPYNB := $(patsubst notebooks/%.py,notebooks/%.ipynb,$(PY))
 7 | 
 8 | # run_notebooks names a task, not a file on disk.
 9 | .PHONY: run_notebooks
10 | run_notebooks: $(IPYNB)
11 | 
12 | # Build one notebook from its .py source, then execute it in place so the
13 | # stored outputs are fresh. The conversion must target the notebook format;
14 | # converting to py:percent would only rewrite the script and leave $@ stale.
15 | notebooks/%.ipynb: notebooks/%.py
16 | 	@echo $@
17 | 	jupytext --to notebook --output $@ $<
18 | 	jupyter nbconvert --execute --to notebook --inplace $@
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <img src="notebooks/img/pytorch-logo.png" width="80"> PyTorch Tutorial
  2 | ================================================================================
  3 | 
  4 | This repository contains material to get started with
  5 | [PyTorch](https://pytorch.org/) v1.7.
  6 | It was the base for this
  7 | [[pytorch tutorial]](https://nodata.science/pydata-pytorch-tutorial.html)
  8 | from PyData Berlin 2018.
  9 | 
 10 | <hr>
 11 | 
 12 | Table of Contents
 13 | --------------------------------------------------------------------------------
 14 | 
 15 | ### PART 0 - Foreword
 16 | - [Foreword](notebooks/foreword.ipynb) - Why PyTorch and why not? Why this talk?
 17 | 
 18 | ### PART 1 - Basics
 19 | - [PyTorch basics](notebooks/pytorch_basics.ipynb) - tensors, GPU, autograd -
 20 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/pytorch_basics.ipynb)
 21 | - [Debugging](notebooks/debugging.ipynb) -
 22 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/debugging.ipynb)
 23 | - [Example: linear regression](notebooks/lin_reg.ipynb) -
 24 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/lin_reg.ipynb)
 25 | - [Storing and loading models](notebooks/storing_and_loading_models.ipynb) -
 26 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/storing_and_loading_models.ipynb)
 27 | - [Working with data](notebooks/working_with_data.ipynb) - `Dataset`, `DataLoader`, `Sampler`, `transforms` -
 28 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/working_with_data.ipynb)
 29 | 
 30 | ### PART 2 - Computer Vision
 31 | - [Transfer Learning](notebooks/transfer_learning.ipynb) -
 32 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/transfer_learning.ipynb)
 33 | 
 34 | ### PART 3 - Misc, Cool Applications, Tips, Advanced
 35 | - [Torch JIT](notebooks/torch_jit.ipynb) -
 36 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/torch_jit.ipynb)
 37 | - [Hooks](notebooks/hooks.ipynb) -
 38 |   register functions to be called during the forward and backward pass -
 39 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/hooks.ipynb)
 40 | - [Machine Learning 101 with numpy and PyTorch](notebooks/machine_learning_101.ipynb) -
 41 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/machine_learning_101.ipynb)
 42 | - [PyTorch + GPU in Google's Colab](notebooks/0X_pytorch_in_googles_colab.ipynb)
 43 | - [Teacher Forcing](notebooks/0X_teacher_forcing.ipynb)
 44 | - [RNNs from Scratch](notebooks/rnn_from_scratch.ipynb) -
 45 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/rnn_from_scratch.ipynb)
 46 | - [Mean Shift Clustering](notebooks/mean_shift_clustering.ipynb) -
 47 |   [open in colab](https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/mean_shift_clustering.ipynb)
 48 | 
 49 | ### PART -2 - WIP and TODO
 50 | - TODO `nn` and `nn.Module`
 51 | - TODO Deployment
 52 | - TODO Deployment with TF Serving
 53 | - TODO `nn.init`
 54 | - TODO PyTorch C++ frontend
 55 | 
 56 | ### PART -1 - The End
 57 | - [The End](notebooks/the_end.ipynb)
 58 | 
 59 | <hr>
 60 | 
 61 | 
 62 | Setup
 63 | --------------------------------------------------------------------------------
 64 | 
 65 | ### Requirements
 66 | 
 67 | - Python 3.8
 68 | 
 69 | 
 70 | ### Install Dependencies
 71 | 
 72 | ```bash
 73 | python3.8 -m venv .venv
 74 | source .venv/bin/activate  # fish shell: source .venv/bin/activate.fish
 75 | pip install -r requirements.txt
 76 | ```
 77 | 
 78 | #### Optional
 79 | Run the following to enable the [jupyter table of contents plugin](https://github.com/jupyterlab/jupyterlab-toc):
 80 | ```bash
 81 | jupyter labextension install @jupyterlab/toc
 82 | jupyter nbextension enable --py widgetsnbextension
 83 | ```
 84 | 
 85 | ### Download data and models
 86 | 
 87 | Download data and models for the tutorial:
 88 | 
 89 | ```bash
 90 | python download_data.py
 91 | ```
 92 | 
 93 | Then you should be ready to go.
 94 | Start jupyter lab:
 95 | 
 96 | ```bash
 97 | jupyter lab
 98 | ```
 99 | 
100 | 
101 | Prior Versions
102 | --------------------------------------------------------------------------------
103 | 
104 | - Version of this tutorial for the PyData 2018 conference:
105 |   [[material]](https://github.com/sotte/pytorch_tutorial/tree/pydata2018)
106 |   [[video]](https://nodata.science/pydata-pytorch-tutorial.html)
107 | 


--------------------------------------------------------------------------------
/notebooks/.gitignore:
--------------------------------------------------------------------------------
1 | *.pt
2 | data/
3 | tf_log/
4 | 


--------------------------------------------------------------------------------
/notebooks/0X_pytorch_in_googles_colab.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Using PyTorch + GPU/TPU in Google's Colab\n",
 8 |     "\n",
 9 |     "> Colaboratory is a Google research project created to help disseminate machine learning education and research. It's a Jupyter notebook environment that requires no setup to use and runs entirely in the cloud.\n",
10 |     "> Colaboratory notebooks are stored in Google Drive and can be shared just as you would with Google Docs or Sheets. Colaboratory is free to use.\n",
11 |     "> -- https://colab.research.google.com/notebooks/welcome.ipynb\n",
12 |     "\n",
13 |     "**Setup**\n",
14 |     "- Go to https://colab.research.google.com\n",
15 |     "- Create a new python 3 notebook\n",
16 |     "- Enable the GPU: \"Edit -> Notebook settings -> Hardware accelerator: GPU -> Save\"\n",
17 |     "- Then try the following:\n",
18 |     "\n",
19 |     "```python\n",
20 |     "import torch\n",
21 |     "\n",
22 |     "print(torch.__version__)\n",
23 |     "\n",
24 |     "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
25 |     "print(DEVICE)\n",
26 |     "```"
27 |    ]
28 |   },
29 |   {
30 |    "cell_type": "markdown",
31 |    "metadata": {},
32 |    "source": [
33 |     "You should get something like this:\n",
34 |     "> 1.0.1.post2\n",
35 |     ">\n",
36 |     "> cuda"
37 |    ]
38 |   },
39 |   {
40 |    "cell_type": "markdown",
41 |    "metadata": {},
42 |    "source": [
43 |     "# Using this Repo in Colab\n",
44 |     "You can use this repo with google colab,\n",
45 |     "but not all notebooks run without changes.\n",
46 |     "Some notebooks import from `utils.py` which is not available on colab.\n",
47 |     "You have to remove that line and copy and paste the required function/class into the notebook.\n",
48 |     "\n",
49 |     "It's easy to use colab. Simply append the url from the notebook on github to `https://colab.research.google.com/github/`. E.g. `notebooks/pytorch_basics.ipynb` is available under:\n",
50 |     "https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/pytorch_basics.ipynb"
51 |    ]
52 |   }
53 |  ],
54 |  "metadata": {
55 |   "kernelspec": {
56 |    "display_name": "Python 3",
57 |    "language": "python",
58 |    "name": "python3"
59 |   },
60 |   "language_info": {
61 |    "codemirror_mode": {
62 |     "name": "ipython",
63 |     "version": 3
64 |    },
65 |    "file_extension": ".py",
66 |    "mimetype": "text/x-python",
67 |    "name": "python",
68 |    "nbconvert_exporter": "python",
69 |    "pygments_lexer": "ipython3",
70 |    "version": "3.8.5"
71 |   }
72 |  },
73 |  "nbformat": 4,
74 |  "nbformat_minor": 2
75 | }
76 | 


--------------------------------------------------------------------------------
/notebooks/0X_pytorch_in_googles_colab.py:
--------------------------------------------------------------------------------
 1 | # ---
 2 | # jupyter:
 3 | #   jupytext:
 4 | #     text_representation:
 5 | #       extension: .py
 6 | #       format_name: percent
 7 | #       format_version: '1.3'
 8 | #       jupytext_version: 1.7.1
 9 | #   kernelspec:
10 | #     display_name: Python 3
11 | #     language: python
12 | #     name: python3
13 | # ---
14 | 
15 | # %% [markdown]
16 | # # Using PyTorch + GPU/TPU in Google's Colab
17 | #
18 | # > Colaboratory is a Google research project created to help disseminate machine learning education and research. It's a Jupyter notebook environment that requires no setup to use and runs entirely in the cloud.
19 | # > Colaboratory notebooks are stored in Google Drive and can be shared just as you would with Google Docs or Sheets. Colaboratory is free to use.
20 | # > -- https://colab.research.google.com/notebooks/welcome.ipynb
21 | #
22 | # **Setup**
23 | # - Go to https://colab.research.google.com
24 | # - Create a new python 3 notebook
25 | # - Enable the GPU: "Edit -> Notebook settings -> Hardware accelerator: GPU -> Save"
26 | # - Then try the following:
27 | #
28 | # ```python
29 | # import torch
30 | #
31 | # print(torch.__version__)
32 | #
33 | # DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
34 | # print(DEVICE)
35 | # ```
36 | 
37 | # %% [markdown]
38 | # You should get something like this:
39 | # > 1.0.1.post2
40 | # >
41 | # > cuda
42 | 
43 | # %% [markdown]
44 | # # Using this Repo in Colab
45 | # You can use this repo with google colab,
46 | # but not all notebooks run without changes.
47 | # Some notebooks import from `utils.py` which is not available on colab.
48 | # You have to remove that line and copy and paste the required function/class into the notebook.
49 | #
50 | # It's easy to use colab. Simply append the url from the notebook on github to `https://colab.research.google.com/github/`. E.g. `notebooks/pytorch_basics.ipynb` is available under:
51 | # https://colab.research.google.com/github/sotte/pytorch_tutorial/blob/master/notebooks/pytorch_basics.ipynb
52 | 


--------------------------------------------------------------------------------
/notebooks/0X_teacher_forcing.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# \"Teacher Forcing\"\n",
 8 |     "\n",
 9 |     "\"Teacher forcing\" is a method used in sequence2sequence models.\n",
10 |     "It replaces a wrong word in the predicted sequence with the correct one.\n",
11 |     "\n",
12 |     "Think of a teacher that corrects your translation as soon as you say a wrong word to prevent you going off on a tangent."
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "markdown",
17 |    "metadata": {},
18 |    "source": [
19 |     "Here is the pseudo code for teacher forcing:\n",
20 |     "\n",
21 |     "```python\n",
22 |     "class Seq2SeqModel(nn.Module):\n",
23 |     "    def __init__(self, p_teacher_forcing: float):\n",
24 |     "        self.p_teacher_forcing = p_teacher_forcing\n",
25 |     "        # ...\n",
26 |     "    \n",
27 |     "    def forward(self, X, y):\n",
28 |     "        # ... some calculation\n",
29 |     "        current_word = torch.zeros(...)\n",
30 |     "        result = []\n",
31 |     "        for i in range(self.sentence_length):\n",
32 |     "            # ... some calculation with current_word\n",
33 |     "            result.append(output)\n",
34 |     "            current_word = torch.argmax(output)\n",
35 |     "            \n",
36 |     "            # teacher forcing\n",
37 |     "            if self.p_teacher_forcing > random.random():\n",
38 |     "                current_word = y[i]\n",
39 |     "        \n",
40 |     "        return torch.stack(result)\n",
41 |     "```\n",
42 |     "\n",
43 |     "Reduce `p_teacher_forcing` during training and let it converge to 0."
44 |    ]
45 |   }
46 |  ],
47 |  "metadata": {
48 |   "kernelspec": {
49 |    "display_name": "Python 3",
50 |    "language": "python",
51 |    "name": "python3"
52 |   },
53 |   "language_info": {
54 |    "codemirror_mode": {
55 |     "name": "ipython",
56 |     "version": 3
57 |    },
58 |    "file_extension": ".py",
59 |    "mimetype": "text/x-python",
60 |    "name": "python",
61 |    "nbconvert_exporter": "python",
62 |    "pygments_lexer": "ipython3",
63 |    "version": "3.8.5"
64 |   }
65 |  },
66 |  "nbformat": 4,
67 |  "nbformat_minor": 2
68 | }
69 | 


--------------------------------------------------------------------------------
/notebooks/0X_teacher_forcing.py:
--------------------------------------------------------------------------------
 1 | # ---
 2 | # jupyter:
 3 | #   jupytext:
 4 | #     text_representation:
 5 | #       extension: .py
 6 | #       format_name: percent
 7 | #       format_version: '1.3'
 8 | #       jupytext_version: 1.7.1
 9 | #   kernelspec:
10 | #     display_name: Python 3
11 | #     language: python
12 | #     name: python3
13 | # ---
14 | 
15 | # %% [markdown]
16 | # # "Teacher Forcing"
17 | #
18 | # "Teacher forcing" is a method used in sequence2sequence models.
19 | # It replaces a wrong word in the predicted sequence with the correct one.
20 | #
21 | # Think of a teacher that corrects your translation as soon as you say a wrong word to prevent you going off on a tangent.
22 | 
23 | # %% [markdown]
24 | # Here is the pseudo code for teacher forcing:
25 | #
26 | # ```python
27 | # class Seq2SeqModel(nn.Module):
28 | #     def __init__(self, p_teacher_forcing: float):
29 | #         self.p_teacher_forcing = p_teacher_forcing
30 | #         # ...
31 | #     
32 | #     def forward(self, X, y):
33 | #         # ... some calculation
34 | #         current_word = torch.zeros(...)
35 | #         result = []
36 | #         for i in range(self.sentence_length):
37 | #             # ... some calculation with current_word
38 | #             result.append(output)
39 | #             current_word = torch.argmax(output)
40 | #             
41 | #             # teacher forcing
42 | #             if self.p_teacher_forcing > random.random():
43 | #                 current_word = y[i]
44 | #         
45 | #         return torch.stack(result)
46 | # ```
47 | #
48 | # Reduce `p_teacher_forcing` during training and let it converge to 0.
49 | 


--------------------------------------------------------------------------------
/notebooks/debugging.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Debugging"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "execution": {
 15 |      "iopub.execute_input": "2020-12-06T08:34:45.891872Z",
 16 |      "iopub.status.busy": "2020-12-06T08:34:45.891378Z",
 17 |      "iopub.status.idle": "2020-12-06T08:34:47.209690Z",
 18 |      "shell.execute_reply": "2020-12-06T08:34:47.210008Z"
 19 |     }
 20 |    },
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stderr",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "/home/stefan/projects/pytorch_tutorial/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at  /pytorch/c10/cuda/CUDAFunctions.cpp:100.)\n",
 27 |       "  return torch._C._cuda_getDeviceCount() > 0\n"
 28 |      ]
 29 |     }
 30 |    ],
 31 |    "source": [
 32 |     "import torch\n",
 33 |     "import torch.nn as nn\n",
 34 |     "import torch.nn.functional as F\n",
 35 |     "import torch.optim as optim\n",
 36 |     "import torchvision\n",
 37 |     "\n",
 38 |     "DEVICE = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "**Q: \"No debugger for your code. What do you think?\"**\n",
 46 |     "\n",
 47 |     "**A: \"I would NOT be able to code!\"**\n",
 48 |     "\n",
 49 |     "- Who does \"print-line-debugging\"?\n",
 50 |     "- Who likes debugging in tensorflow?\n",
 51 |     "- What is the intersection of those two groups?\n",
 52 |     "\n",
 53 |     "\n",
 54 |     "## IPDB cheatsheet\n",
 55 |     "IPython Debugger\n",
 56 |     "\n",
 57 |     "Taken from http://wangchuan.github.io/coding/2017/07/12/ipdb-cheat-sheet.html\n",
 58 |     "\n",
 59 |     "- h(help): Print help\n",
 60 |     "\n",
 61 |     "- n(ext): Continue execution until the next line in the current function is reached or it returns.\n",
 62 |     "- s(tep): Execute the current line, stop at the first possible occasion (either in a function that is called or in the current function).\n",
 63 |     "- r(eturn): Continue execution until the current function returns.\n",
 64 |     "- c(ont(inue)): Continue execution, only stop when a breakpoint is encountered.\n",
 65 |     "\n",
 66 |     "- r(eturn): Continue execution until the current function returns.\n",
 67 |     "- a(rgs): Print the argument list of the current function."
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "Note: Python 3.7 has `breakpoint()` built-in! [[PEP 553]](https://www.python.org/dev/peps/pep-0553/)"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 2,
 80 |    "metadata": {
 81 |     "execution": {
 82 |      "iopub.execute_input": "2020-12-06T08:34:47.212530Z",
 83 |      "iopub.status.busy": "2020-12-06T08:34:47.212225Z",
 84 |      "iopub.status.idle": "2020-12-06T08:34:47.214085Z",
 85 |      "shell.execute_reply": "2020-12-06T08:34:47.213730Z"
 86 |     }
 87 |    },
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "from IPython.core.debugger import set_trace"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 3,
 96 |    "metadata": {
 97 |     "execution": {
 98 |      "iopub.execute_input": "2020-12-06T08:34:47.219577Z",
 99 |      "iopub.status.busy": "2020-12-06T08:34:47.219206Z",
100 |      "iopub.status.idle": "2020-12-06T08:34:47.221480Z",
101 |      "shell.execute_reply": "2020-12-06T08:34:47.221160Z"
102 |     }
103 |    },
104 |    "outputs": [
105 |     {
106 |      "data": {
107 |       "text/plain": [
108 |        "54"
109 |       ]
110 |      },
111 |      "execution_count": 1,
112 |      "metadata": {},
113 |      "output_type": "execute_result"
114 |     }
115 |    ],
116 |    "source": [
117 |     "def my_function(x):\n",
118 |     "    answer = 42\n",
119 |     "    # set_trace()  # <-- uncomment!\n",
120 |     "    answer += x\n",
121 |     "    return answer\n",
122 |     "\n",
123 |     "my_function(12)"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "## Example: debugging a NN"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 4,
136 |    "metadata": {
137 |     "execution": {
138 |      "iopub.execute_input": "2020-12-06T08:34:47.223812Z",
139 |      "iopub.status.busy": "2020-12-06T08:34:47.223510Z",
140 |      "iopub.status.idle": "2020-12-06T08:34:47.253488Z",
141 |      "shell.execute_reply": "2020-12-06T08:34:47.253136Z"
142 |     }
143 |    },
144 |    "outputs": [
145 |     {
146 |      "data": {
147 |       "text/plain": [
148 |        "tensor([[0.4467, 0.0772, 0.7921],\n",
149 |        "        [0.8916, 0.5952, 0.2477],\n",
150 |        "        [0.9442, 0.3809, 0.3629],\n",
151 |        "        [0.3867, 0.1336, 0.0256],\n",
152 |        "        [0.1272, 0.6342, 0.3937]])"
153 |       ]
154 |      },
155 |      "execution_count": 1,
156 |      "metadata": {},
157 |      "output_type": "execute_result"
158 |     }
159 |    ],
160 |    "source": [
161 |     "X = torch.rand((5, 3))\n",
162 |     "X"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 5,
168 |    "metadata": {
169 |     "execution": {
170 |      "iopub.execute_input": "2020-12-06T08:34:47.256914Z",
171 |      "iopub.status.busy": "2020-12-06T08:34:47.256587Z",
172 |      "iopub.status.idle": "2020-12-06T08:34:47.262137Z",
173 |      "shell.execute_reply": "2020-12-06T08:34:47.261822Z"
174 |     }
175 |    },
176 |    "outputs": [],
177 |    "source": [
178 |     "class MyModule(nn.Module):\n",
179 |     "    def __init__(self):\n",
180 |     "        super().__init__()\n",
181 |     "        self.lin = nn.Linear(3, 1)\n",
182 |     "    \n",
183 |     "    def forward(self, X):\n",
184 |     "        # set_trace()\n",
185 |     "        x = self.lin(X)\n",
186 |     "        return X\n",
187 |     "\n",
188 |     "    \n",
189 |     "model = MyModule()\n",
190 |     "y_ = model(X)\n",
191 |     "\n",
192 |     "# assert y_.shape == (5, 1), y_.shape"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "## Debug Layer"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 6,
205 |    "metadata": {
206 |     "execution": {
207 |      "iopub.execute_input": "2020-12-06T08:34:47.264564Z",
208 |      "iopub.status.busy": "2020-12-06T08:34:47.264237Z",
209 |      "iopub.status.idle": "2020-12-06T08:34:47.265995Z",
210 |      "shell.execute_reply": "2020-12-06T08:34:47.265649Z"
211 |     }
212 |    },
213 |    "outputs": [],
214 |    "source": [
215 |     "class DebugModule(nn.Module):\n",
216 |     "    def forward(self, x):\n",
217 |     "        set_trace()\n",
218 |     "        return x"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": 7,
224 |    "metadata": {
225 |     "execution": {
226 |      "iopub.execute_input": "2020-12-06T08:34:47.268382Z",
227 |      "iopub.status.busy": "2020-12-06T08:34:47.268055Z",
228 |      "iopub.status.idle": "2020-12-06T08:34:47.269773Z",
229 |      "shell.execute_reply": "2020-12-06T08:34:47.269424Z"
230 |     }
231 |    },
232 |    "outputs": [],
233 |    "source": [
234 |     "model = nn.Sequential(\n",
235 |     "    nn.Linear(1, 5),\n",
236 |     "    DebugModule(),\n",
237 |     "    nn.Linear(5, 1),\n",
238 |     ")"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": 8,
244 |    "metadata": {
245 |     "execution": {
246 |      "iopub.execute_input": "2020-12-06T08:34:47.271824Z",
247 |      "iopub.status.busy": "2020-12-06T08:34:47.271492Z",
248 |      "iopub.status.idle": "2020-12-06T08:34:47.273920Z",
249 |      "shell.execute_reply": "2020-12-06T08:34:47.273570Z"
250 |     }
251 |    },
252 |    "outputs": [],
253 |    "source": [
254 |     "X = torch.unsqueeze(torch.tensor([1.]), dim=0)\n",
255 |     "# model(X)"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "metadata": {},
261 |    "source": [
262 |     "## Tensorboard and `tensorboardX`\n",
263 |     "Tensorboard and `tensorboardX` are also great to debug a model, e.g. to look at the gradients."
264 |    ]
265 |   }
266 |  ],
267 |  "metadata": {
268 |   "kernelspec": {
269 |    "display_name": "Python 3",
270 |    "language": "python",
271 |    "name": "python3"
272 |   },
273 |   "language_info": {
274 |    "codemirror_mode": {
275 |     "name": "ipython",
276 |     "version": 3
277 |    },
278 |    "file_extension": ".py",
279 |    "mimetype": "text/x-python",
280 |    "name": "python",
281 |    "nbconvert_exporter": "python",
282 |    "pygments_lexer": "ipython3",
283 |    "version": "3.8.5"
284 |   }
285 |  },
286 |  "nbformat": 4,
287 |  "nbformat_minor": 2
288 | }
289 | 


--------------------------------------------------------------------------------
/notebooks/debugging.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # Debugging
 17 | 
 18 | # %%
 19 | import torch
 20 | import torch.nn as nn
 21 | import torch.nn.functional as F
 22 | import torch.optim as optim
 23 | import torchvision
 24 | 
 25 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # first CUDA GPU when available, otherwise the CPU
 26 | 
 27 | # %% [markdown]
 28 | # **Q: "No debugger for your code. What do you think?"**
 29 | #
 30 | # **A: "I would NOT be able to code!"**
 31 | #
 32 | # - Who does "print-line-debugging"?
 33 | # - Who likes debugging in tensorflow?
 34 | # - What is the intersection of those two groups?
 35 | #
 36 | #
 37 | # ## IPDB cheatsheet
 38 | # IPython Debugger
 39 | #
 40 | # Taken from http://wangchuan.github.io/coding/2017/07/12/ipdb-cheat-sheet.html
 41 | #
 42 | # - h(help): Print help
 43 | #
 44 | # - n(ext): Continue execution until the next line in the current function is reached or it returns.
 45 | # - s(tep): Execute the current line, stop at the first possible occasion (either in a function that is called or in the current function).
 46 | # - r(eturn): Continue execution until the current function returns.
 47 | # - c(ont(inue)): Continue execution, only stop when a breakpoint is encountered.
 48 | #
 49 | # - r(eturn): Continue execution until the current function returns.
 50 | # - a(rgs): Print the argument list of the current function.
 51 | 
 52 | # %% [markdown]
 53 | # Note: Python 3.7 has `breakpoint()` built-in! [[PEP 553]](https://www.python.org/dev/peps/pep-0553/)
 54 | 
 55 | # %%
 56 | from IPython.core.debugger import set_trace
 57 | 
 58 | 
 59 | # %%
 60 | def my_function(x):  # toy function to step through with the debugger; returns 42 + x
 61 |     answer = 42
 62 |     # set_trace()  # <-- uncomment!
 63 |     answer += x  # add the argument onto the local so the change is visible while stepping
 64 |     return answer
 65 | 
 66 | my_function(12)  # evaluates to 54
 67 | 
 68 | # %% [markdown]
 69 | # ## Example: debugging a NN
 70 | 
 71 | # %%
 72 | X = torch.rand((5, 3))  # 5x3 tensor of random values used as example input
 73 | X  # bare expression so the notebook displays the tensor
 74 | 
 75 | 
 76 | # %%
 77 | class MyModule(nn.Module):  # minimal module holding a single linear layer
 78 |     def __init__(self):
 79 |         super().__init__()
 80 |         self.lin = nn.Linear(3, 1)  # 3 input features -> 1 output feature
 81 |     
 82 |     def forward(self, X):
 83 |         # set_trace()
 84 |         x = self.lin(X)  # result of the linear layer is stored in lowercase `x`
 85 |         return X  # NOTE(review): returns the input `X`, not the computed `x` - presumably the deliberate bug for this debugging exercise; confirm
 86 | 
 87 |     
 88 | model = MyModule()
 89 | y_ = model(X)  # run the module's forward pass on X
 90 | 
 91 | # assert y_.shape == (5, 1), y_.shape
 92 | 
 93 | # %% [markdown]
 94 | # ## Debug Layer
 95 | 
 96 | # %%
 97 | class DebugModule(nn.Module):  # pass-through layer that opens the debugger on every forward call
 98 |     def forward(self, x):
 99 |         set_trace()  # drop into the IPython debugger with the incoming activation `x` in scope
100 |         return x  # hand the input on unchanged
101 | 
102 | 
103 | # %%
104 | model = nn.Sequential(  # two linear layers with a debugger hook in between
105 |     nn.Linear(1, 5),
106 |     DebugModule(),  # stops between the layers so the intermediate activation can be inspected
107 |     nn.Linear(5, 1),
108 | )
109 | 
110 | # %%
111 | X = torch.unsqueeze(torch.tensor([1.]), dim=0)  # shape (1, 1), matching the single input feature of the first Linear layer
112 | # model(X)
113 | 
114 | # %% [markdown]
115 | # ## Tensorboard and `tensorboardX`
116 | # Tensorboard and `tensorboardX` are also great to debug a model, e.g. to look at the gradients.
117 | 


--------------------------------------------------------------------------------
/notebooks/foreword.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "![](img/the_real_reason.png)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# Foreword\n",
 15 |     "\n",
 16 |     "Material for this tutorial is here: https://github.com/sotte/pytorch_tutorial\n",
 17 |     "\n",
 18 |     "**Prerequisites:**\n",
 19 |     "- you have implemented machine learning models yourself\n",
 20 |     "- you know what deep learning is\n",
 21 |     "- you have used numpy\n",
 22 |     "- maybe you have used tensorflow or similar libs\n",
 23 |     "\n",
 24 |     "- if you use PyTorch on a daily basis, this tutorial is probably not for you\n",
 25 |     "\n",
 26 |     "**Goals:**\n",
 27 |     "- understand PyTorch concepts\n",
 28 |     "- be able to use transfer learning in PyTorch\n",
 29 |     "- be aware of some handy tools/libs"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "Note:\n",
 37 |     "You don't need a GPU to work on this tutorial, but everything is much faster if you have one.\n",
 38 |     "However, you can use Google's Colab with a GPU and work on this tutorial:\n",
 39 |     "[PyTorch + GPU in Google's Colab](0X_pytorch_in_googles_colab.ipynb)"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "# Agenda\n",
 47 |     "\n",
 48 |     "See README.md"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "# PyTorch Overview\n",
 56 |     "\n",
 57 |     "\n",
 58 |     "> \"PyTorch - Tensors and Dynamic neural networks in Python\n",
 59 |     "with strong GPU acceleration.\n",
 60 |     "PyTorch is a deep learning framework for fast, flexible experimentation.\"\n",
 61 |     ">\n",
 62 |     "> -- https://pytorch.org/*\n",
 63 |     "\n",
 64 |     "This was the tagline prior to PyTorch 1.0.\n",
 65 |     "Now it's:\n",
 66 |     "\n",
 67 |     "> \"PyTorch - From Research To Production\n",
 68 |     "> \n",
 69 |     "> An open source deep learning platform that provides a seamless path from research prototyping to production deployment.\""
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "## \"Build by run\" - what is that and why do I care?"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "![](img/dynamic_graph.gif)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "This is a much better explanation of PyTorch (I think)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 1,
 96 |    "metadata": {
 97 |     "execution": {
 98 |      "iopub.execute_input": "2020-12-06T08:34:48.900466Z",
 99 |      "iopub.status.busy": "2020-12-06T08:34:48.899857Z",
100 |      "iopub.status.idle": "2020-12-06T08:34:49.335919Z",
101 |      "shell.execute_reply": "2020-12-06T08:34:49.333857Z"
102 |     }
103 |    },
104 |    "outputs": [
105 |     {
106 |      "data": {
107 |       "text/plain": [
108 |        "tensor([[ 1.7654,  1.0075,  2.6082,  0.9247,  1.4656, -1.8325,  1.0004,  0.0030,\n",
109 |        "          1.6098,  1.7323]])"
110 |       ]
111 |      },
112 |      "execution_count": 1,
113 |      "metadata": {},
114 |      "output_type": "execute_result"
115 |     }
116 |    ],
117 |    "source": [
118 |     "import torch\n",
119 |     "from IPython.core.debugger import set_trace\n",
120 |     "\n",
121 |     "def f(x):\n",
122 |     "    res = x + x\n",
123 |     "    # set_trace()  # <-- OMG! =D\n",
124 |     "    return res\n",
125 |     "\n",
126 |     "x = torch.randn(1, 10)\n",
127 |     "f(x)"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "metadata": {},
133 |    "source": [
134 |     "I like pytorch because\n",
135 |     "- \"it's just stupid python\"\n",
136 |     "- easy to debug\n",
137 |     "- nice and extensible interface\n",
138 |     "- research-y feel\n",
139 |     "- research is often published as pytorch project"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "metadata": {},
145 |    "source": [
146 |     "## A word about TF\n",
147 |     "TF 2 is about to be released.\n",
148 |     "- eager by default\n",
149 |     "- API cleanup\n",
150 |     "- No more `session.run()`, `tf.control_dependencies()`, `tf.while_loop()`, `tf.cond()`, `tf.global_variables_initializer()`, etc.\n",
151 |     "\n",
152 |     "## TF and PyTorch\n",
153 |     "- static vs dynamic\n",
154 |     "- production vs prototyping "
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "## *\"The tyranny of choice\"*\n",
162 |     "- TensorFlow\n",
163 |     "- MXNet\n",
164 |     "- Keras\n",
165 |     "- CNTK\n",
166 |     "- Chainer\n",
167 |     "- caffe\n",
168 |     "- caffe2\n",
169 |     "- many many more\n",
170 |     "\n",
171 |     "All of them are good!\n"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "metadata": {},
177 |    "source": [
178 |     "# References\n",
179 |     "- Twitter: https://twitter.com/PyTorch\n",
180 |     "- Forum: https://discuss.pytorch.org/\n",
181 |     "- Tutorials: https://pytorch.org/tutorials/\n",
182 |     "- Examples: https://github.com/pytorch/examples\n",
183 |     "- API Reference: https://pytorch.org/docs/stable/index.html\n",
184 |     "- Torchvision: https://pytorch.org/docs/stable/torchvision/index.html\n",
185 |     "- PyTorch Text: https://github.com/pytorch/text\n",
186 |     "- PyTorch Audio: https://github.com/pytorch/audio\n",
187 |     "- AllenNLP: https://allennlp.org/\n",
188 |     "- Object detection/segmentation: https://github.com/facebookresearch/maskrcnn-benchmark\n",
189 |     "- Facebook AI Research Sequence-to-Sequence Toolkit written in PyTorch: https://github.com/pytorch/fairseq\n",
190 |     "- FastAI http://www.fast.ai/\n",
191 |     "- Stanford CS230 Deep Learning notes https://cs230-stanford.github.io"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {},
197 |    "source": [
198 |     "# Example Network\n",
199 |     "Just to get an idea of what PyTorch feels like, here are some examples of networks."
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 2,
205 |    "metadata": {
206 |     "execution": {
207 |      "iopub.execute_input": "2020-12-06T08:34:49.349448Z",
208 |      "iopub.status.busy": "2020-12-06T08:34:49.347611Z",
209 |      "iopub.status.idle": "2020-12-06T08:34:49.354574Z",
210 |      "shell.execute_reply": "2020-12-06T08:34:49.352797Z"
211 |     }
212 |    },
213 |    "outputs": [],
214 |    "source": [
215 |     "from collections import OrderedDict\n",
216 |     "\n",
217 |     "import torch                     # basic tensor functions\n",
218 |     "import torch.nn as nn            # everything neural network\n",
219 |     "import torch.nn.functional as F  # functional/stateless version of nn\n",
220 |     "import torch.optim as optim      # optimizers :)"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 3,
226 |    "metadata": {
227 |     "execution": {
228 |      "iopub.execute_input": "2020-12-06T08:34:49.367216Z",
229 |      "iopub.status.busy": "2020-12-06T08:34:49.365788Z",
230 |      "iopub.status.idle": "2020-12-06T08:34:49.372334Z",
231 |      "shell.execute_reply": "2020-12-06T08:34:49.371472Z"
232 |     }
233 |    },
234 |    "outputs": [],
235 |    "source": [
236 |     "# Simple sequential model\n",
237 |     "model = nn.Sequential(\n",
238 |     "    nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5),\n",
239 |     "    nn.ReLU(),\n",
240 |     "    nn.Conv2d(20, 64, 5),\n",
241 |     "    nn.ReLU(),\n",
242 |     "    nn.AdaptiveAvgPool2d(1),\n",
243 |     ")"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 4,
249 |    "metadata": {
250 |     "execution": {
251 |      "iopub.execute_input": "2020-12-06T08:34:49.377843Z",
252 |      "iopub.status.busy": "2020-12-06T08:34:49.377031Z",
253 |      "iopub.status.idle": "2020-12-06T08:34:49.381617Z",
254 |      "shell.execute_reply": "2020-12-06T08:34:49.380797Z"
255 |     }
256 |    },
257 |    "outputs": [
258 |     {
259 |      "data": {
260 |       "text/plain": [
261 |        "Sequential(\n",
262 |        "  (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))\n",
263 |        "  (1): ReLU()\n",
264 |        "  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))\n",
265 |        "  (3): ReLU()\n",
266 |        "  (4): AdaptiveAvgPool2d(output_size=1)\n",
267 |        ")"
268 |       ]
269 |      },
270 |      "execution_count": 1,
271 |      "metadata": {},
272 |      "output_type": "execute_result"
273 |     }
274 |    ],
275 |    "source": [
276 |     "model"
277 |    ]
278 |   },
279 |   {
280 |    "cell_type": "code",
281 |    "execution_count": 5,
282 |    "metadata": {
283 |     "execution": {
284 |      "iopub.execute_input": "2020-12-06T08:34:49.386586Z",
285 |      "iopub.status.busy": "2020-12-06T08:34:49.385730Z",
286 |      "iopub.status.idle": "2020-12-06T08:34:49.409117Z",
287 |      "shell.execute_reply": "2020-12-06T08:34:49.409722Z"
288 |     }
289 |    },
290 |    "outputs": [
291 |     {
292 |      "data": {
293 |       "text/plain": [
294 |        "torch.Size([16, 64, 1, 1])"
295 |       ]
296 |      },
297 |      "execution_count": 1,
298 |      "metadata": {},
299 |      "output_type": "execute_result"
300 |     }
301 |    ],
302 |    "source": [
303 |     "# forward pass\n",
304 |     "model(torch.rand(16, 1, 32, 32)).shape"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 6,
310 |    "metadata": {
311 |     "execution": {
312 |      "iopub.execute_input": "2020-12-06T08:34:49.414480Z",
313 |      "iopub.status.busy": "2020-12-06T08:34:49.413860Z",
314 |      "iopub.status.idle": "2020-12-06T08:34:49.417378Z",
315 |      "shell.execute_reply": "2020-12-06T08:34:49.417808Z"
316 |     }
317 |    },
318 |    "outputs": [
319 |     {
320 |      "data": {
321 |       "text/plain": [
322 |        "Sequential(\n",
323 |        "  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))\n",
324 |        "  (relu1): ReLU()\n",
325 |        "  (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))\n",
326 |        "  (relu2): ReLU()\n",
327 |        "  (aavgp): AdaptiveAvgPool2d(output_size=1)\n",
328 |        ")"
329 |       ]
330 |      },
331 |      "execution_count": 1,
332 |      "metadata": {},
333 |      "output_type": "execute_result"
334 |     }
335 |    ],
336 |    "source": [
337 |     "# Simple sequential model with named layers\n",
338 |     "layers = OrderedDict([\n",
339 |     "    (\"conv1\", nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)),\n",
340 |     "    (\"relu1\", nn.ReLU()),\n",
341 |     "    (\"conv2\", nn.Conv2d(20,64,5)),\n",
342 |     "    (\"relu2\", nn.ReLU()),\n",
343 |     "    (\"aavgp\", nn.AdaptiveAvgPool2d(1)),\n",
344 |     "])\n",
345 |     "model = nn.Sequential(layers)\n",
346 |     "model"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": 7,
352 |    "metadata": {
353 |     "execution": {
354 |      "iopub.execute_input": "2020-12-06T08:34:49.423476Z",
355 |      "iopub.status.busy": "2020-12-06T08:34:49.423016Z",
356 |      "iopub.status.idle": "2020-12-06T08:34:49.426564Z",
357 |      "shell.execute_reply": "2020-12-06T08:34:49.426224Z"
358 |     }
359 |    },
360 |    "outputs": [
361 |     {
362 |      "data": {
363 |       "text/plain": [
364 |        "Net(\n",
365 |        "  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))\n",
366 |        "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
367 |        "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
368 |        "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
369 |        "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
370 |        "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
371 |        ")"
372 |       ]
373 |      },
374 |      "execution_count": 1,
375 |      "metadata": {},
376 |      "output_type": "execute_result"
377 |     }
378 |    ],
379 |    "source": [
380 |     "class Net(nn.Module):\n",
381 |     "    def __init__(self):\n",
382 |     "        super(Net, self).__init__()\n",
383 |     "        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)\n",
384 |     "        self.pool = nn.MaxPool2d(2, 2)\n",
385 |     "        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)\n",
386 |     "        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)\n",
387 |     "        self.fc2 = nn.Linear(120, 84)\n",
388 |     "        self.fc3 = nn.Linear(84, 10)\n",
389 |     "\n",
390 |     "    def forward(self, x):\n",
391 |     "        x = self.pool(F.relu(self.conv1(x)))\n",
392 |     "        x = self.pool(F.relu(self.conv2(x)))\n",
393 |     "        x = x.view(-1, 16 * 5 * 5)\n",
394 |     "        x = F.relu(self.fc1(x))\n",
395 |     "        x = F.relu(self.fc2(x))\n",
396 |     "        x = self.fc3(x)\n",
397 |     "        x = F.adaptive_avg_pool2d(x, 1)\n",
398 |     "        return x\n",
399 |     "\n",
400 |     "\n",
401 |     "model = Net()\n",
402 |     "model"
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "markdown",
407 |    "metadata": {},
408 |    "source": [
409 |     "# Versions"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "code",
414 |    "execution_count": 8,
415 |    "metadata": {
416 |     "execution": {
417 |      "iopub.execute_input": "2020-12-06T08:34:49.428867Z",
418 |      "iopub.status.busy": "2020-12-06T08:34:49.428514Z",
419 |      "iopub.status.idle": "2020-12-06T08:34:49.430642Z",
420 |      "shell.execute_reply": "2020-12-06T08:34:49.430276Z"
421 |     }
422 |    },
423 |    "outputs": [
424 |     {
425 |      "data": {
426 |       "text/plain": [
427 |        "'1.7.0'"
428 |       ]
429 |      },
430 |      "execution_count": 1,
431 |      "metadata": {},
432 |      "output_type": "execute_result"
433 |     }
434 |    ],
435 |    "source": [
436 |     "import torch\n",
437 |     "torch.__version__"
438 |    ]
439 |   },
440 |   {
441 |    "cell_type": "code",
442 |    "execution_count": 9,
443 |    "metadata": {
444 |     "execution": {
445 |      "iopub.execute_input": "2020-12-06T08:34:49.432667Z",
446 |      "iopub.status.busy": "2020-12-06T08:34:49.432346Z",
447 |      "iopub.status.idle": "2020-12-06T08:34:49.464593Z",
448 |      "shell.execute_reply": "2020-12-06T08:34:49.464259Z"
449 |     }
450 |    },
451 |    "outputs": [
452 |     {
453 |      "data": {
454 |       "text/plain": [
455 |        "'0.8.1'"
456 |       ]
457 |      },
458 |      "execution_count": 1,
459 |      "metadata": {},
460 |      "output_type": "execute_result"
461 |     }
462 |    ],
463 |    "source": [
464 |     "import torchvision\n",
465 |     "torchvision.__version__"
466 |    ]
467 |   },
468 |   {
469 |    "cell_type": "code",
470 |    "execution_count": 10,
471 |    "metadata": {
472 |     "execution": {
473 |      "iopub.execute_input": "2020-12-06T08:34:49.466917Z",
474 |      "iopub.status.busy": "2020-12-06T08:34:49.466467Z",
475 |      "iopub.status.idle": "2020-12-06T08:34:49.468544Z",
476 |      "shell.execute_reply": "2020-12-06T08:34:49.468230Z"
477 |     }
478 |    },
479 |    "outputs": [
480 |     {
481 |      "data": {
482 |       "text/plain": [
483 |        "'1.19.4'"
484 |       ]
485 |      },
486 |      "execution_count": 1,
487 |      "metadata": {},
488 |      "output_type": "execute_result"
489 |     }
490 |    ],
491 |    "source": [
492 |     "import numpy as np\n",
493 |     "np.__version__"
494 |    ]
495 |   }
496 |  ],
497 |  "metadata": {
498 |   "kernelspec": {
499 |    "display_name": "Python 3",
500 |    "language": "python",
501 |    "name": "python3"
502 |   },
503 |   "language_info": {
504 |    "codemirror_mode": {
505 |     "name": "ipython",
506 |     "version": 3
507 |    },
508 |    "file_extension": ".py",
509 |    "mimetype": "text/x-python",
510 |    "name": "python",
511 |    "nbconvert_exporter": "python",
512 |    "pygments_lexer": "ipython3",
513 |    "version": "3.8.5"
514 |   },
515 |   "toc-autonumbering": true,
516 |   "toc-showcode": false,
517 |   "toc-showmarkdowntxt": false
518 |  },
519 |  "nbformat": 4,
520 |  "nbformat_minor": 4
521 | }
522 | 


--------------------------------------------------------------------------------
/notebooks/foreword.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # ![](img/the_real_reason.png)
 17 | 
 18 | # %% [markdown]
 19 | # # Foreword
 20 | #
 21 | # Material for this tutorial is here: https://github.com/sotte/pytorch_tutorial
 22 | #
 23 | # **Prerequisites:**
 24 | # - you have implemented machine learning models yourself
 25 | # - you know what deep learning is
 26 | # - you have used numpy
 27 | # - maybe you have used tensorflow or similar libs
 28 | #
 29 | # - if you use PyTorch on a daily basis, this tutorial is probably not for you
 30 | #
 31 | # **Goals:**
 32 | # - understand PyTorch concepts
 33 | # - be able to use transfer learning in PyTorch
 34 | # - be aware of some handy tools/libs
 35 | 
 36 | # %% [markdown]
 37 | # Note:
 38 | # You don't need a GPU to work on this tutorial, but everything is much faster if you have one.
 39 | # However, you can use Google's Colab with a GPU and work on this tutorial:
 40 | # [PyTorch + GPU in Google's Colab](0X_pytorch_in_googles_colab.ipynb)
 41 | 
 42 | # %% [markdown]
 43 | # # Agenda
 44 | #
 45 | # See README.md
 46 | 
 47 | # %% [markdown]
 48 | # # PyTorch Overview
 49 | #
 50 | #
 51 | # > "PyTorch - Tensors and Dynamic neural networks in Python
 52 | # with strong GPU acceleration.
 53 | # PyTorch is a deep learning framework for fast, flexible experimentation."
 54 | # >
 55 | # > -- https://pytorch.org/*
 56 | #
 57 | # This was the tagline prior to PyTorch 1.0.
 58 | # Now it's:
 59 | #
 60 | # > "PyTorch - From Research To Production
 61 | # > 
 62 | # > An open source deep learning platform that provides a seamless path from research prototyping to production deployment."
 63 | 
 64 | # %% [markdown]
 65 | # ## "Build by run" - what is that and why do I care?
 66 | 
 67 | # %% [markdown]
 68 | # ![](img/dynamic_graph.gif)
 69 | 
 70 | # %% [markdown]
 71 | # This is a much better explanation of PyTorch (I think)
 72 | 
 73 | # %%
 74 | import torch
 75 | from IPython.core.debugger import set_trace
 76 | 
 77 | def f(x):
 78 |     res = x + x
 79 |     # set_trace()  # <-- OMG! =D
 80 |     return res
 81 | 
 82 | x = torch.randn(1, 10)
 83 | f(x)
 84 | 
 85 | # %% [markdown]
 86 | # I like pytorch because
 87 | # - "it's just stupid python"
 88 | # - easy to debug
 89 | # - nice and extensible interface
 90 | # - research-y feel
 91 | # - research is often published as pytorch project
 92 | 
 93 | # %% [markdown]
 94 | # ## A word about TF
 95 | # TF 2 is about to be released.
 96 | # - eager by default
 97 | # - API cleanup
 98 | # - No more `session.run()`, `tf.control_dependencies()`, `tf.while_loop()`, `tf.cond()`, `tf.global_variables_initializer()`, etc.
 99 | #
100 | # ## TF and PyTorch
101 | # - static vs dynamic
102 | # - production vs prototyping 
103 | 
104 | # %% [markdown]
105 | # ## *"The tyranny of choice"*
106 | # - TensorFlow
107 | # - MXNet
108 | # - Keras
109 | # - CNTK
110 | # - Chainer
111 | # - caffe
112 | # - caffe2
113 | # - many many more
114 | #
115 | # All of them are good!
116 | #
117 | 
118 | # %% [markdown]
119 | # # References
120 | # - Twitter: https://twitter.com/PyTorch
121 | # - Forum: https://discuss.pytorch.org/
122 | # - Tutorials: https://pytorch.org/tutorials/
123 | # - Examples: https://github.com/pytorch/examples
124 | # - API Reference: https://pytorch.org/docs/stable/index.html
125 | # - Torchvision: https://pytorch.org/docs/stable/torchvision/index.html
126 | # - PyTorch Text: https://github.com/pytorch/text
127 | # - PyTorch Audio: https://github.com/pytorch/audio
128 | # - AllenNLP: https://allennlp.org/
129 | # - Object detection/segmentation: https://github.com/facebookresearch/maskrcnn-benchmark
130 | # - Facebook AI Research Sequence-to-Sequence Toolkit written in PyTorch: https://github.com/pytorch/fairseq
131 | # - FastAI http://www.fast.ai/
132 | # - Stanford CS230 Deep Learning notes https://cs230-stanford.github.io
133 | 
134 | # %% [markdown]
135 | # # Example Network
136 | # Just to get an idea of what PyTorch feels like, here are some examples of networks.
137 | 
138 | # %%
139 | from collections import OrderedDict
140 | 
141 | import torch                     # basic tensor functions
142 | import torch.nn as nn            # everything neural network
143 | import torch.nn.functional as F  # functional/stateless version of nn
144 | import torch.optim as optim      # optimizers :)
145 | 
146 | # %%
147 | # Simple sequential model
148 | model = nn.Sequential(
149 |     nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5),
150 |     nn.ReLU(),
151 |     nn.Conv2d(20, 64, 5),
152 |     nn.ReLU(),
153 |     nn.AdaptiveAvgPool2d(1),
154 | )
155 | 
156 | # %%
157 | model
158 | 
159 | # %%
160 | # forward pass
161 | model(torch.rand(16, 1, 32, 32)).shape
162 | 
163 | # %%
164 | # Simple sequential model with named layers
165 | layers = OrderedDict([
166 |     ("conv1", nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)),
167 |     ("relu1", nn.ReLU()),
168 |     ("conv2", nn.Conv2d(20,64,5)),
169 |     ("relu2", nn.ReLU()),
170 |     ("aavgp", nn.AdaptiveAvgPool2d(1)),
171 | ])
172 | model = nn.Sequential(layers)
173 | model
174 | 
175 | 
176 | # %%
class Net(nn.Module):
    """Small LeNet-style CNN, shown as an example of a hand-written ``nn.Module``.

    Two 5x5 convolutions (each followed by ReLU and 2x2 max-pooling) feed three
    fully connected layers that end in 10 output scores.

    ``fc1`` expects ``16 * 5 * 5`` features, which is what the convolution and
    pooling stack produces for inputs of shape ``(N, 3, 32, 32)``:
    32 -> 28 (conv1) -> 14 (pool) -> 10 (conv2) -> 5 (pool).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # The same stateless pooling layer is reused after both convolutions.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute the 10 output scores for a batch of images.

        Args:
            x: float tensor of shape ``(N, 3, 32, 32)``.

        Returns:
            Tensor of shape ``(N, 10)``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``x.view(-1, 16 * 5 * 5)`` this can never fold a wrongly sized input
        # into the batch dimension; a bad input fails loudly in ``fc1`` instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # ``fc3`` already yields the final 2-D (N, 10) result. No 2-D pooling is
        # applied afterwards: ``adaptive_avg_pool2d`` only accepts 3-D/4-D input
        # and would raise on this tensor.
        return self.fc3(x)
196 | 
197 | 
198 | model = Net()
199 | model
200 | 
201 | # %% [markdown]
202 | # # Versions
203 | 
204 | # %%
205 | import torch
206 | torch.__version__
207 | 
208 | # %%
209 | import torchvision
210 | torchvision.__version__
211 | 
212 | # %%
213 | import numpy as np
214 | np.__version__
215 | 


--------------------------------------------------------------------------------
/notebooks/hooks.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Hooks\n",
  8 |     "Hooks are simple functions that can be registered to be called during the forward or backward pass of a `nn.Module`.\n",
  9 |     "These functions can be used to print out information or modify the module.\n",
 10 |     "\n",
 11 |     "Here is a simple forward hook example that prints some information about the input and output of a module.\n",
 12 |     "\n",
 13 |     "Tip: Don't forget to remove the hook afterwards!\n",
 14 |     "\n",
 15 |     "Ref:\n",
 16 |     "- https://pytorch.org/tutorials/beginner/former_torchies/nn_tutorial.html#forward-and-backward-function-hooks\n",
 17 |     "- https://pytorch.org/docs/master/nn.html#torch.nn.Module.register_forward_hook\n",
 18 |     "- https://pytorch.org/docs/master/nn.html#torch.nn.Module.register_forward_pre_hook\n",
 19 |     "- https://pytorch.org/docs/master/nn.html#torch.nn.Module.register_backward_hook"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 1,
 25 |    "metadata": {
 26 |     "execution": {
 27 |      "iopub.execute_input": "2020-12-06T08:34:50.972704Z",
 28 |      "iopub.status.busy": "2020-12-06T08:34:50.971990Z",
 29 |      "iopub.status.idle": "2020-12-06T08:34:50.975122Z",
 30 |      "shell.execute_reply": "2020-12-06T08:34:50.974327Z"
 31 |     }
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "def tensorinfo_hook(module, input_, output):\n",
 36 |     "    \"\"\"\n",
 37 |     "    Register this forward hook to print some infos about the tensor/module.\n",
 38 |     "\n",
 39 |     "    Example:\n",
 40 |     "\n",
 41 |     "        >>> from torchvision.models import resnet18\n",
 42 |     "        >>> model = resnet18(pretrained=False)\n",
 43 |     "        >>> hook_fc = model.fc.register_forward_hook(tensorinfo_hook)\n",
 44 |     "        >>> # model(torch.ones(1, 3, 244, 244))\n",
 45 |     "        >>> hook_fc.remove()\n",
 46 |     "\n",
 47 |     "    \"\"\"\n",
 48 |     "    print(f\"Inside '{module.__class__.__name__}' forward\")\n",
 49 |     "    print(f\"  input:     {str(type(input_)):<25}\")\n",
 50 |     "    print(f\"  input[0]:  {str(type(input_[0])):<25} {input_[0].size()}\")\n",
 51 |     "    print(f\"  output:    {str(type(output)):<25} {output.data.size()}\")\n",
 52 |     "    print()"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 2,
 58 |    "metadata": {
 59 |     "execution": {
 60 |      "iopub.execute_input": "2020-12-06T08:34:50.979331Z",
 61 |      "iopub.status.busy": "2020-12-06T08:34:50.978607Z",
 62 |      "iopub.status.idle": "2020-12-06T08:34:51.322078Z",
 63 |      "shell.execute_reply": "2020-12-06T08:34:51.322341Z"
 64 |     }
 65 |    },
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "import torch\n",
 69 |     "import torch.nn as nn"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 3,
 75 |    "metadata": {
 76 |     "execution": {
 77 |      "iopub.execute_input": "2020-12-06T08:34:51.324816Z",
 78 |      "iopub.status.busy": "2020-12-06T08:34:51.324506Z",
 79 |      "iopub.status.idle": "2020-12-06T08:34:51.325876Z",
 80 |      "shell.execute_reply": "2020-12-06T08:34:51.326156Z"
 81 |     }
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "m = nn.Linear(1, 3)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 4,
 91 |    "metadata": {
 92 |     "execution": {
 93 |      "iopub.execute_input": "2020-12-06T08:34:51.328097Z",
 94 |      "iopub.status.busy": "2020-12-06T08:34:51.327781Z",
 95 |      "iopub.status.idle": "2020-12-06T08:34:51.329533Z",
 96 |      "shell.execute_reply": "2020-12-06T08:34:51.329223Z"
 97 |     }
 98 |    },
 99 |    "outputs": [],
100 |    "source": [
101 |     "hook = m.register_forward_hook(tensorinfo_hook)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 5,
107 |    "metadata": {
108 |     "execution": {
109 |      "iopub.execute_input": "2020-12-06T08:34:51.331641Z",
110 |      "iopub.status.busy": "2020-12-06T08:34:51.331334Z",
111 |      "iopub.status.idle": "2020-12-06T08:34:51.337940Z",
112 |      "shell.execute_reply": "2020-12-06T08:34:51.337595Z"
113 |     }
114 |    },
115 |    "outputs": [
116 |     {
117 |      "name": "stdout",
118 |      "output_type": "stream",
119 |      "text": [
120 |       "Inside 'Linear' forward\n",
121 |       "  input:     <class 'tuple'>          \n",
122 |       "  input[0]:  <class 'torch.Tensor'>    torch.Size([1])\n",
123 |       "  output:    <class 'torch.Tensor'>    torch.Size([3])\n",
124 |       "\n"
125 |      ]
126 |     },
127 |     {
128 |      "data": {
129 |       "text/plain": [
130 |        "tensor([ 0.3400, -1.6105,  0.1007], grad_fn=<AddBackward0>)"
131 |       ]
132 |      },
133 |      "execution_count": 1,
134 |      "metadata": {},
135 |      "output_type": "execute_result"
136 |     }
137 |    ],
138 |    "source": [
139 |     "m(torch.rand(1));"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 6,
145 |    "metadata": {
146 |     "execution": {
147 |      "iopub.execute_input": "2020-12-06T08:34:51.339936Z",
148 |      "iopub.status.busy": "2020-12-06T08:34:51.339628Z",
149 |      "iopub.status.idle": "2020-12-06T08:34:51.341377Z",
150 |      "shell.execute_reply": "2020-12-06T08:34:51.341055Z"
151 |     }
152 |    },
153 |    "outputs": [],
154 |    "source": [
155 |     "hook.remove()"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {},
161 |    "source": [
162 |     "## Exercise\n",
163 |     "- Write a context manager hook that removes the hook when leaving the with block."
164 |    ]
165 |   }
166 |  ],
167 |  "metadata": {
168 |   "kernelspec": {
169 |    "display_name": "Python 3",
170 |    "language": "python",
171 |    "name": "python3"
172 |   },
173 |   "language_info": {
174 |    "codemirror_mode": {
175 |     "name": "ipython",
176 |     "version": 3
177 |    },
178 |    "file_extension": ".py",
179 |    "mimetype": "text/x-python",
180 |    "name": "python",
181 |    "nbconvert_exporter": "python",
182 |    "pygments_lexer": "ipython3",
183 |    "version": "3.8.5"
184 |   }
185 |  },
186 |  "nbformat": 4,
187 |  "nbformat_minor": 2
188 | }
189 | 


--------------------------------------------------------------------------------
/notebooks/hooks.py:
--------------------------------------------------------------------------------
 1 | # ---
 2 | # jupyter:
 3 | #   jupytext:
 4 | #     text_representation:
 5 | #       extension: .py
 6 | #       format_name: percent
 7 | #       format_version: '1.3'
 8 | #       jupytext_version: 1.7.1
 9 | #   kernelspec:
10 | #     display_name: Python 3
11 | #     language: python
12 | #     name: python3
13 | # ---
14 | 
15 | # %% [markdown]
16 | # # Hooks
17 | # Hooks are simple functions that can be registered to be called during the forward or backward pass of a `nn.Module`.
18 | # These functions can be used to print out information or modify the module.
19 | #
20 | # Here is a simple forward hook example that prints some information about the input and output of a module.
21 | #
22 | # Tip: Don't forget to remove the hook afterwards!
23 | #
24 | # Ref:
25 | # - https://pytorch.org/tutorials/beginner/former_torchies/nn_tutorial.html#forward-and-backward-function-hooks
26 | # - https://pytorch.org/docs/master/nn.html#torch.nn.Module.register_forward_hook
27 | # - https://pytorch.org/docs/master/nn.html#torch.nn.Module.register_forward_pre_hook
28 | # - https://pytorch.org/docs/master/nn.html#torch.nn.Module.register_backward_hook
29 | 
30 | # %%
def tensorinfo_hook(module, input_, output):
    """
    Forward hook that prints type and size information about a module call.

    Register it with ``some_module.register_forward_hook(tensorinfo_hook)`` and
    remove it again through the returned handle once you are done.

    Args:
        module: the module whose forward pass just ran; only its class name
            is printed.
        input_: the positional inputs of the forward call (a tuple). It may be
            empty, in which case the ``input[0]`` line is skipped.
        output: whatever ``forward`` returned. For a single tensor its size is
            printed; for a tuple/list the sizes of the elements are printed
            recursively; for anything else ``n/a`` is printed.

    Returns:
        None. The hook only prints; it does not return a replacement output.

    Example:

        >>> from torchvision.models import resnet18
        >>> model = resnet18(pretrained=False)
        >>> hook_fc = model.fc.register_forward_hook(tensorinfo_hook)
        >>> # model(torch.ones(1, 3, 244, 244))
        >>> hook_fc.remove()

    """

    def _size_of(value):
        # Anything with a callable ``size`` (e.g. a tensor) reports it directly.
        size = getattr(value, "size", None)
        if callable(size):
            return str(size())
        # Containers of tensors (e.g. the tuple returned by recurrent layers)
        # are described element by element.
        if isinstance(value, (tuple, list)):
            return "(" + ", ".join(_size_of(item) for item in value) + ")"
        return "n/a"

    print(f"Inside '{module.__class__.__name__}' forward")
    print(f"  input:     {str(type(input_)):<25}")
    # A module can be called without positional arguments; then there is no
    # first input to describe.
    if len(input_) > 0:
        print(f"  input[0]:  {str(type(input_[0])):<25} {_size_of(input_[0])}")
    print(f"  output:    {str(type(output)):<25} {_size_of(output)}")
    print()
49 | 
50 | 
51 | # %%
52 | import torch
53 | import torch.nn as nn
54 | 
# %%
# A tiny layer to demonstrate the hook on: 1 input feature -> 3 output features.
m = nn.Linear(1, 3)

# %%
# Attach the hook; keep the returned handle so the hook can be removed later.
hook = m.register_forward_hook(tensorinfo_hook)

# %%
# Run a forward pass, which triggers the hook and prints the tensor info.
# The trailing `;` keeps the notebook from also displaying the returned tensor.
m(torch.rand(1));

# %%
# Detach the hook so that later forward passes of `m` no longer call it.
hook.remove()
66 | 
67 | # %% [markdown]
68 | # ## Exercise
69 | # - Write a context manager hook that removes the hook when leaving the with block.
70 | 


--------------------------------------------------------------------------------
/notebooks/img/common_mistakes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/common_mistakes.png


--------------------------------------------------------------------------------
/notebooks/img/dynamic_graph.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/dynamic_graph.gif


--------------------------------------------------------------------------------
/notebooks/img/ml_debt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/ml_debt.jpg


--------------------------------------------------------------------------------
/notebooks/img/pytorch-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/pytorch-logo.png


--------------------------------------------------------------------------------
/notebooks/img/pytorch_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/pytorch_logo.png


--------------------------------------------------------------------------------
/notebooks/img/pytorch_logo_flame.png:
--------------------------------------------------------------------------------
 1 | --2019-03-18 14:43:17--  https://pytorch.org/assets/images/pytorch-logo.png
 2 | Resolving pytorch.org (pytorch.org)... 185.199.108.153
 3 | Connecting to pytorch.org (pytorch.org)|185.199.108.153|:443... connected.
 4 | HTTP request sent, awaiting response... 200 OK
 5 | Length: 22916 (22K) [image/png]
 6 | Saving to: ‘pytorch-logo.png’
 7 | 
 8 |      0K .......... .......... ..                              100%  664K=0,03s
 9 | 
10 | 2019-03-18 14:43:18 (664 KB/s) - ‘pytorch-logo.png’ saved [22916/22916]
11 | 
12 | 


--------------------------------------------------------------------------------
/notebooks/img/software_vs_ml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/software_vs_ml.png


--------------------------------------------------------------------------------
/notebooks/img/tensorboardx_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/tensorboardx_demo.gif


--------------------------------------------------------------------------------
/notebooks/img/tensorboardx_demo2.gif:
--------------------------------------------------------------------------------
 1 | --2018-07-05 11:05:43--  https://raw.githubusercontent.com/lanpa/tensorboard-pytorch/master/screenshots/Demo.gif
 2 | Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.36.133
 3 | Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.36.133|:443... connected.
 4 | HTTP request sent, awaiting response... 200 OK
 5 | Length: 490837 (479K) [image/gif]
 6 | Saving to: ‘Demo.gif.1’
 7 | 
 8 |      0K .......... .......... .......... .......... .......... 10%  959K 0s
 9 |     50K .......... .......... .......... .......... .......... 20% 2.48M 0s
10 |    100K .......... .......... .......... .......... .......... 31% 5.91M 0s
11 |    150K .......... .......... .......... .......... .......... 41% 3.05M 0s
12 |    200K .......... .......... .......... .......... .......... 52% 4.57M 0s
13 |    250K .......... .......... .......... .......... .......... 62% 4.07M 0s
14 |    300K .......... .......... .......... .......... .......... 73% 4.67M 0s
15 |    350K .......... .......... .......... .......... .......... 83% 3.12M 0s
16 |    400K .......... .......... .......... .......... .......... 93% 5.47M 0s
17 |    450K .......... .......... .........                       100% 6.33M=0.2s
18 | 
19 | 2018-07-05 11:05:43 (2.96 MB/s) - ‘Demo.gif.1’ saved [490837/490837]
20 | 
21 | 


--------------------------------------------------------------------------------
/notebooks/img/the_real_reason.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/the_real_reason.png


--------------------------------------------------------------------------------
/notebooks/img/visdom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sotte/pytorch_tutorial/2b0e44df0d7f19e68237481213a89479f980969d/notebooks/img/visdom.png


--------------------------------------------------------------------------------
/notebooks/lin_reg.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# LinReg with PyTorch, Gradient Descent, and GPU"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Init, helpers, utils ..."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "execution": {
 22 |      "iopub.execute_input": "2020-12-06T08:34:52.845595Z",
 23 |      "iopub.status.busy": "2020-12-06T08:34:52.843684Z",
 24 |      "iopub.status.idle": "2020-12-06T08:34:53.191367Z",
 25 |      "shell.execute_reply": "2020-12-06T08:34:53.191620Z"
 26 |     }
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "%matplotlib inline"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 2,
 36 |    "metadata": {
 37 |     "execution": {
 38 |      "iopub.execute_input": "2020-12-06T08:34:53.194066Z",
 39 |      "iopub.status.busy": "2020-12-06T08:34:53.193706Z",
 40 |      "iopub.status.idle": "2020-12-06T08:34:53.465407Z",
 41 |      "shell.execute_reply": "2020-12-06T08:34:53.465662Z"
 42 |     }
 43 |    },
 44 |    "outputs": [
 45 |     {
 46 |      "name": "stderr",
 47 |      "output_type": "stream",
 48 |      "text": [
 49 |       "/home/stefan/projects/pytorch_tutorial/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at  /pytorch/c10/cuda/CUDAFunctions.cpp:100.)\n",
 50 |       "  return torch._C._cuda_getDeviceCount() > 0\n"
 51 |      ]
 52 |     },
 53 |     {
 54 |      "data": {
 55 |       "text/plain": [
 56 |        "device(type='cpu')"
 57 |       ]
 58 |      },
 59 |      "execution_count": 1,
 60 |      "metadata": {},
 61 |      "output_type": "execute_result"
 62 |     }
 63 |    ],
 64 |    "source": [
 65 |     "import torch\n",
 66 |     "import torch.nn as nn\n",
 67 |     "import torch.nn.functional as F\n",
 68 |     "import torch.optim as optim\n",
 69 |     "import torchvision\n",
 70 |     "\n",
 71 |     "DEVICE = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
 72 |     "DEVICE"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 3,
 78 |    "metadata": {
 79 |     "execution": {
 80 |      "iopub.execute_input": "2020-12-06T08:34:53.467950Z",
 81 |      "iopub.status.busy": "2020-12-06T08:34:53.467641Z",
 82 |      "iopub.status.idle": "2020-12-06T08:34:53.469044Z",
 83 |      "shell.execute_reply": "2020-12-06T08:34:53.469282Z"
 84 |     }
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "from pprint import pprint\n",
 89 |     "\n",
 90 |     "import matplotlib.pyplot as plt\n",
 91 |     "import numpy as np\n",
 92 |     "from IPython.core.debugger import set_trace"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "# The Problem"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 4,
105 |    "metadata": {
106 |     "execution": {
107 |      "iopub.execute_input": "2020-12-06T08:34:53.471764Z",
108 |      "iopub.status.busy": "2020-12-06T08:34:53.471456Z",
109 |      "iopub.status.idle": "2020-12-06T08:34:54.489102Z",
110 |      "shell.execute_reply": "2020-12-06T08:34:54.489343Z"
111 |     }
112 |    },
113 |    "outputs": [
114 |     {
115 |      "data": {
116 |       "text/plain": [
117 |        "[<matplotlib.lines.Line2D at 0x7f47c5298460>]"
118 |       ]
119 |      },
120 |      "execution_count": 1,
121 |      "metadata": {},
122 |      "output_type": "execute_result"
123 |     },
124 |     {
125 |      "data": {
126 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWl0lEQVR4nO3df4wc5X3H8c9373woaZG4GAeIzz+wYpAw6g97ZS5KKyUNTUyE6gbnhwlKaQNx/wC1aSu1pEhuRRQpSps2/eGmchyUVDKmNDgySkj5EblCqnKEWyuKbAjp6cLiMySYY0kjgbjb22//uF1379jd29ndmXlm5/2SwLczs7cPK/OZZ77PM8+YuwsAkC+FtBsAAEge4Q8AOUT4A0AOEf4AkEOEPwDk0GjaDejGpZde6lu3bk27GQCQKaVS6WV339BqXybCf+vWrZqenk67GQCQKWZWbrePsg8A5BDhDwA5RPgDQA4R/gCQQ4Q/AOQQ4Q8AOUT4A0APSuWKDp2cUalcSbspPcnEPH8ACEmpXNEtR6a0UK1pbLSgo7dPateW8bSbFQk9fwCIaGp2XgvVmmouLVZrmpqdT7tJkRH+ABDR5Lb1GhstaMSkdaMFTW5bn3aTIqPsAwAR7doyrqO3T2pqdl6T29ZnruQjEf4A0JNdW8YzGfoNlH0AIIcIfwDIIcIfAHKI8AeAHCL8ASCHCH8AyKGBhL+Z3WtmL5nZ6aZtbzOzx8zsf+p/jte3m5n9o5nNmNkPzWznINoAAOjeoHr+X5O0Z9W2uyR91923S/pu/bUk3SBpe/2fA5K+PKA2AAC6NJDwd/cnJL2yavNeSV+v//x1Sb/btP3ffNmUpEvM7IpBtAMA0J04a/6XufuL9Z9/Kumy+s8bJZ1tOm6uvg0AkJBEBnzd3SV5lPeY2QEzmzaz6fPnz8fUMgDIpzjD/2eNck79z5fq289J2tR03ER92wruftjdi+5e3LBhQ4zNBID8iTP8H5J0a/3nWyWdaNr+e/VZP5OSft5UHgIAJGAgq3qa2TFJ75F0qZnNSforSZ+X9ICZ3SapLOmj9cMflvRBSTOSXpP0B4NoAwCgewMJf3e/uc2u97U41iXdMYjPBQD0hjt8ASCHCH8AGIBSuaJDJ2dUKlfSbkpXeJIXAPSpVK7oliNTWqjWNDZa0NHbJ4N/yhc9fwDo09TsvBaqNdVcWqzWNDU7n3aT1kT4A0CfJret19hoQSMmrRstaHLb+rSbtCbKPgDQp11bxnX09kk9eGpOlnZjukTPHwBW6XXw9vipOR37/vO65chU8AO/9PwBoEmvg7et6v4hD/rS8weAJr0O3mat7k/PHwCaNEJ8sVqLFOKNuv/U7Lwmt60PutcvSba82kLYisWiT09Pp90MADlRKleCCPF+22FmJXcvttpHzx8AVtm1ZTz1nnvcN45R8weAAMV94xjhDwABinsAmbIPAAQo7gFkwh8AAhXn2ANlHwDIIcIfAHKI8AeQuqw9CGUYUPMHkKosPggllJvA+kH4A0hV1hZEy+LJqhXKPgBSFWU+ewjloSw+tasVev4AUtXtfPZQety9LvwWGsIfQOo6zWdv1NdfePX1IMpDWVu9sx3CH0Cwmnv7owXT6EhBS0vp97hDWPitX4Q/gMR1O1umub6+VHN9bPcmbbzkLcH1uLM4+4fwB5CoKLX71fX1fTsnggvXUMYioiL8ASQqytTOLNTXszZVtYHwB5CoqLNlQq+vZ3X2D49xBJC45hq5pK579u1q62nX3NP+/HZ4jCOAoDR681Hq5e2ODaHmHvrVSSvc4QsgNVHulm137LDccZu02Hv+ZvacpF9IWpJUdfeimb1N0r9L2irpOUkfdXeW8wNyJkq9vN2xWa25py32mn89/Ivu/nLTti9IesXdP29md0kad/e/aPc7qPkDwytKvTzUmn+oOtX80wr/ZyW9x91fNLMrJP2Xu1/d7ncQ/gAQXa
fwT6Lm75IeNbOSmR2ob7vM3V+s//xTSZetfpOZHTCzaTObPn/+fALNBID8SGK2z2+4+zkze7ukx8zsR8073d3N7E2XH+5+WNJhabnnn0A7AaBvWSlBxR7+7n6u/udLZvZNSbsl/czMrmgq+7wUdzsAIG4hTDvtVqxlHzP7JTO7uPGzpPdLOi3pIUm31g+7VdKJONsBAEnI0rTTuHv+l0n6ppk1Pus+d/9PM3tK0gNmdpuksqSPxtwOAF3ISskiVFmadhpr+Lv7rKRfbbF9XtL74vxsANEkUbIY9pNLFhaia2B5ByDHmsM47tUps1QP70dWlnog/IGcWh3GB2/cEWvJIqtLHw8rwh/IqdVhXHltIdaSRZbq4XlA+AM51SqM4yxZdKqHD/tYQIgIfyCnGmF8/NScBn0XZbswb3VyyctYQGgIf6APw9BjffDUnBaqNR0/NTeQ4I0a5owFpIP1/IEeNULui48+q1uOTKlUzt6q5MdPzemNxcHelBT1RqdG+WnExFhAguj5Az3Keo+1VK7oP6bPXij5jIwMJnh7eUZvVubGDxPCH+hR1mevTM3Oq1pbjn6T9J6rNlzopfcTwL2EeVbmxg8THuAO9CHLNf9G2WqxWtPISEFyV7XmDLoOER7gDsQkyz3W5h76uVdf1/3ffz6zJSxER/gDOdY4eZXKFR0/NZfZEhaiI/yBnGsE/29u36C3X3yRbto5Qa8/Bwh/IMdK5Ypu/srynHxJGhsx3bRzIuVWIQnM8wdybGp2Xov14JekxSUP+gEkDaVyRYdOzmTy3opQ0PMHMqzf2UaT29Zr3WjhQs9/3Yj1Ve9PYvYTy0EMBuEPZNQgQnDXlnEd+9T/r++zr496f1KhnPWb60JB+AMZ1RyCC4s1fenxH+vT11/V0wlgEOEZRyi3upLI+s11oSD8gYwaf+uYCmZyd9Uk/ffMy3rquVdiL4O0K+0MOpTbXUmwHMRgEP5ABpXKFd3zrTOquctMkiuRMkin0s6gQ7nTlUSWb64LBeEPxCDugc/mYCyYNFJYvgKIuwyyVmlnkKFMeSdehD8wYO16x/2cEFa/d3UwHrxxhyqvLcReBkkykLu5ksjy2kppI/yBAWu3nn2vM2HanUzSqnvv2znR98ygbnW6kmDKZ38If2DAWvWOo8yEWd2bbffeKCWWQfSQV4ftvpTvBGbKZ38If2DA2vXKuymXtOrN9ltqGVQPObSwZUygP4Q/EIPVvfLmE8L4W8faPjSlVcDe8d539lXiGVRohxa2TPnsD+EPJKQRTp164e0CttdZNKVyRedefV2jIwUtLa38nVFLQSGGLVM+e0f4Awlq1wtvDuLmgJWkQydnep4h1DjRjBZM+3dvvrBcc6+lIMJ2eBD+QIJa9exbBfEd731n37X65hPNUs31jkvecuH9odXvkTyWdAZ61Muywo3SyZ++/+oLYd5uami77d1qnGhGTG+q0Xfah3yg54/c62UaZD+98tWlk3Z1/n4HWDvV6EOs3yNZhD9yrdcQ76Z2H+Vk0CqIBxHQnWr01O/zLbXwN7M9kv5B0oikI+7++bTagvzqtfbdbe2+16uBtbY3i3LCYTkENKQS/mY2IumQpN+WNCfpKTN7yN2fTqM9yK9eSyuteuWHTs4kPoga5YSz1rGcGPIlrZ7/bkkz7j4rSWZ2v6S9kgh/JKqf0kq3tfs4Rbly6XQs6+TkT1rhv1HS2abXc5Kuaz7AzA5IOiBJmzdvTq5lyJ1B1b7TGESNcsLpdCxTP/Mn2AFfdz8s6bAkFYtFT7k5QFeSHkSNcsLpdGxoSzcgfmmF/zlJm5peT9S3AUMpznp6lBNOp4Flpn7mS1rh/5Sk7WZ2pZZDf7+kj6fUFiBWIdbTW52MmPqZL6mEv7tXzexOSY9oearnve5+Jo22AHGbmp3XG4s1uaSFxfTr6SGejJC81Gr+7v6wpIfT+nwMj9CnKI6/dUyNQata/XWaGNyFFPCAL9CNLPRiK68tqGBSzS
WTdPqFn0d6/6BPbgzuQiL8kXFZ6MVObluv0YJpYcnlkr5Rmlvx/NtO4d7rya3T7xzU4G7oV1zojPBHpmWhF7try7g+Utyk+558Xi5paWnlWkCdwr2Xk1s3J4x+B3ezcMWFzljSGZnWaonkEN20c0IXrXvzEsprLdvcy9LL/S4F3Y0kPgPxouePzIt7iuIgyhvtSi1rXbn0UqJJ4mooC1dc6Mzcw795tlgs+vT0dNrNQA71Ut6IerKIo3ZeKld0/NScXFoxvjBI1PzDZ2Yldy+22kfPH+ggas29l5NFXFcuD56a00K1puOn5mIpiXFTWLZR8wc6iFpzD6UWHko7EC56/kAbjbLGwRt36PQLP5d18Z5QauGhtAPhouYPtHDfk8/r4InTWqq5CiZZwVSreVelnFBq4aG0A+mh5g9EUCpXdPDEaVVryx2jJW/8q7u6fyi18FDagTBR8wealMoVfenxH18I/mam7ufaJ6VUrujQyRmVypW0m4KMoecP1DXP1GnWCP0P75qIbdpkL7jLFv0g/BGEEOrTzTNkCia9+52X6oZrr1DltYUVd+RKCiJks7CuEcJF+CN1ofRgV8+Q+fT1VwX9gHNm9KAfhD9SF0oPttNSCr3c7BX3lQyPXkQ/CH+kLqQebLsZMlHa2DxN9KJ18V4lMKMHvSL8kbos9GC7bePqaaIhPLYRaIXwRxCae7AhDP620k0ve2p2XrWmGycLBaMWjyAR/ohNLyEe4sBqFI3y0EK1poKZ7tl7babaj/wg/BGLXkM87sHfuK8qslDCAiTCHzHpNcTjHPxN6qqCQVhkAeGPWPQa4nH2nEOZUgqEgPBHLPoJ8Th6zqVyRS+8+rpGC6almnc8IYU64AwMEuGP2IRS/mgu94yOFPSx3ZvartHTrjTECQHDhvDH0Gsu9ywt1bTxkre0DfB2T8C6+StTF0pYxz6VrRlIQCss6Yy+ZGFJ4SiPYmx17PH6s3BduvBMXCDr6PmjZ/3OnkmqlLLW+MPqdqw+9sFVYR/+s++AtRH+6Fk/s2eSvpmr3fhDu3Y0H7tv54S+MX1Wi0uudSOmfTsnYmsnkBTCHz3rZ05+KNMuu2nHri3jOnbgXQz4YqgQ/uhZP9M5Q1nJs9t2hDJzCRgUc4+ngmlmfy3pU5LO1zf9pbs/XN/3GUm3SVqS9Efu/kin31UsFn16ejqWdiI9SdX81/ocpnFiWJlZyd2LrfbF3fP/e3f/21WNuUbSfkk7JL1D0uNmdpW7L8XcFgSml9501KDudt4+oY+8SaPss1fS/e7+hqSfmNmMpN2SvpdCW5AhvQwSt5u3n/TKoVxdIDRxz/O/08x+aGb3mlnjb/xGSWebjpmrbwM6ahfknbSat9/L7+lH46T1xUef1S1HpoK+JwL50VfP38wel3R5i113S/qypM9qeVr0ZyV9UdInI/zuA5IOSNLmzZv7aSaGRC+DxO0GpZMcbA5lZhPQrK/wd/fruznOzL4i6Vv1l+ckbWraPVHftvp3H5Z0WFoe8O2nnRgOvc4uWl3TT3rN/VBmNgHN4pztc4W7v1j/+U8kXefu+81sh6T7tFznf4ek70ra3mnAl9k+yDpq/khDWrN9vmBmv6blss9zkv5Qktz9jJk9IOlpSVVJdzDTB8OOGUUITWzh7+6f6LDvc5I+F9dnAwA6Y1VPAMghwh8Acojwx9DIwrMFgFCwsNuQyPtskqSXiAayjvAfAlkKvrhOUtxIBURD+A+BrARfnCcpbqQCoiH8h0BWgi/Ok1TSd+0CWUf4D4E4gm91eWYQ5Zq4T1LcSAV0L7blHQaJ5R2Sdd+Tz+vgidNaqrkuWlfQwRt36J5vnRlIuSbvA9NAktJ8mAsyplSu6OCJ06rWljsFC4s1fef0iwMr19A7B8LAPH+sMDU7r1rT1WChYLrh2ivetCY+gGyj548VGnX5hWpNBTPds/daffy6zbr68osp1wBDhJo/3mTQdXnq/EA6qPkjkkHW5bN0AxqQJ9
T8Eaukn5cLoDuEP2LV6gHqANJH2Qex4s5bIEyEP2LH3H4gPJR9ACCHCH+siYekAMOHsg86YqomMJzo+Q+hQfbUmaoJDCd6/kNm0D31rDwrAEA0hH9ABrEMwqAfmMJUTWA4Ef6BaO6xj44U9OFdE9q3c0KSIgVvHD11pmoCw4fwD0Rzj32hWtOxJ5/XN6bPSmaqLnVfwqGnDqAbhH8gGj32NxZrckkuaXFp+SdXtBIOPXUAa2G2TyAaPfaPX7dZYyO2vBbOiGkd6+IAiAE9/4A0euw37Zy4ULaRotX8AaAbhH+AVpdtCH0Ag0bZBwByiPAHgBzqK/zN7CNmdsbMamZWXLXvM2Y2Y2bPmtkHmrbvqW+bMbO7+vl8AEBv+u35n5Z0k6Qnmjea2TWS9kvaIWmPpH8xsxEzG5F0SNINkq6RdHP9WABAgvoa8HX3ZyTJzFbv2ivpfnd/Q9JPzGxG0u76vhl3n62/7/76sU/30w4AQDRx1fw3Sjrb9Hquvq3d9jcxswNmNm1m0+fPn4+pmQCQT2v2/M3scUmXt9h1t7ufGHyTlrn7YUmHJalYLHpcnwMAebRm+Lv79T383nOSNjW9nqhvU4ftAICExFX2eUjSfjO7yMyulLRd0vclPSVpu5ldaWZjWh4UfiimNgAA2uhrwNfMPiTpnyRtkPRtM/uBu3/A3c+Y2QNaHsitSrrD3Zfq77lT0iOSRiTd6+5n+vovAABEZu7hl9OLxaJPT0+n3QwAyBQzK7l7sdU+7vAFgBwi/AEghwh/AMghwh8AcojwB4AcIvwBIIcIfwDIIcIfAHKI8AeAHCL8ASCHCH8AyCHCHwByiPAHgBwi/AEgh4Y+/Evlig6dnFGpXEm7KQAQjL4e5hK6UrmiW45MaaFa09hoQUdvn9SuLeNpNwsAUjfUPf+p2XktVGuqubRYrWlqdj7tJgFAEIY6/Ce3rdfYaEEjJq0bLWhy2/q0mwQAQRjqss+uLeM6evukpmbnNbltPSUfAKgb6vCXlk8AhD4ArDTUZR8AQGuEPwDkEOEPADlE+ANADhH+AJBDhD8A5JC5e9ptWJOZnZdUTrsdCbhU0stpNyIgfB8r8X2sxPexUqvvY4u7b2h1cCbCPy/MbNrdi2m3IxR8HyvxfazE97FS1O+Dsg8A5BDhDwA5RPiH5XDaDQgM38dKfB8r8X2sFOn7oOYPADlEzx8AcojwB4AcIvwDY2Z/Y2Y/MrMfmtk3zeyStNuUJjP7iJmdMbOameVyWp+Z7TGzZ81sxszuSrs9aTOze83sJTM7nXZbQmBmm8zspJk9Xf9/5Y+7eR/hH57HJF3r7r8i6ceSPpNye9J2WtJNkp5IuyFpMLMRSYck3SDpGkk3m9k16bYqdV+TtCftRgSkKunP3P0aSZOS7ujm7wjhHxh3f9Tdq/WXU5Im0mxP2tz9GXd/Nu12pGi3pBl3n3X3BUn3S9qbcptS5e5PSHol7XaEwt1fdPdT9Z9/IekZSRvXeh/hH7ZPSvpO2o1AqjZKOtv0ek5d/I+NfDKzrZJ+XdKTax079I9xDJGZPS7p8ha77nb3E/Vj7tby5dzRJNuWhm6+DwCdmdkvS3pQ0qfd/X/XOp7wT4G7X99pv5n9vqQbJb3Pc3AjxlrfR86dk7Sp6fVEfRtwgZmt03LwH3X34928h7JPYMxsj6Q/l/Q77v5a2u1B6p6StN3MrjSzMUn7JT2UcpsQEDMzSV+V9Iy7/1237yP8w/PPki6W9JiZ/cDM/jXtBqXJzD5kZnOS3iXp22b2SNptSlJ98P9OSY9oeSDvAXc/k26r0mVmxyR9T9LVZjZnZrel3aaUvVvSJyT9Vj0zfmBmH1zrTSzvAAA5RM8fAHKI8AeAHCL8ASCHCH8AyCHCHwByiPAHgBwi/AEgh/4PeZyx60XvwHkAAAAASUVORK5CYII=\n",
127 |       "text/plain": [
128 |        "<Figure size 432x288 with 1 Axes>"
129 |       ]
130 |      },
131 |      "metadata": {
132 |       "needs_background": "light"
133 |      },
134 |      "output_type": "display_data"
135 |     }
136 |    ],
137 |    "source": [
138 |     "from sklearn.datasets import make_regression\n",
139 |     "\n",
140 |     "\n",
141 |     "n_features = 1\n",
142 |     "n_samples = 100\n",
143 |     "\n",
144 |     "X, y = make_regression(\n",
145 |     "    n_samples=n_samples,\n",
146 |     "    n_features=n_features,\n",
147 |     "    noise=20,\n",
148 |     "    random_state=42,\n",
149 |     ")\n",
150 |     "\n",
151 |     "fix, ax = plt.subplots()\n",
152 |     "ax.plot(X, y, \".\")"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {},
158 |    "source": [
159 |     "# The Solution"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 5,
165 |    "metadata": {
166 |     "execution": {
167 |      "iopub.execute_input": "2020-12-06T08:34:54.491719Z",
168 |      "iopub.status.busy": "2020-12-06T08:34:54.491412Z",
169 |      "iopub.status.idle": "2020-12-06T08:34:54.492986Z",
170 |      "shell.execute_reply": "2020-12-06T08:34:54.493220Z"
171 |     }
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "X = torch.from_numpy(X).float()\n",
176 |     "y = torch.from_numpy(y.reshape((n_samples, n_features))).float()"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 6,
182 |    "metadata": {
183 |     "execution": {
184 |      "iopub.execute_input": "2020-12-06T08:34:54.495698Z",
185 |      "iopub.status.busy": "2020-12-06T08:34:54.495388Z",
186 |      "iopub.status.idle": "2020-12-06T08:34:54.497014Z",
187 |      "shell.execute_reply": "2020-12-06T08:34:54.496762Z"
188 |     }
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "class LinReg(nn.Module):\n",
193 |     "    def __init__(self, input_dim):\n",
194 |     "        super().__init__()\n",
195 |     "        self.beta = nn.Linear(input_dim, 1)\n",
196 |     "        \n",
197 |     "    def forward(self, X):\n",
198 |     "        return self.beta(X)\n",
199 |     "\n",
200 |     "# or just\n",
201 |     "# model = nn.Linear(input_dim, 1)"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 7,
207 |    "metadata": {
208 |     "execution": {
209 |      "iopub.execute_input": "2020-12-06T08:34:54.499394Z",
210 |      "iopub.status.busy": "2020-12-06T08:34:54.499084Z",
211 |      "iopub.status.idle": "2020-12-06T08:34:54.501329Z",
212 |      "shell.execute_reply": "2020-12-06T08:34:54.501565Z"
213 |     }
214 |    },
215 |    "outputs": [],
216 |    "source": [
217 |     "model = LinReg(n_features).to(DEVICE)  # <-- here\n",
218 |     "loss_fn = nn.MSELoss()\n",
219 |     "optimizer = optim.SGD(model.parameters(), lr=0.1)\n",
220 |     "\n",
221 |     "\n",
222 |     "X, y = X.to(DEVICE), y.to(DEVICE)  # <-- here"
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "code",
227 |    "execution_count": 8,
228 |    "metadata": {
229 |     "execution": {
230 |      "iopub.execute_input": "2020-12-06T08:34:54.504699Z",
231 |      "iopub.status.busy": "2020-12-06T08:34:54.504378Z",
232 |      "iopub.status.idle": "2020-12-06T08:34:54.594137Z",
233 |      "shell.execute_reply": "2020-12-06T08:34:54.594373Z"
234 |     }
235 |    },
236 |    "outputs": [
237 |     {
238 |      "data": {
239 |       "text/plain": [
240 |        "<matplotlib.legend.Legend at 0x7f47c31dd940>"
241 |       ]
242 |      },
243 |      "execution_count": 1,
244 |      "metadata": {},
245 |      "output_type": "execute_result"
246 |     },
247 |     {
248 |      "data": {
249 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAkLElEQVR4nO3de5hcVZnv8e9b3R0CGCWGcJEkdBgDCTAGO00uAh4GQYIijIBCAgJHDeeIOmccj0dERplRZjiOeI6OOBrUATQkghcIioA4IpEhhu4QNHdDSOhGLiEGDJck3V3v/LGrOlWV6u667aq9a/8+z5OnLntX1ep64LdXvWvttc3dERGRZEk1ugEiIlJ/Cn8RkQRS+IuIJJDCX0QkgRT+IiIJpPAXEUkghb+ISAIp/CVWzGyLme0xs4MLnn/MzNzM2jOPJ5jZj8zsBTN7ycxWm9nlmW3tmX1fLvh3YYlt+LKZ/cHMdprZejO7tGD7QjPbYGbp7GcWbP+EmT1rZn82s++a2X45204ws2WZNvea2d8P045vFrR/t5ntLOVvEFH4Sxw9CczLPjCzvwQOKNjne0APcCQwDvgA8FzBPge5++ty/v2gxM9/BXgP8AbgMuCrZva2nO2PA1cCKwtfaGZnAlcB78i07SjgH3J2uQ14CHgj8N+AK83snGKNcPf/mdt+YDFwR4l/gyScwl/i6HtAbm/7MuDWgn1OBG5291fcvd/dH3P3n9fiw9398+6+3t3T7v5bYBkwJ2f7je7+S2BXkZdfBnzH3de4+w7gC8DlOdvbgUXuPuDuTwC/AY4bqU1mdiBwPnBLhX+WJIzCX+JoOfB6M5tmZi3ARcD3i+xzo5ldZGaTynlzM5tvZr8rcd/9CQ40a0p8++MIfhlkPQ4cambjMo//P3CpmbWZ2TEEB5UHSnjf84FtBL8aREak8Je4yvb+zwDWAU8XbH8fQY/874EnzWyVmZ1YsM8LZvZizr9pAO5+m7u/pcR2fJMgwO8rcf/XAS/lPM7eH5O5/SlwAfAasJ7gV8KjJbzvZcCtrsW6pEQKf4mr7wHzCUomhSUf3H2Hu1/l7scBhwKrgDvNzHJ2O9jdD8r5t66cBpjZvwDHA+8vI3RfBl6f8zh7f6eZvRG4F/hHYDQwETjTzK4coR2TgFMp8j2IDEXhL7Hk7lsJBn7fBfx4hH1fAL4MvIlgILVqZvYPwFnAO939z2W8dA0wPefxdOA5d99OMPg74O63ZsYpeoElBH/jcD4APOzum8tohyScwl/i7EPAae7+SuEGM/u/Zna8mbWa2RjgI8CmTMhWxcw+Q/Cr4/Ri72dmo8xsNGBAm5mNNrPs/2u3Ah8ys2PN7CDgGuDmzLaNwcttvpmlzOww4EJgpPGHS3PeQ6QkCn+JLXd/wt27hth8APAT4EVgM8G0ysIpky8WzJP/OwAzu9jMhhvA/SdgErAp57VX52y/n6Bm/zZgYeb+2zNtvhf4EvAr4ClgK/D5zLY/A+cBnwB2EJSqVgNfzLRrUuazBgewzWwOMAFN8ZQymcaHRESSRz1/EZEEUviLiCSQwl9EJIEU/iIiCdTa6AaU4uCDD/b29vZGN0NEJFa6u7tfcPfxxbbFIvzb29vp6hpqRp+IiBRjZluH2qayj4hIAin8RUQSSOEvIpJAsaj5F9PX10dvby+7dhW7XkbzGD16NBMmTKCtra3RTRGRJhLb8O/t7WXMmDG0t7eTv0pv83B3tm/fTm9vL5MnT250c0SkicS27LNr1y7GjRvXtMEPYGaMGzeu6X/diEj9xTb8gaYO/qwk/I0isdSzApbdENzGUGzLPiIiDdOzAm45Bwb2QMsouGwpTJzZ6FaVJdY9/2by4IMPcvbZZze6GSJSii3LguD3geB2y7JGt6hsCv+QDQwMNLoJIlJr7acEPX5rCW7bT2l0i8qWqPDv3rqDG3+1ie6tO2ryflu2bGHq1KlcfPHFTJs2jQ
suuIBXX32V9vZ2Pv3pT9PR0cEdd9zB/fffz5w5c+jo6OB973sfL7/8MgD33nsvU6dOpaOjgx//eNjL0IpIlEycGZR6TvtsLEs+kKDw7966g4u/vZwb7t/Axd9eXrMDwIYNG7jyyitZt24dr3/96/nGN74BwLhx41i5ciWnn346X/ziF3nggQdYuXIlnZ2dfOUrX2HXrl0sWLCAu+++m+7ubp599tmatEdE6mTiTDjlk7EMfkhQ+C/fvJ09/WnSDn39aZZvrvo63gBMnDiRk046CYBLLrmE3/zmNwBceOGFwecuX87atWs56aSTOOGEE7jlllvYunUr69evZ/LkyUyZMgUz45JLLqlJe0RESpGY2T6zjxrHqNYUff1p2lpTzD5qXE3et3AqZvbxgQceCAQnap1xxhksXrw4b79Vq1bV5PNFRCqRmJ7/jCPHsujDs/m7dx7Dog/PZsaRY2vyvk899RSPPPIIALfddhsnn3xy3vbZs2fz8MMPs2nTJgBeeeUVNm7cyNSpU9myZQtPPPEEwD4HBxGRMCUm/CE4AHz0r95cs+AHOOaYY7jxxhuZNm0aO3bs4CMf+Uje9vHjx3PzzTczb9483vKWtzBnzhzWr1/P6NGjWbhwIe9+97vp6OjgkEMOqVmbRERGkpiyT1haW1v5/ve/n/fcli1b8h6fdtppPProo/u8du7cuaxfvz7M5omIFJWonr+IiARqEv5m9l0ze97MVuc890Yz+4WZ/SFzOzbzvJnZ18xsk5n9zsw6atGGRmhvb2f16tUj7ygiEjG16vnfDMwteO4q4JfuPgX4ZeYxwFnAlMy/K4B/q1EbRESkRDUJf3d/CPhTwdPnArdk7t8C/HXO87d6YDlwkJkdXot2iIhIacKs+R/q7s9k7j8LHJq5fwTQk7Nfb+Y5ERGpk7oM+Lq7A17Oa8zsCjPrMrOubdu2hdQyEZFkCjP8n8uWczK3z2eefxqYmLPfhMxzedx9obt3unvn+PHjQ2xm7Vx77bV8+ctfHnL7nXfeydq1a+vYIhGR4sIM/6XAZZn7lwF35Tx/aWbWz2zgpZzyUFNT+ItIVNRqqudi4BHgGDPrNbMPAdcDZ5jZH4DTM48B7gE2A5uAm4Ara9GGkoRw2bXrrruOo48+mpNPPpkNGzYAcNNNN3HiiScyffp0zj//fF599VX+8z//k6VLl/KpT32KE044gSeeeKLofiIi9VCTM3zdfd4Qm95RZF8HPlqLzy1LCJdd6+7uZsmSJaxatYr+/n46OjqYMWMG5513HgsWLADgmmuu4Tvf+Q4f//jHOeecczj77LO54IILADjooIOK7iciErbkLO9Q7LJrVYb/smXLeO9738sBBxwAwDnnnAPA6tWrueaaa3jxxRd5+eWXOfPMM4u+vtT9RERqLTnLO9TxsmuXX345X//61/n973/P5z//eXbt2lXVfiISAyGUlcOUnPAP4bJrb3/727nzzjt57bXX2LlzJ3fffTcAO3fu5PDDD6evr49FixYN7j9mzBh27tw5+Hio/UQkZrJl5f+4LriNwQEgOWUfCAK/hpdc6+jo4MILL2T69OkccsghnHjiiQB84QtfYNasWYwfP55Zs2YNBv5FF13EggUL+NrXvsYPf/jDIfcTkZgJoawcNgvGX6Ots7PTu7q68p5bt24d06ZNa1CL6itJf6tILIUwoaQWzKzb3TuLbUtWz19EJAzZsvLjtwE24u5RkJyav4hIqSodvF21BLpviUXdP9Y9f3ff5wLqzSYOZTmRplJpCSdmdf/Y9vxHjx7N9u3bmzoc3Z3t27czevToRjdFJDmKhXgp6jidvBZi2/OfMGECvb29NPuKn6NHj2bChAmNboZIcmRDPNvzLzXEs3X/LcuC10S41w8xnu0jIhKanhXRCPEq26HZPiIi5ajxOUEVCXn6aGxr/iIiTa3SsYcSKfxFRKIo5AFklX1ERKIo5AFkhb+ISFSFOPagso+ISAIp/EVEEkjhLy
KNF7MLoTQD1fxFpLEiuhzysKJyElgVFP4i0lgxWxAtlgerIlT2EZHGKmc+exTKQyGffFUv6vmLSGOVOp89Kj3uShd+ixiFv4g03nDz2bP19Zd6o1EeitnqnUNR+ItIdOX29lMtkGqFNI3vcUdh4bcqKfxFpP5KnS2TW19PAzMuhTdMjF6PO4azfxT+IlJf5dTuC+vr0+dHL1yjMhZRJoW/iNRXOVM741Bfj9tU1QyFv4jUV7mzZaJeX4/p7B9dxlFE6i+3Rg6l9+yHqq03uube6M8fgi7jKCLRku3Nl1MvH2rfKNTco/7rpAid4SsijVPO2bJD7dskZ9zWW+g9fzPbAuwEBoB+d+80szcCPwDagS3A+919R9htEZGIKadePtS+Ma25N1roNf9M+He6+ws5z30J+JO7X29mVwFj3f3TQ72Hav4iTaycenlUa/4RNVzNv1HhvwE41d2fMbPDgQfd/Zih3kPhLyJSvuHCvx41fwfuN7NuM7si89yh7v5M5v6zwKGFLzKzK8ysy8y6tm3bVodmiogkRz1m+5zs7k+b2SHAL8xsfe5Gd3cz2+fnh7svBBZC0POvQztFRKoXkxJU6OHv7k9nbp83s58AM4HnzOzwnLLP82G3Q0QkdFGYdlqiUMs+ZnagmY3J3gfeCawGlgKXZXa7DLgrzHaIiNRFjKadht3zPxT4iZllP+s2d7/XzB4FbjezDwFbgfeH3A4RKUVMShaRFaNpp6GGv7tvBqYXeX478I4wP1tEylSPkkWzH1zisBBdhpZ3EEmy3DAOe3XKGNXDqxKTpR4U/iJJVRjGc68Pt2QR06WPm5XCXySpCsP4te3hlixiVA9PAoW/SFIVC+MwSxbD1cObfSwgghT+IkmVDePHFxOciF9DQ4V5sYNLUsYCIkbhL1KNZuixrlocBO+qJbUJ3nLDXGMBDaHwF6lUM/RYH18M/bsAr13wlhvmGgtoCIW/SKXi3mPtWQGPfZ/Bkk+qtTbBW8k1emMyN76ZKPxFKhX3HuuWZZAeyDwwmHLG3uUIqgngSsI8JnPjm4nCX6RSce+x5h68Uq3wh/thw89rU8JSmEeewl+kGnEOudyD10s90H1rfEtYUjaFv0iSZQ9ePSuC2T5xLWFJ2RT+IknXsyKY9fPm0+B1h8L0eer1J4DCXyTJelbAzWfDwO7gccuoIPyl6dXjGr4iElXZ6apZA32RvgDJoJ4VsOyG4FYqop6/SJxVe4bx4IyfbM+/rbp6fz3OeG6Gk+siQOEvEle1CMGJM+Hyn+5d32f6/MqDtF6hHPeT6yJC4S8SV7kh2L8bHvxnOPUzlR0AahGeYYRysV8ScT+5LiIU/iJxtf84MAM3IA2bH4Stj4RfBhmqtFPrUB7ql0TcT66LCIW/SBz1rIB7rwJPZw4ABPfDLoMMV9qpdSgP90sizifXRYTCXyQMYQ98DgZjGiwFqRZwD78MMlJpp5ahrPJOqBT+IrU2VO+4mgNC4WsLg3Hu9cFlGMMug9QzkEv5JdEM11NoEIW/SK0V6x1D5TNhhjqYNKrufcI8qp4ZVKrhfkkkYMpn99YdLN+8ndlHjWPGkWNr+t4Kf5FaK9Y7LmcmTGFvdqjXllNiqUUPuTBsp8+v7H1qpcmnfHZv3cHF317Onv40o1pTLPrw7JoeABT+IrU2VK+8lHJJsd5staWWWvWQoxa2DRwT6N66gx+t7MWA8zomAPDjlb08v3M3Bowfsx/ndUyoKqyXb97Onv40aYe+/jTLN29X+ItEXmGvPPeAsP+4oS+aUixgT/lkdSWeWoV21AZgQyx9DVdu6d66g3kLH2HPQHAFtNu7enCgP/M4647uXhYvqLy3PvuocYxqTdHXn6atNcXso8ZV9D5DUfiL1Es2nIbrhQ8VsJXOoulZEazVn2qFNPnvWW4pKIrz60OY8jlSuWX55u305QR9X0HoDz5fZW99xpFjWfTh2ar5izSFoXrhuUGcG7AQLGBW6Qyh7IEm1QIzLt
u7XHOlpaCIzK/P9szHHjCKHa/uGbytRUiOVG6ZfdQ42lpssOff1mJFe/616K3POHJszUM/S+EvUk/FevbFgviUT1Zfq8890KSBN0zY+/qo1e+H0L11Bz9e2YsD53dMYMOzO/nBo0+x5o8vMZAOzm0zgtuUUZOB0ZHKLTOOHMviK+aEXvMPm8JfpFKVzKApVjpZdkPxIK42oIer0Teofl/YY88G67d+/QTP/XkXF544ifmzJg3uO++moPwC8IMVT1GswpJ9qlYDo6WUW4r1yKMc9MUo/EUqCfFqeuWFpZOhgrjagB6uRl+D+n2xQdFi4Z677eJvL2d3X3qwp96aMgYcBtJBhD/e+3sA5s+aFNTWM8EPFA1+yO/512pgNMxyS1Qo/CXZKg3xUmr31fwaGO75cgxXoy+ybaSTirJlmG07d/PghufpT/tgqQUYHChNexDK+7XtLcNka+l5PfUBpzDTf776GebPmhTU1ltTgz3/Fss/AJxx7KH81TGH1LzmnxQNC38zmwt8FWgBvu3u1zeqLZJglZZWSq3dV/prYKTnc5VwwMmG+pTdaxnz3HJ2HjqbX782mY3P7eSZF1/DzWg146kdr+IOo9v2rZ0XlmGysqUWYDD4IeiN55ZhsrX0PX1p0hTv+QOcdfzhQKa2vmD2PjX/n69+hrOOP3ywPCSVaUj4m1kLcCNwBtALPGpmS919bSPaIwlWaWmlnNp9SLq37uAX9y3lb//4v2mjnwFrY/O7bmPqiacP9tCzA5APbtzGX6bX88G2f6KNfvo238Q391zNSj968P06bCPvTq1jeXoaj/UdvU/tvLAMA0HvPrfUMirTU097cI3Y3G25tfRSav7Z1+S2YcaRYxX6NdKonv9MYJO7bwYwsyXAuYDCX+qrmtJKqbX7MuWWXgB+tLKXF3bu5uAx+3F+ZgZJ99YdXLTwERbwMK2t/bRYGk/38bOld7AyfTTX3r1mnx76rJZ1tNFPq6XB+5mdWsfKgSD8O2wji0ZlDgy0cknf1cw+6m15ry8sw7S2GO/vnDjYJqBouBeGd7GyzMJLOyv6rqRyjQr/I4CenMe9wKzcHczsCuAKgEmTdKSXENVq7vowB5Lr71nH7V097D+qhfe85U2M2b9tMBhzpzMe/6Y38I8/DYK7tSVFOp0mN8N/2NXD4ivmDJ5otNym0UcreBDaD/dP5cDVz+zTQwdYns7fd3l62uC22an8A8PHj3qu6GyWwjJMsX1Uc4+HyA74uvtCYCFAZ2fnEOP8Io1TuL7LjCPH0p2ewreeTLH24ZcY3fYgHzz5KJ7a/grffGhz8KJX+/jmQ5sHB0M/d/Zxeb30lsx1WbLTFgv/w+8b8MFfBW0txsqBo7l4z9XMzpRrVrdM5drjD+e3T/4pr+ff2mJM6zydZfv/xWDNf9prk2nJ1Pw3pU+gf89PgH5oGcWpZ55X9G9WuDePRoX/08DEnMcTMs+JNFxhvRyg50+v8tyfd/HmQ17Hp88Kesy567vc0d3Lte85js8tXZ13pufVP/k9bzxwFB22cTCgV/rRg4OhPy/opQ94MAhqOC1Fev5tLTb4i2HJFXP41q+fYPO2A+k6cBbTDh3DZzMHoWMOGzP4NxySd8LRXwLnAPDOvL/6HdDTGa2lGyRU5l7/TrWZtQIbgXcQhP6jwHx3X1Ns/87OTu/q6qpjC6XZDLccQO4MEoC/v2t13uyTQi0p48ITJ7L4t08N9swNOHnKwSz7wwv77P/O12/lq7s/P1hPv3jP1TzmRxft+Y9qMa495/i8wdBiNf+q6SIoiWBm3e5edEClIT1/d+83s48B9xFM9fzuUMEvMpLhTiwCuO23T/G5TKDnRnrKgiDPLsy17A8vkDIYJveBYFqiQf76Lq0pzjr+cB7ZvH2fNV6ubH+W/Tb2kQr6+/zNXzzHmr94z2A7s7304eroNZWAi6DIyBpW83f3e4B7GvX5El0jLaebu6176w7+5du3MsPX8MuBaYM96uwc9e
6tO/jcXavpL5LoaYd0QVCPFPwQHDDO65jAeR0T9qn5H3PYGL716ydY+8eXGN3WwgdPPooTWrbBxuCNW3FOPWEqp3a+efD96l5Hj8m6PhKuyA74SnMqnMZYeH/sAaMGZ7sULtJVbKndJx/7Ff+e+iJt9POxlqCk8nj/3jnqyzdvJ11Q2sxdDiC35w9Bb35gwLGU0THpIMYeMArYt+afbVOxXvo+0xaXbQ8usu7p4NOfXVXel1brEk3U1uWXhlD4S1VK6aXn1tlzpzHiTn/a8+6nzEi7F12kq9hSu3/dsjZviuKc1DrWpqYOHlAGzyrtT5My48MnT2bM/m1D1vyPOWxM7ddPbz8lWE9/YA/g8Nht+de/HS7cKy3RDPeetVqXX+MGsabwl30ULqMLFK1JD3fRi9xt2XVeWlL5wQ57lwDI3sedVGa2S+EiXcWW2j0i9U7Sq/6V9EAftLQx+a1zWfTW2Xk981JWaCw8q7SmJs6Et14CXf8e/JXp/vy1gIYL90pKNKUcMKo9t0HjBrGn8Jc8heu33P7oU5jtLY1kTzLKXahrpF46ZOaup/cGe0umtz+Qzr/f1hrMgBnq7NB9g3wmqcvvhi3LSLWfwgVFAigSc9Onz4NVi/cttYwU7pWUaOpR09e4Qewp/JvEULX0ckOvcP2W4G7+JesKF+oqdtGL3HJLdp2XUW35wZ7bzlLbXDTIw766VC3KG0OVWkYK90pKNPWo6WvcIPYaMs+/XEme5z/SErvZfbIlltz6eSVXNSrs+bemyOv5j2qxwZ7/SO0baQpmLFRS3ij3YBFG7bxnBTy+GPD88YVaUs0/8iI3z1+KG26AdLggLyy/wL7L6Zaq2PotULzmn91/qPePRLmlWuWWNyo5WIT1yyVbZlq1JJyafESu5yuVUfhHRCkDpEMFeW75pbB+XslVjZrhEnU1U255Iyq18Ki0QyJL4R8RIw2QDhfkhQOh2feLZZklSrJljbnXZ+bm20iviE4tPCrtkMhSzT8iCnv+xQZIFeR11HUz3PNJSA8EJ2hlT9IqpZQTlVp4VNohDZPomn8pA6ZRMNRVjqLc5qbVsyIT/P3BYx8I/kFpJZSo1MKj0g6JpKYO/+FOQoqiphggjbueFfDgPwc9/n1Y9Eoo6t1LhZo6/Ic7CUlkH4MzdXZD/vqf0NIGb50f3rTJSugsW6lCU4f/cCchScREoQc7OEMmHdT4jzoVpp0Lr23PPyMXohGymtEjVWjq8C9lXReJgKj0YAtnyJz6mfzF16LQxuHaG6VylEReU4c/qI4eC1HpwQ63lEIlJ3uF/UumVqtzSiI1ffhLDESpBzvUDJly2jg4TTQNrfuF+ytBM3qkQgp/abw49GBLbWPhNNH+3arFSyQp/CUacnuwURj8LaaUXvaWZZkrdmWkUqrFSyQp/CU8lYR4FAdWy9F+CrTsF0wXtRS864Z4tV8SQ+Ev4ag0xMMe/A37V0UcSlgiKPwlLJWGeJiDv/X6VaFBWIkBhb+Eo9IQD7PnHJUppSIRoPCXcFQT4mH0nHtWwEu9kGqBNMMfkKI64CxSQwp/CU9Uyh+55Z5UK8y4dOg1eoYqDemAIE1G4S/NL7fckwbeMHHoAC9WGgK4+ey9B4TLf6oDgMReqtENkJjrWQHLbghuoyo7/mAtI48/FNv38cV7V/oc2J25MLpIvKnnL5WrdvZMvUopI40/FLajcN/Hbyt4w+hf/U5kJAp/qVw1s2fqfTLXUOMPQ7Ujd9/p8+GxRTDQF6zrP31+eO0UqROFv1Sumjn5UZl2WUo7Js6Ey3+mAV9pKgp/qVw10zmjspJnqe2IyswlkRox93Dql2Z2LbAA2JZ56mp3vyez7TPAh4AB4G/c/b7h3quzs9O7urpCaac0UL1q/iN9jqZxSpMys2537yy2Leye//9z9y8XNOZY4CLgOOBNwANmdrS7F7titjSzSnrT5QZ1qfP2FfqSMI0o+5wLLHH33cCTZrYJmAk80oC2SJxUMk
g81Lz9eq8cql8XEjFhz/P/mJn9zsy+a2bZaykeAfTk7NObeU5keEMF+XCKzduv5H2qkT1o/cd1wW2Uz4mQxKiq529mDwCHFdn0WeDfgC8QTIr+AnAD8MEy3vsK4AqASZMmVdNMaRaVDBIPNShdz8HmqMxsEslRVfi7++ml7GdmNwE/zTx8GpiYs3lC5rnC914ILIRgwLeadkqTqHR2UWFNv95r7kdlZpNIjjBn+xzu7s9k7n8CmOXuF5nZccBtBHX+NwG/BKYMN+Cr2T4Se6r5SwM0arbPl8zsBIKyzxbgfwC4+xozux1YC/QDH9VMH2l6mlEkERNa+Lv7B4bZdh1wXVifLSIiw9OqniIiCaTwFxFJIIW/NI84XFtAJCK0sFuzSPpsknovES0Scwr/ZhCn4AvrIKUTqUTKovBvBnEJvjAPUjqRSqQsCv9mEJfgC/MgVe+zdkViTuHfDMIIvsLyTC3KNWEfpHQilUjJFP7NopbB13Uz3PNJSKehdT+Yez3ce1X15Rr1zkUiQ+Ev+XpWZIK/P3jcvxvW3VW7co165yKRoHn+km/LMvD03sepFEw7d9818UUk1tTzl3ztp0DLfjCwGywF77oBOi+HQ49VuUakiSj8Jd9QdflqyjVJPwFNJIIU/rKvWtbl43QCmkiCqOYv4ar39XJFpCQKfwlXsQuoi0jDqewj4dLcfpFIUvhL+DS3XyRyVPYREUkghb+MTBdJEWk6KvvI8DRVU6QpqeffjGrZU9dUTZGmpJ5/s6l1Tz0u1woQkbIo/KOkFssg1PqCKZqqKdKUFP5RkdtjT7XCW+fD9PnBtnKCN4yeuqZqijQdhX9U5PXYB4ILqjy2CLBgbf1SSzjqqYtICRT+UZHtsffvAjz4N9CX2ejllXDUUxeREWi2T1Rke+yd/z1nLZw2rYsjIqFQzz9Ksj326fP2lm1AJRwRqTmFfxQVlm0U+iJSYyr7iIgkkMJfRCSBqgp/M3ufma0xs7SZdRZs+4yZbTKzDWZ2Zs7zczPPbTKzq6r5fBERqUy1Pf/VwHnAQ7lPmtmxwEXAccBc4Btm1mJmLcCNwFnAscC8zL4iIlJHVQ34uvs6ADMr3HQusMTddwNPmtkmIDtqucndN2detySz79pq2iEiIuUJq+Z/BNCT87g389xQz+/DzK4wsy4z69q2bVtIzRQRSaYRe/5m9gBwWJFNn3X3u2rfpIC7LwQWAnR2dnpYnyMikkQjhr+7n17B+z4NTMx5PCHzHMM8LyIidRJW2WcpcJGZ7Wdmk4EpwArgUWCKmU02s1EEg8JLQ2qDiIgMoaoBXzN7L/CvwHjgZ2a2yt3PdPc1ZnY7wUBuP/BRdx/IvOZjwH1AC/Bdd19T1V8gIiJlM/fol9M7Ozu9q6ur0c0QEYkVM+t2985i23SGr4hIAin8RUQSSOEvIpJACn8RkQRS+IuIJJDCX0QkgRT+IiIJpPAXEUkghb+ISAIp/EVEEkjhLyKSQAp/EZEEUviLiCSQwl9EJIGaP/x7VsCyG4JbEREBqryYS+T1rIBbzoGBPdAyCi5bChNnNrpVIiIN19w9/y3LguD3geB2y7JGt0hEJBKaO/zbTwl6/NYS3Laf0ugWiYhEQnOXfSbODEo9W5YFwa+Sj4gI0OzhD0HgK/RFRPI0d9lHRESKUviLiCSQwl9EJIEU/iIiCaTwFxFJIIW/iEgCmbs3ug0jMrNtwNZGt6MODgZeaHQjIkTfRz59H/n0feQr9n0c6e7ji+0ci/BPCjPrcvfORrcjKvR95NP3kU/fR75yvw+VfUREEkjhLyKSQAr/aFnY6AZEjL6PfPo+8un7yFfW96Gav4hIAqnnLyKSQAp/EZEEUvhHjJn9i5mtN7PfmdlPzOygRrepkczsfWa2xszSZpbIaX1mNtfMNpjZJjO7qtHtaTQz+66ZPW9mqxvdligws4lm9iszW5v5f+V/lfI6hX/0/AI43t3fAmwEPtPg9jTaauA84KFGN6QRzK
wFuBE4CzgWmGdmxza2VQ13MzC30Y2IkH7gk+5+LDAb+Ggp/40o/CPG3e939/7Mw+XAhEa2p9HcfZ27b2h0OxpoJrDJ3Te7+x5gCXBug9vUUO7+EPCnRrcjKtz9GXdfmbm/E1gHHDHS6xT+0fZB4OeNboQ01BFAT87jXkr4H1uSyczagbcCvx1p3+a/jGMEmdkDwGFFNn3W3e/K7PNZgp9zi+rZtkYo5fsQkeGZ2euAHwF/6+5/Hml/hX8DuPvpw203s8uBs4F3eAJOxBjp+0i4p4GJOY8nZJ4TGWRmbQTBv8jdf1zKa1T2iRgzmwv8H+Acd3+10e2RhnsUmGJmk81sFHARsLTBbZIIMTMDvgOsc/evlPo6hX/0fB0YA/zCzFaZ2Tcb3aBGMrP3mlkvMAf4mZnd1+g21VNm8P9jwH0EA3m3u/uaxraqscxsMfAIcIyZ9ZrZhxrdpgY7CfgAcFomM1aZ2btGepGWdxARSSD1/EVEEkjhLyKSQAp/EZEEUviLiCSQwl9EJIEU/iIiCaTwFxFJoP8CsIW8jPoTnoIAAAAASUVORK5CYII=\n",
250 |       "text/plain": [
251 |        "<Figure size 432x288 with 1 Axes>"
252 |       ]
253 |      },
254 |      "metadata": {
255 |       "needs_background": "light"
256 |      },
257 |      "output_type": "display_data"
258 |     }
259 |    ],
260 |    "source": [
261 |     "# Train step\n",
262 |     "model.train()  # <-- here\n",
263 |     "optimizer.zero_grad()\n",
264 |     "\n",
265 |     "y_ = model(X)\n",
266 |     "loss = loss_fn(y_, y)\n",
267 |     "\n",
268 |     "loss.backward()\n",
269 |     "optimizer.step()\n",
270 |     "\n",
271 |     "# Eval\n",
272 |     "model.eval()  # <-- here\n",
273 |     "with torch.no_grad():\n",
274 |     "    y_ = model(X)    \n",
275 |     "\n",
276 |     "# Vis\n",
277 |     "fig, ax = plt.subplots()\n",
278 |     "ax.plot(X.cpu().numpy(), y_.cpu().numpy(), \".\", label=\"pred\")\n",
279 |     "ax.plot(X.cpu().numpy(), y.cpu().numpy(), \".\", label=\"data\")\n",
280 |     "ax.set_title(f\"MSE: {loss.item():0.1f}\")\n",
281 |     "ax.legend();"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "Note: I did gradient descent with all the data. I did not split the data into `train` and `valid` which should be done!"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": 9,
294 |    "metadata": {
295 |     "execution": {
296 |      "iopub.execute_input": "2020-12-06T08:34:54.597411Z",
297 |      "iopub.status.busy": "2020-12-06T08:34:54.597041Z",
298 |      "iopub.status.idle": "2020-12-06T08:34:54.598974Z",
299 |      "shell.execute_reply": "2020-12-06T08:34:54.599203Z"
300 |     }
301 |    },
302 |    "outputs": [
303 |     {
304 |      "data": {
305 |       "text/plain": [
306 |        "[0,\n",
307 |        " 1,\n",
308 |        " 2,\n",
309 |        " 3,\n",
310 |        " 4,\n",
311 |        " 5,\n",
312 |        " 6,\n",
313 |        " 7,\n",
314 |        " 8,\n",
315 |        " 9,\n",
316 |        " 10,\n",
317 |        " 11,\n",
318 |        " 12,\n",
319 |        " 13,\n",
320 |        " 14,\n",
321 |        " 15,\n",
322 |        " 16,\n",
323 |        " 17,\n",
324 |        " 18,\n",
325 |        " 19,\n",
326 |        " 20,\n",
327 |        " 21,\n",
328 |        " 22,\n",
329 |        " 23,\n",
330 |        " 24,\n",
331 |        " 25,\n",
332 |        " 26,\n",
333 |        " 27,\n",
334 |        " 28,\n",
335 |        " 29,\n",
336 |        " 30,\n",
337 |        " 31,\n",
338 |        " 32,\n",
339 |        " 33,\n",
340 |        " 34,\n",
341 |        " 35,\n",
342 |        " 36,\n",
343 |        " 37,\n",
344 |        " 38,\n",
345 |        " 39,\n",
346 |        " 40,\n",
347 |        " 41,\n",
348 |        " 42,\n",
349 |        " 43,\n",
350 |        " 44,\n",
351 |        " 45,\n",
352 |        " 46,\n",
353 |        " 47,\n",
354 |        " 48,\n",
355 |        " 49,\n",
356 |        " 50,\n",
357 |        " 51,\n",
358 |        " 52,\n",
359 |        " 53,\n",
360 |        " 54,\n",
361 |        " 55,\n",
362 |        " 56,\n",
363 |        " 57,\n",
364 |        " 58,\n",
365 |        " 59,\n",
366 |        " 60,\n",
367 |        " 61,\n",
368 |        " 62,\n",
369 |        " 63,\n",
370 |        " 64,\n",
371 |        " 65,\n",
372 |        " 66,\n",
373 |        " 67,\n",
374 |        " 68,\n",
375 |        " 69,\n",
376 |        " 70,\n",
377 |        " 71,\n",
378 |        " 72,\n",
379 |        " 73,\n",
380 |        " 74,\n",
381 |        " 75,\n",
382 |        " 76,\n",
383 |        " 77,\n",
384 |        " 78,\n",
385 |        " 79,\n",
386 |        " 80,\n",
387 |        " 81,\n",
388 |        " 82,\n",
389 |        " 83,\n",
390 |        " 84,\n",
391 |        " 85,\n",
392 |        " 86,\n",
393 |        " 87,\n",
394 |        " 88,\n",
395 |        " 89,\n",
396 |        " 90,\n",
397 |        " 91,\n",
398 |        " 92,\n",
399 |        " 93,\n",
400 |        " 94,\n",
401 |        " 95,\n",
402 |        " 96,\n",
403 |        " 97,\n",
404 |        " 98,\n",
405 |        " 99]"
406 |       ]
407 |      },
408 |      "execution_count": 1,
409 |      "metadata": {},
410 |      "output_type": "execute_result"
411 |     }
412 |    ],
413 |    "source": [
414 |     "list(range(100))"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "markdown",
419 |    "metadata": {},
420 |    "source": [
421 |     "# Exercise:\n",
422 |     "- Write a proper training loop for this linear regression example.\n",
423 |     "- Split data into train and valid.\n",
424 |     "- Use the Dataset and DataLoader abstraction.\n",
425 |     "- Create a logistic regression module.\n",
426 |     "- Create a Multi Layer Perceptron (MLP)."
427 |    ]
428 |   }
429 |  ],
430 |  "metadata": {
431 |   "kernelspec": {
432 |    "display_name": "Python 3",
433 |    "language": "python",
434 |    "name": "python3"
435 |   },
436 |   "language_info": {
437 |    "codemirror_mode": {
438 |     "name": "ipython",
439 |     "version": 3
440 |    },
441 |    "file_extension": ".py",
442 |    "mimetype": "text/x-python",
443 |    "name": "python",
444 |    "nbconvert_exporter": "python",
445 |    "pygments_lexer": "ipython3",
446 |    "version": "3.8.5"
447 |   }
448 |  },
449 |  "nbformat": 4,
450 |  "nbformat_minor": 2
451 | }
452 | 


--------------------------------------------------------------------------------
/notebooks/lin_reg.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # LinReg with PyTorch, Gradient Descent, and GPU
 17 | 
 18 | # %% [markdown]
 19 | # ## Init, helpers, utils ...
 20 | 
 21 | # %%
 22 | # %matplotlib inline
 23 | 
 24 | # %%
 25 | import torch
 26 | import torch.nn as nn
 27 | import torch.nn.functional as F
 28 | import torch.optim as optim
 29 | import torchvision
 30 | 
 31 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 32 | DEVICE
 33 | 
 34 | # %%
 35 | from pprint import pprint
 36 | 
 37 | import matplotlib.pyplot as plt
 38 | import numpy as np
 39 | from IPython.core.debugger import set_trace
 40 | 
 41 | # %% [markdown]
 42 | # # The Problem
 43 | 
 44 | # %%
 45 | from sklearn.datasets import make_regression
 46 | 
 47 | 
 48 | n_features = 1
 49 | n_samples = 100
 50 | 
 51 | X, y = make_regression(
 52 |     n_samples=n_samples,
 53 |     n_features=n_features,
 54 |     noise=20,
 55 |     random_state=42,
 56 | )
 57 | 
 58 | fix, ax = plt.subplots()
 59 | ax.plot(X, y, ".")
 60 | 
 61 | # %% [markdown]
 62 | # # The Solution
 63 | 
 64 | # %%
 65 | X = torch.from_numpy(X).float()
 66 | y = torch.from_numpy(y.reshape((n_samples, n_features))).float()
 67 | 
 68 | 
 69 | # %%
 70 | class LinReg(nn.Module):  # linear regression expressed as a single nn.Linear layer
 71 |     def __init__(self, input_dim):
 72 |         super().__init__()
 73 |         self.beta = nn.Linear(input_dim, 1)  # input_dim features in, one value out
 74 |         
 75 |     def forward(self, X):  # forward pass: return the linear layer's output for X
 76 |         return self.beta(X)
 77 | 
 78 | # or just
 79 | # model = nn.Linear(input_dim, 1)
 80 | 
 81 | 
 82 | # %%
 83 | model = LinReg(n_features).to(DEVICE)  # <-- here
 84 | loss_fn = nn.MSELoss()
 85 | optimizer = optim.SGD(model.parameters(), lr=0.1)
 86 | 
 87 | 
 88 | X, y = X.to(DEVICE), y.to(DEVICE)  # <-- here
 89 | 
 90 | # %%
 91 | # Train step
 92 | model.train()  # <-- here
 93 | optimizer.zero_grad()
 94 | 
 95 | y_ = model(X)
 96 | loss = loss_fn(y_, y)
 97 | 
 98 | loss.backward()
 99 | optimizer.step()
100 | 
101 | # Eval
102 | model.eval()  # <-- here
103 | with torch.no_grad():
104 |     y_ = model(X)    
105 | 
106 | # Vis
107 | fig, ax = plt.subplots()
108 | ax.plot(X.cpu().numpy(), y_.cpu().numpy(), ".", label="pred")
109 | ax.plot(X.cpu().numpy(), y.cpu().numpy(), ".", label="data")
110 | ax.set_title(f"MSE: {loss.item():0.1f}")
111 | ax.legend();
112 | 
113 | # %% [markdown]
114 | # Note: I did gradient descent with all the data. I did not split the data into `train` and `valid` which should be done!
115 | 
116 | # %%
117 | list(range(100))
118 | 
119 | # %% [markdown]
120 | # # Exercise:
121 | # - Write a proper training loop for this linear regression example.
122 | # - Split data into train and valid.
123 | # - Use the Dataset and DataLoader abstraction.
124 | # - Create a logistic regression module.
125 | # - Create a Multi Layer Perceptron (MLP).
126 | 


--------------------------------------------------------------------------------
/notebooks/machine_learning_101.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # ML 101 Recap
 17 | #
 18 | # **ML = model + loss + optimizer**
 19 | #
 20 | #
 21 | # ## Linear regression example
 22 | #
 23 | # 0. Data
 24 | #
 25 | # 1. Model:
 26 | #   - $f(X) = X \beta = \hat y$
 27 | #
 28 | # 2. Loss / criterion:
 29 | #   - $ err_i = y_i - f(X_i)$
 30 | #   - $MSE = \frac{1}{N} \sum_{i=1}^{N} err_i^2$
 31 | #   
 32 | # 3. Optimize:
 33 | #   - minimize the MSE yields the optimal $\hat\beta$ (after doing some math)
 34 | #   - $\hat\beta = (X^TX)^{-1}X^Ty$
 35 | #   - (or, more generally, use gradient descent to optimize the parameters)
 36 | 
 37 | # %%
 38 | import numpy as np
 39 | from numpy.linalg import inv
 40 | from numpy.linalg import multi_dot as mdot
 41 | 
 42 | import matplotlib.pyplot as plt
 43 | 
 44 | # %matplotlib inline
 45 | 
 46 | # %% [markdown]
 47 | # ## LinReg with numpy
 48 | 
 49 | # %%
 50 | X = np.random.random((5, 3))
 51 | y = np.random.random(5)
 52 | X.shape, y.shape
 53 | 
 54 | # %% [markdown]
 55 | # Calculate the optimal parameter:
 56 | # $\hat\beta = (X^T X)^{-1} X^T y$
 57 | 
 58 | # %%
 59 | XT = X.T  # transpose
 60 | 
 61 | beta_ = mdot([inv(XT @ X), XT, y])
 62 | beta_
 63 | 
 64 | # %%
 65 | XT = X.T  # transpose
 66 | 
 67 | beta_ = inv(XT @ X) @ XT @ y
 68 | beta_
 69 | 
 70 | 
 71 | # %% [markdown]
 72 | # The model $f$:
 73 | 
 74 | # %%
 75 | def f(X, beta):  # the linear model: prediction is the matrix product X @ beta
 76 |     return X @ beta
 77 | 
 78 | f(X, beta_)
 79 | 
 80 | # %% [markdown]
 81 | # ## LinReg with PyTorch
 82 | 
 83 | # %%
 84 | import torch
 85 | 
 86 | # %%
 87 | # X = torch.rand((5, 3))
 88 | # y = torch.rand(5)
 89 | X = torch.from_numpy(X)
 90 | y = torch.from_numpy(y)
 91 | X.shape, y.shape
 92 | 
 93 | # %% [markdown]
 94 | # $\hat\beta = (X^T X)^{-1} X^T y$
 95 | 
 96 | # %%
 97 | XT = X.t()
 98 | 
 99 | beta__ = (XT @ X).inverse() @ XT @ y
100 | beta__
101 | 
102 | # %%
103 | beta__.numpy() - beta_
104 | 
105 | # %% [markdown]
106 | # ## LinReg with PyTorch and Gradient Descent
107 | #
108 | # Previously, we had to do some math to calculate the optimal $\hat\beta$.
109 | # PyTorch calculates the gradients for us automatically (more on that later)
110 | # and we can use some version of gradient descent to find our $\hat\beta$.
111 | 
112 | # %%
113 | from sklearn.datasets import make_regression
114 | 
115 | n_features = 1
116 | n_samples = 100
117 | 
118 | X, y = make_regression(
119 |     n_samples=n_samples,
120 |     n_features=n_features,
121 |     noise=10,
122 | )
123 | 
124 | dom_np = np.linspace(X.min(), X.max(), 20)
125 | dom = torch.from_numpy(dom_np).unsqueeze(-1).float()
126 | 
127 | fix, ax = plt.subplots()
128 | ax.plot(X, y, ".")
129 | 
130 | # %%
131 | X = torch.from_numpy(X).float()
132 | y = torch.from_numpy(y).float().unsqueeze(-1)
133 | X.shape, y.shape
134 | 
135 | # %%
136 | from torch import nn
137 | 
138 | class LinReg(nn.Module):  # linear regression expressed as a single nn.Linear layer
139 |     def __init__(self, input_dim):
140 |         super().__init__()
141 |         self.beta = nn.Linear(input_dim, 1)  # input_dim features in, one value out
142 |         
143 |     def forward(self, X):  # forward pass: return the linear layer's output for X
144 |         return self.beta(X)
145 | 
146 | 
147 | model = LinReg(n_features)
148 | 
149 | # %%
150 | loss_fn = nn.MSELoss()
151 | 
152 | # %%
153 | from torch import optim
154 | 
155 | optimizer = optim.SGD(model.parameters(), lr=0.01)
156 | 
157 | # %%
158 | # Train step
159 | model.train()
160 | optimizer.zero_grad()
161 | 
162 | y_ = model(X)
163 | 
164 | loss = loss_fn(y_, y)
165 | loss.backward()
166 | optimizer.step()
167 | 
168 | # Eval
169 | model.eval()
170 | with torch.no_grad():
171 |     y_ = model(dom)
172 |     
173 | 
174 | # Vis
175 | fig, ax = plt.subplots()
176 | ax.plot(X.numpy(), y.numpy(), ".", label="data")
177 | ax.plot(dom_np, y_.numpy(), "-", label="pred")
178 | ax.set_title(f"MSE: {loss.item():0.1f}")
179 | ax.legend();
180 | 
181 | # %%
182 | model.beta
183 | 
184 | # %%
185 | model.beta.weight
186 | 
187 | # %%
188 | model.beta.weight.data
189 | 
190 | # %%
191 | model.beta.bias
192 | 
193 | # %% [markdown]
194 | # ## LinReg with GPU
195 | #
196 | # Simply move the data and the model to the GPU.
197 | 
198 | # %%
199 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
200 | 
201 | model = LinReg(n_features).to(device)  # <-- here
202 | optimizer = optim.SGD(model.parameters(), lr=0.0001)
203 | criterion = nn.MSELoss()
204 | 
205 | X, y = X.to(device), y.to(device)  # <-- here
206 | dom = dom.to(device)
207 | 
208 | # %% [markdown]
209 | # The rest stays the same.
210 | 
211 | # %%
212 | # Train step
213 | model.train()
214 | optimizer.zero_grad()
215 | 
216 | y_ = model(X)
217 | 
218 | loss = loss_fn(y_, y)
219 | loss.backward()
220 | optimizer.step()
221 | 
222 | # Eval
223 | model.eval()
224 | with torch.no_grad():
225 |     y_ = model(dom)
226 |     
227 | 
228 | # Vis
229 | fig, ax = plt.subplots()
230 | ax.plot(X.cpu().numpy(), y.cpu().numpy(), ".", label="data")
231 | ax.plot(dom_np, y_.cpu().numpy(), "-", label="pred")
232 | ax.set_title(f"MSE: {loss.cpu().item():0.1f}")
233 | ax.legend();
234 | 


--------------------------------------------------------------------------------
/notebooks/mean_shift_clustering.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # Clustering with PyTorch
 17 | 
 18 | # %% [markdown]
 19 | # "PyTorch is a python package that provides [...]
 20 | # Tensor computation (like numpy) with strong GPU acceleration [...]"
 21 | #
 22 | # So, let's use it for some Mean-shift clustering.
 23 | 
 24 | # %%
 25 | import math
 26 | import operator
 27 | 
 28 | import numpy as np
 29 | import matplotlib.pyplot as plt
 30 | 
 31 | import torch
 32 | 
 33 | # %matplotlib inline
 34 | 
 35 | # %% [markdown]
 36 | # # Mean shift clustering with numpy
 37 | 
 38 | # %% [markdown]
 39 | # ## Create data
 40 | 
 41 | # %%
 42 | n_clusters = 6
 43 | n_samples = 1000
 44 | 
 45 | # %% [markdown]
 46 | # To generate our data, we're going to pick `n_clusters` random points, which we'll call centroids, and for each point we're going to generate `n_samples` random points about it.
 47 | 
 48 | # %%
 49 | centroids = np.random.uniform(-35, 35, (n_clusters, 2))
 50 | slices = [np.random.multivariate_normal(centroids[i], np.diag([5., 5.]), n_samples)
 51 |           for i in range(n_clusters)]
 52 | data = np.concatenate(slices).astype(np.float32)
 53 | 
 54 | 
 55 | # %% [markdown]
 56 | # Plot the data and the centroids:
 57 | 
 58 | # %%
 59 | def plot_data(centroids, data, n_samples):
 60 |     colour = plt.cm.rainbow(np.linspace(0,1,len(centroids)))
 61 | 
 62 |     fig, ax = plt.subplots(figsize=(4, 4))
 63 |     for i, centroid in enumerate(centroids):
 64 |         samples = data[i * n_samples : (i + 1) * n_samples]
 65 |         ax.scatter(samples[:, 0], samples[:, 1], c=colour[i], s=1)
 66 |         ax.plot(centroid[0], centroid[1], markersize=10, marker="x", color='k', mew=5)
 67 |         ax.plot(centroid[0], centroid[1], markersize=5, marker="x", color='m', mew=2)
 68 |     plt.axis('equal')
 69 |     
 70 | plot_data(centroids, data, n_samples)
 71 | 
 72 | # %% [markdown]
 73 | # ## The mean shift algorithm
 74 | #
 75 | # "Mean shift is a **non-parametric** feature-space analysis technique for locating the maxima of a density function, a so-called **mode-seeking algorithm**. Application domains include cluster analysis in computer vision and image processing." -- https://en.wikipedia.org/wiki/Mean_shift
 76 | #
 77 | # Think of mean-shift clustering as k-means but you don't have to specify the number of clusters.
 78 | # (You have to specify the **bandwidth** but that can be automated.)
 79 | 
 80 | # %% [markdown]
 81 | # Algo:
 82 | # ```python
 83 | # # PSEUDO CODE
 84 | # while not_converged():
 85 | #     for i, point in enumerate(points):
 86 | #         # distance for the given point to all other points
 87 | #         distances = calc_distances(point, points)
 88 | #         
 89 | #         # turn distance into weights using a gaussian
 90 | #         weights = gaussian(distances, bandwidth=2.5)
 91 | #         
 92 | #         # update the point by using the weights
 93 | #         points[i] = (weights * points).sum(0) / weights.sum()
 94 | #
 95 | # return points
 96 | # ```
 97 | 
 98 | # %% [markdown]
 99 | # ## The implementation
100 | #
101 | # Let's implement this with numpy:
102 | 
103 | # %%
104 | from numpy import exp, sqrt, array
105 | 
106 | 
107 | # %%
108 | def distance(x, X):
109 |     # Euclidean distance from x to each row of X (squared differences summed over axis 1).
110 |     # Equivalent numpy-only form: np.linalg.norm(x - X, axis=1)
111 |     return sqrt(((x - X)**2).sum(1))
112 | 
113 | # %% [markdown]
114 | # Let's try it out. (More on how this function works shortly)
115 | 
116 | # %%
117 | a = array([1, 2])
118 | b = array([[1, 2],
119 |            [2, 3],
120 |            [-1, -3]])
121 | 
122 | dist = distance(a, b)
123 | dist
124 | 
125 | 
126 | # %%
127 | def gaussian(dist, bandwidth):
128 |     """Gaussian density of `dist` with mean 0 and standard deviation `bandwidth`."""
129 |     return exp(-0.5 * ((dist / bandwidth))**2) / (bandwidth * math.sqrt(2 * math.pi))
130 | 
131 | # %%
132 | gaussian(dist, 2.5)
133 | 
134 | 
135 | # %% [markdown]
136 | # Now we can do a single mean shift step:
137 | 
138 | # %%
139 | def meanshift_step(X, bandwidth=2.5):
140 |     """One mean-shift pass: move each row of X to the Gaussian-weighted mean of all rows; X is modified in place and returned."""
141 |     for i, x in enumerate(X):
142 |         dist = distance(x, X)
143 |         weight = gaussian(dist, bandwidth)
144 |         X[i] = (weight[:, None] * X).sum(0) / weight.sum()  # overwrites row i, so later rows already see the moved point
145 |     return X
146 | 
147 | # %% [markdown]
148 | # Data before:
149 | 
150 | # %%
151 | plot_data(centroids, data, n_samples)
152 | 
153 | # %% [markdown]
154 | # Data after:
155 | 
156 | # %%
157 | _X = meanshift_step(np.copy(data))
158 | plot_data(centroids, _X, n_samples)
159 | 
160 | 
161 | # %% [markdown]
162 | # Just repeat this a few times and we have the complete mean shift algorithm:
163 | 
164 | # %%
165 | def meanshift(X):
166 |     """Copy X, apply `meanshift_step` five times and return the shifted copy."""
167 |     X = np.copy(X)
168 |     for _ in range(5):
169 |         X = meanshift_step(X)
170 |     return X
171 | 
172 | # %%
173 | # %%time
174 | X = meanshift(data)
175 | 
176 | # %%
177 | plot_data(centroids, X, n_samples)
178 | 
179 | # %% [markdown]
180 | # # Mean shift in PyTorch (with GPU)
181 | #
182 | # PyTorch is like numpy and the interface is very similar.
183 | #
184 | # We actually don't have to adjust anything really to use torch instead of numpy.
185 | 
186 | # %%
187 | import torch
188 | from torch import exp, sqrt
189 | 
190 | 
191 | # %% [markdown]
192 | # We only have to copy the data into a PyTorch GPU tensor.
193 | 
194 | # %%
195 | def meanshift_torch(X):
196 |     X = torch.from_numpy(np.copy(X)).cuda()
197 |     for it in range(5):
198 |         X = meanshift_step(X)
199 |     return X
200 | 
201 | 
202 | # %%
203 | # %time X = meanshift_torch(data).cpu().numpy()
204 | plot_data(centroids+2, X, n_samples)
205 | 
206 | 
207 | # %% [markdown]
208 | # Same results, but the implementation is about the same speed.
209 | #
210 | # CUDA kernels have to be started for each calculation and the kernels don't have enough to do.
211 | # Let's not process individual points, but batches of points.
212 | 
213 | # %% [markdown]
214 | # ## Batch processing
215 | 
216 | # %%
217 | def distance_batch(a, b):
218 |     """Pairwise Euclidean distances: result[j, i] is the distance between b[j] and a[i] (for 2-D `a` and `b`)."""
219 |     return sqrt(((a[None,:] - b[:,None]) ** 2).sum(2))
220 | 
221 | # %%
222 | a = torch.rand(2, 2)
223 | b = torch.rand(3, 2)
224 | distance_batch(b, a)
225 | 
226 | 
227 | # %% [markdown]
228 | # `distance_batch` contains some broadcast magic that allows us to compute the distance from each point in a batch to all points in the data.
229 | 
230 | # %%
231 | def meanshift_torch2(data, batch_size=500):
232 |     n = len(data)
233 |     X = torch.from_numpy(np.copy(data)).cuda()
234 |     for _ in range(5):
235 |         for i in range(0, n, batch_size):
236 |             s = slice(i, min(n, i + batch_size))
237 |             weight = gaussian(distance_batch(X, X[s]), 2.5)
238 |             num = (weight[:, :, None] * X).sum(dim=1)
239 |             X[s] = num / weight.sum(1)[:, None]
240 |     return X
241 | 
242 | 
243 | # %%
244 | # %time X = meanshift_torch2(data, batch_size=1).cpu().numpy()
245 | 
246 | # %%
247 | # %time X = meanshift_torch2(data, batch_size=10).cpu().numpy()
248 | 
249 | # %%
250 | # %time X = meanshift_torch2(data, batch_size=100).cpu().numpy()
251 | 
252 | # %%
253 | # %time X = meanshift_torch2(data, batch_size=1000).cpu().numpy()
254 | 
255 | # %%
256 | # %time X = meanshift_torch2(data, batch_size=6000).cpu().numpy()
257 | 
258 | # %%
259 | plot_data(centroids+2, X, n_samples)
260 | 
261 | # %% [markdown]
262 | # # Mean shift in scikit-learn
263 | #
264 | # Of course, sklearn also offers `MeanShift`.
265 | # Let's see how it performs
266 | 
267 | # %%
268 | from sklearn.cluster import MeanShift
269 | 
270 | # %%
271 | # %%time
272 | model = MeanShift()
273 | model.fit(data)
274 | 
275 | # %% [markdown]
276 | # This is faster than our naive implementation, but much slower than the GPU version.
277 | #
278 | 
279 | # %% [markdown]
280 | # # Note
281 | # Keep in mind that this demo is not saying that A is faster than B.
282 | # It rather shows that you can use PyTorch in fun ways!
283 | #
284 | # Ref:
285 | # - https://pytorch.org/docs/stable/notes/broadcasting.html
286 | # - https://pytorch.org/docs/stable/notes/cuda.html
287 | # - https://github.com/fastai/fastai/blob/master/tutorials/meanshift.ipynb
288 | 


--------------------------------------------------------------------------------
/notebooks/pytorch_basics.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown] toc-hr-collapsed=true toc-nb-collapsed=true
 16 | # # PyTorch Basics
 17 | # - tensors like numpy
 18 | # - tensors on the gpu
 19 | # - tensors and automatic derivatives
 20 | # - tensors as neural network abstractions: `torch.nn`
 21 | # - optimizers: `torch.optim`
 22 | 
 23 | # %% [markdown]
 24 | # ## Init, helpers, utils, ...
 25 | 
 26 | # %%
 27 | import torch
 28 | import torch.nn as nn
 29 | import torch.nn.functional as F
 30 | import torch.optim as optim
 31 | import torchvision
 32 | 
 33 | # %%
 34 | from pprint import pprint
 35 | 
 36 | import matplotlib.pyplot as plt
 37 | import numpy as np
 38 | from IPython.core.debugger import set_trace
 39 | 
 40 | # %% [markdown] toc-hr-collapsed=true toc-nb-collapsed=true
 41 | # # Tensors
 42 | # tensors - the atoms of machine learning
 43 | 
 44 | # %% [markdown]
 45 | # ## Tensors in numpy and pytorch
 46 | 
 47 | # %%
 48 | import numpy as np
 49 | from numpy.linalg import inv
 50 | from numpy.linalg import multi_dot as mdot
 51 | 
 52 | # %%
 53 | # numpy
 54 | np.eye(3)
 55 | 
 56 | # %%
 57 | # torch
 58 | torch.eye(3)
 59 | 
 60 | # %%
 61 | # numpy
 62 | X = np.random.random((5, 3))
 63 | X
 64 | 
 65 | # %%
 66 | # pytorch
 67 | Y = torch.rand((5, 3))
 68 | Y
 69 | 
 70 | # %%
 71 | X.shape
 72 | 
 73 | # %%
 74 | Y.shape
 75 | 
 76 | # %%
 77 | # numpy
 78 | X.T @ X
 79 | 
 80 | # %%
 81 | # torch
 82 | Y.t() @ Y
 83 | 
 84 | # %%
 85 | # numpy
 86 | inv(X.T @ X)
 87 | 
 88 | # %%
 89 | # torch
 90 | torch.inverse(Y.t() @ Y)
 91 | 
 92 | # %% [markdown]
 93 | # ## More on PyTorch Tensors
 94 | 
 95 | # %% [markdown]
 96 | # Operations are also available as methods.
 97 | 
 98 | # %%
 99 | A = torch.eye(3)
100 | A.add(1)
101 | 
102 | # %%
103 | A
104 | 
105 | # %% [markdown]
106 | # Any operation that mutates a tensor in-place has a `_` suffix.
107 | 
108 | # %%
109 | A.add_(1)
110 | A
111 | 
112 | # %% [markdown]
113 | # ## Indexing and broadcasting
114 | # It works as expected/like numpy:
115 | 
116 | # %%
117 | A[0, 0]
118 | 
119 | # %%
120 | A[0]
121 | 
122 | # %%
123 | A[0:2]
124 | 
125 | # %%
126 | A[:, 1:3]
127 | 
128 | # %% [markdown]
129 | # ## Converting
130 | 
131 | # %%
132 | A = torch.eye(3)
133 | A
134 | 
135 | # %%
136 | # torch --> numpy
137 | B = A.numpy()
138 | B
139 | 
140 | # %% [markdown]
141 | # Note: torch and numpy can share the same memory / zero-copy
142 | 
143 | # %%
144 | A.add_(.5)
145 | A
146 | 
147 | # %%
148 | B
149 | 
150 | # %%
151 | # numpy --> torch
152 | torch.from_numpy(np.eye(3))
153 | 
154 | # %% [markdown]
155 | # ## Much more
156 | 
157 | # %%
158 | [o for o in dir(torch) if not o.startswith("_")]
159 | 
160 | # %%
161 | [o for o in dir(A) if not o.startswith("_")]
162 | 
163 | # %% [markdown]
164 | # # But what about the GPU?
165 | # How do I use the GPU?
166 | #
167 | # If you have a GPU make sure that the right pytorch is installed
168 | # (check https://pytorch.org/ for details).
169 | 
170 | # %%
171 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
172 | device
173 | 
174 | # %% [markdown]
175 | # If you have a GPU you should get something like: 
176 | # `device(type='cuda', index=0)`
177 | #
178 | # You can move data to the GPU by doing `.to(device)`.
179 | 
180 | # %%
181 | data = torch.eye(3)
182 | data = data.to(device)
183 | data
184 | 
185 | # %% [markdown]
186 | # Now the computation happens on the GPU.
187 | 
188 | # %%
189 | res = data + data
190 | res
191 | 
192 | # %%
193 | res.device
194 | 
195 | # %% [markdown]
196 | # Note: before `v0.4` one had to use `.cuda()` and `.cpu()` to move stuff to and from the GPU.
197 | # This littered the code with many:
198 | # ```python
199 | # if CUDA:
200 | #     model = model.cuda()
201 | # ```
202 | 
203 | # %% [markdown]
204 | # # Automatic differentiation with `autograd`
205 | # Prior to `v0.4` PyTorch used the class `Variable` to record gradients. You had to wrap `Tensor`s in `Variable`s.
206 | # `Variable`s behaved exactly like `Tensors`.
207 | #
208 | # With `v0.4` `Tensor` can record gradients directly if you tell it to do so, e.g. `torch.ones(3, requires_grad=True)`.
209 | # There is no need for `Variable` anymore.
210 | # Many tutorials still use `Variable`, be aware!
211 | #
212 | # Ref:
213 | # - https://pytorch.org/docs/stable/autograd.html
214 | # - https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
215 | 
216 | # %% [markdown]
217 | # You rarely use `torch.autograd` directly.
218 | # Pretty much everything is part of `torch.Tensor` now.
219 | # Simply add `requires_grad=True` to the tensors you want to calculate the gradients for.
220 | # `nn.Module`s track gradients automatically.
221 | 
222 | # %%
223 | from torch import autograd
224 | 
225 | # %%
226 | x = torch.tensor(2.)
227 | x
228 | 
229 | # %%
230 | x = torch.tensor(2., requires_grad=True)
231 | x
232 | 
233 | # %%
234 | print(x.requires_grad)
235 | 
236 | # %%
237 | print(x.grad)
238 | 
239 | # %%
240 | y = x ** 2
241 | 
242 | print("Grad of x:", x.grad)
243 | 
244 | # %%
245 | y = x ** 2
246 | y.backward()
247 | 
248 | print("Grad of x:", x.grad)
249 | 
250 | # %%
251 | # What is going to happen here?
252 | # x = torch.tensor(2.)
253 | # x.backward()
254 | 
255 | # %%
256 | # Don't record the gradient
257 | # Useful for inference
258 | 
259 | params = torch.tensor(2., requires_grad=True)
260 | 
261 | with torch.no_grad():
262 |     y = x * x
263 |     print(x.grad_fn)
264 | 
265 | # %% [markdown]
266 | # `nn.Module` and `nn.Parameter` keep track of gradients for you.
267 | 
268 | # %%
269 | lin = nn.Linear(2, 1, bias=True)
270 | lin.weight
271 | 
272 | # %%
273 | type(lin.weight)
274 | 
275 | # %%
276 | isinstance(lin.weight, torch.FloatTensor)
277 | 
278 | # %% [markdown]
279 | # ## `torch.nn`
280 | # The neural network modules contains many different layers.
281 | 
282 | # %%
283 | from torch import nn
284 | 
285 | # %%
286 | lin_reg = nn.Linear(1, 1, bias=True)
287 | lin_reg
288 | 
289 | # %%
290 | nn.Conv2d
291 | 
292 | # %%
293 | nn.Conv3d
294 | 
295 | # %%
296 | nn.BatchNorm2d
297 | 
298 | # %% [markdown]
299 | # ### Activations
300 | 
301 | # %%
302 | nn.ReLU
303 | 
304 | # %%
305 | nn.Sigmoid
306 | 
307 | # %% [markdown]
308 | # ### Losses
309 | 
310 | # %%
311 | nn.Softmax
312 | 
313 | # %%
314 | nn.CrossEntropyLoss
315 | 
316 | # %%
317 | nn.BCELoss
318 | 
319 | # %%
320 | nn.MSELoss
321 | 
322 | # %% [markdown]
323 | # ### Functional (stateless) alternatives
324 | 
325 | # %%
326 | from torch.nn import functional as F
327 | 
328 | # %%
329 | F.mse_loss
330 | 
331 | # %%
332 | F.relu
333 | 
334 | # %%
335 | F.relu6
336 | 
337 | # %% [markdown]
338 | # ## `torch.optim`
339 | 
340 | # %%
341 | from torch import optim
342 | 
343 | # %%
344 | optim.SGD
345 | 
346 | # %%
347 | optim.Adam
348 | 
349 | # %%
350 | optim.AdamW
351 | 
352 | # %% [markdown]
353 | # # Exercise
354 | # - Do you remember the analytical solution to solve for the parameters of linear regression? Implement it.
355 | 


--------------------------------------------------------------------------------
/notebooks/rnn_from_scratch.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# RNN from scratch with PyTorch\n",
   8 |     "An RNN is just a normal NN.\n",
   9 |     "It's very easy to implement in PyTorch due to its dynamic nature.\n",
  10 |     "\n",
  11 |     "We'll build a very simple character based language model.\n",
  12 |     "\n",
  13 |     "Taken from http://www.fast.ai/"
  14 |    ]
  15 |   },
  16 |   {
  17 |    "cell_type": "markdown",
  18 |    "metadata": {},
  19 |    "source": [
  20 |     "## Init and helpers"
  21 |    ]
  22 |   },
  23 |   {
  24 |    "cell_type": "code",
  25 |    "execution_count": 1,
  26 |    "metadata": {
  27 |     "execution": {
  28 |      "iopub.execute_input": "2020-12-06T08:47:20.187029Z",
  29 |      "iopub.status.busy": "2020-12-06T08:47:20.186373Z",
  30 |      "iopub.status.idle": "2020-12-06T08:47:20.309644Z",
  31 |      "shell.execute_reply": "2020-12-06T08:47:20.310735Z"
  32 |     }
  33 |    },
  34 |    "outputs": [],
  35 |    "source": [
  36 |     "from pathlib import Path\n",
  37 |     "import numpy as np"
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "markdown",
  42 |    "metadata": {},
  43 |    "source": [
  44 |     "## Data"
  45 |    ]
  46 |   },
  47 |   {
  48 |    "cell_type": "code",
  49 |    "execution_count": 2,
  50 |    "metadata": {
  51 |     "execution": {
  52 |      "iopub.execute_input": "2020-12-06T08:47:20.315957Z",
  53 |      "iopub.status.busy": "2020-12-06T08:47:20.314473Z",
  54 |      "iopub.status.idle": "2020-12-06T08:47:20.387279Z",
  55 |      "shell.execute_reply": "2020-12-06T08:47:20.386890Z"
  56 |     }
  57 |    },
  58 |    "outputs": [
  59 |     {
  60 |      "name": "stdout",
  61 |      "output_type": "stream",
  62 |      "text": [
  63 |       "I already have the data.\n"
  64 |      ]
  65 |     }
  66 |    ],
  67 |    "source": [
  68 |     "NIETSCHE_PATH = Path(\"../data/raw/nietzsche.txt\")\n",
  69 |     "if NIETSCHE_PATH.is_file():\n",
  70 |     "    print(\"I already have the data.\")\n",
  71 |     "else:\n",
  72 |     "    !wget -O ../data/raw/nietzsche.txt https://s3.amazonaws.com/text-datasets/nietzsche.txt\n",
  73 |     "    # Capital -O names the downloaded file; lowercase -o would only write wget's log there.\n",
  74 |     "with NIETSCHE_PATH.open() as f:\n",
  75 |     "    data = f.read()"
  76 |    ]
  77 |   },
  78 |   {
  79 |    "cell_type": "markdown",
  80 |    "metadata": {},
  81 |    "source": [
  82 |     "A tweet of Nietzsche:"
  83 |    ]
  84 |   },
  85 |   {
  86 |    "cell_type": "code",
  87 |    "execution_count": 3,
  88 |    "metadata": {
  89 |     "execution": {
  90 |      "iopub.execute_input": "2020-12-06T08:47:20.390460Z",
  91 |      "iopub.status.busy": "2020-12-06T08:47:20.390010Z",
  92 |      "iopub.status.idle": "2020-12-06T08:47:20.392264Z",
  93 |      "shell.execute_reply": "2020-12-06T08:47:20.392662Z"
  94 |     }
  95 |    },
  96 |    "outputs": [
  97 |     {
  98 |      "name": "stdout",
  99 |      "output_type": "stream",
 100 |      "text": [
 101 |       "PREFACE\n",
 102 |       "\n",
 103 |       "\n",
 104 |       "SUPPOSING that Truth is a woman--what then? Is there not ground\n",
 105 |       "for suspecting that all philosophers, in so far as they have been\n",
 106 |       "\n"
 107 |      ]
 108 |     }
 109 |    ],
 110 |    "source": [
 111 |     "print(data[:140])"
 112 |    ]
 113 |   },
 114 |   {
 115 |    "cell_type": "markdown",
 116 |    "metadata": {},
 117 |    "source": [
 118 |     "We need to know the alphabet, i.e. the set of distinct characters in the text, and we prepend a padding value \"\\0\" to it."
 119 |    ]
 120 |   },
 121 |   {
 122 |    "cell_type": "code",
 123 |    "execution_count": 4,
 124 |    "metadata": {
 125 |     "execution": {
 126 |      "iopub.execute_input": "2020-12-06T08:47:20.410193Z",
 127 |      "iopub.status.busy": "2020-12-06T08:47:20.409477Z",
 128 |      "iopub.status.idle": "2020-12-06T08:47:20.412698Z",
 129 |      "shell.execute_reply": "2020-12-06T08:47:20.412171Z"
 130 |     }
 131 |    },
 132 |    "outputs": [
 133 |     {
 134 |      "data": {
 135 |       "text/plain": [
 136 |        "85"
 137 |       ]
 138 |      },
 139 |      "execution_count": 1,
 140 |      "metadata": {},
 141 |      "output_type": "execute_result"
 142 |     }
 143 |    ],
 144 |    "source": [
 145 |     "alphabet = [\"\\0\", *sorted(list(set(data)))]\n",
 146 |     "n_alphabet = len(alphabet)\n",
 147 |     "n_alphabet"
 148 |    ]
 149 |   },
 150 |   {
 151 |    "cell_type": "code",
 152 |    "execution_count": 5,
 153 |    "metadata": {
 154 |     "execution": {
 155 |      "iopub.execute_input": "2020-12-06T08:47:20.416803Z",
 156 |      "iopub.status.busy": "2020-12-06T08:47:20.416181Z",
 157 |      "iopub.status.idle": "2020-12-06T08:47:20.418426Z",
 158 |      "shell.execute_reply": "2020-12-06T08:47:20.418912Z"
 159 |     }
 160 |    },
 161 |    "outputs": [],
 162 |    "source": [
 163 |     "char2index = {c: i for i, c in enumerate(alphabet)}\n",
 164 |     "index2char = {i: c for i, c in enumerate(alphabet)}"
 165 |    ]
 166 |   },
 167 |   {
 168 |    "cell_type": "markdown",
 169 |    "metadata": {},
 170 |    "source": [
 171 |     "Convert the data into a list of integers"
 172 |    ]
 173 |   },
 174 |   {
 175 |    "cell_type": "code",
 176 |    "execution_count": 6,
 177 |    "metadata": {
 178 |     "execution": {
 179 |      "iopub.execute_input": "2020-12-06T08:47:20.459645Z",
 180 |      "iopub.status.busy": "2020-12-06T08:47:20.456579Z",
 181 |      "iopub.status.idle": "2020-12-06T08:47:20.461267Z",
 182 |      "shell.execute_reply": "2020-12-06T08:47:20.461520Z"
 183 |     }
 184 |    },
 185 |    "outputs": [],
 186 |    "source": [
 187 |     "index = [char2index[c] for c in data]"
 188 |    ]
 189 |   },
 190 |   {
 191 |    "cell_type": "code",
 192 |    "execution_count": 7,
 193 |    "metadata": {
 194 |     "execution": {
 195 |      "iopub.execute_input": "2020-12-06T08:47:20.463805Z",
 196 |      "iopub.status.busy": "2020-12-06T08:47:20.463476Z",
 197 |      "iopub.status.idle": "2020-12-06T08:47:20.465132Z",
 198 |      "shell.execute_reply": "2020-12-06T08:47:20.465383Z"
 199 |     }
 200 |    },
 201 |    "outputs": [
 202 |     {
 203 |      "name": "stdout",
 204 |      "output_type": "stream",
 205 |      "text": [
 206 |       "[40, 42, 29, 30, 25, 27, 29, 1, 1, 1, 43, 45, 40, 40, 39, 43, 33, 38, 31, 2, 73, 61, 54, 73, 2]\n",
 207 |       "PREFACE\n",
 208 |       "\n",
 209 |       "\n",
 210 |       "SUPPOSING that \n"
 211 |      ]
 212 |     }
 213 |    ],
 214 |    "source": [
 215 |     "print(index[:25])\n",
 216 |     "print(\"\".join(index2char[i] for i in index[:25]))"
 217 |    ]
 218 |   },
 219 |   {
 220 |    "cell_type": "code",
 221 |    "execution_count": 8,
 222 |    "metadata": {
 223 |     "execution": {
 224 |      "iopub.execute_input": "2020-12-06T08:47:20.467519Z",
 225 |      "iopub.status.busy": "2020-12-06T08:47:20.467164Z",
 226 |      "iopub.status.idle": "2020-12-06T08:47:20.468936Z",
 227 |      "shell.execute_reply": "2020-12-06T08:47:20.469190Z"
 228 |     }
 229 |    },
 230 |    "outputs": [
 231 |     {
 232 |      "data": {
 233 |       "text/plain": [
 234 |        "[40, 42, 29]"
 235 |       ]
 236 |      },
 237 |      "execution_count": 1,
 238 |      "metadata": {},
 239 |      "output_type": "execute_result"
 240 |     }
 241 |    ],
 242 |    "source": [
 243 |     "index[0: 3]"
 244 |    ]
 245 |   },
 246 |   {
 247 |    "cell_type": "code",
 248 |    "execution_count": 9,
 249 |    "metadata": {
 250 |     "execution": {
 251 |      "iopub.execute_input": "2020-12-06T08:47:20.531092Z",
 252 |      "iopub.status.busy": "2020-12-06T08:47:20.521077Z",
 253 |      "iopub.status.idle": "2020-12-06T08:47:22.563153Z",
 254 |      "shell.execute_reply": "2020-12-06T08:47:22.563421Z"
 255 |     }
 256 |    },
 257 |    "outputs": [],
 258 |    "source": [
 259 |     "X, y = [], []\n",
 260 |     "for i in range(len(index) - 4):\n",
 261 |     "    X.append(index[i : i + 3])\n",
 262 |     "    y.append(index[i + 3])\n",
 263 |     "    \n",
 264 |     "X = np.stack(X)\n",
 265 |     "y = np.stack(y)"
 266 |    ]
 267 |   },
 268 |   {
 269 |    "cell_type": "code",
 270 |    "execution_count": 10,
 271 |    "metadata": {
 272 |     "execution": {
 273 |      "iopub.execute_input": "2020-12-06T08:47:22.565676Z",
 274 |      "iopub.status.busy": "2020-12-06T08:47:22.565356Z",
 275 |      "iopub.status.idle": "2020-12-06T08:47:22.567072Z",
 276 |      "shell.execute_reply": "2020-12-06T08:47:22.567331Z"
 277 |     }
 278 |    },
 279 |    "outputs": [
 280 |     {
 281 |      "data": {
 282 |       "text/plain": [
 283 |        "((600889, 3), (600889,))"
 284 |       ]
 285 |      },
 286 |      "execution_count": 1,
 287 |      "metadata": {},
 288 |      "output_type": "execute_result"
 289 |     }
 290 |    ],
 291 |    "source": [
 292 |     "X.shape, y.shape"
 293 |    ]
 294 |   },
 295 |   {
 296 |    "cell_type": "code",
 297 |    "execution_count": 11,
 298 |    "metadata": {
 299 |     "execution": {
 300 |      "iopub.execute_input": "2020-12-06T08:47:22.569823Z",
 301 |      "iopub.status.busy": "2020-12-06T08:47:22.569358Z",
 302 |      "iopub.status.idle": "2020-12-06T08:47:22.571051Z",
 303 |      "shell.execute_reply": "2020-12-06T08:47:22.571308Z"
 304 |     }
 305 |    },
 306 |    "outputs": [
 307 |     {
 308 |      "data": {
 309 |       "text/plain": [
 310 |        "(array([40, 42, 29]), 30)"
 311 |       ]
 312 |      },
 313 |      "execution_count": 1,
 314 |      "metadata": {},
 315 |      "output_type": "execute_result"
 316 |     }
 317 |    ],
 318 |    "source": [
 319 |     "X[0], y[0]"
 320 |    ]
 321 |   },
 322 |   {
 323 |    "cell_type": "code",
 324 |    "execution_count": 12,
 325 |    "metadata": {
 326 |     "execution": {
 327 |      "iopub.execute_input": "2020-12-06T08:47:22.573504Z",
 328 |      "iopub.status.busy": "2020-12-06T08:47:22.573159Z",
 329 |      "iopub.status.idle": "2020-12-06T08:47:22.574945Z",
 330 |      "shell.execute_reply": "2020-12-06T08:47:22.575186Z"
 331 |     }
 332 |    },
 333 |    "outputs": [
 334 |     {
 335 |      "data": {
 336 |       "text/plain": [
 337 |        "numpy.ndarray"
 338 |       ]
 339 |      },
 340 |      "execution_count": 1,
 341 |      "metadata": {},
 342 |      "output_type": "execute_result"
 343 |     }
 344 |    ],
 345 |    "source": [
 346 |     "type(y)"
 347 |    ]
 348 |   },
 349 |   {
 350 |    "cell_type": "code",
 351 |    "execution_count": 13,
 352 |    "metadata": {
 353 |     "execution": {
 354 |      "iopub.execute_input": "2020-12-06T08:47:22.577419Z",
 355 |      "iopub.status.busy": "2020-12-06T08:47:22.577102Z",
 356 |      "iopub.status.idle": "2020-12-06T08:47:22.791865Z",
 357 |      "shell.execute_reply": "2020-12-06T08:47:22.792148Z"
 358 |     }
 359 |    },
 360 |    "outputs": [],
 361 |    "source": [
 362 |     "import torch\n",
 363 |     "from torch.utils.data import DataLoader, Dataset, TensorDataset\n",
 364 |     "\n",
 365 |     "\n",
 366 |     "train_ds = TensorDataset(torch.from_numpy(X), torch.from_numpy(y))\n",
 367 |     "train_dl = DataLoader(train_ds, batch_size=500)"
 368 |    ]
 369 |   },
 370 |   {
 371 |    "cell_type": "markdown",
 372 |    "metadata": {},
 373 |    "source": [
 374 |     "# The model"
 375 |    ]
 376 |   },
 377 |   {
 378 |    "cell_type": "code",
 379 |    "execution_count": 14,
 380 |    "metadata": {
 381 |     "execution": {
 382 |      "iopub.execute_input": "2020-12-06T08:47:22.794452Z",
 383 |      "iopub.status.busy": "2020-12-06T08:47:22.794136Z",
 384 |      "iopub.status.idle": "2020-12-06T08:47:22.795818Z",
 385 |      "shell.execute_reply": "2020-12-06T08:47:22.795504Z"
 386 |     }
 387 |    },
 388 |    "outputs": [],
 389 |    "source": [
 390 |     "import torch\n",
 391 |     "import torch.nn as nn\n",
 392 |     "import torch.nn.functional as F\n",
 393 |     "import torch.optim as optim"
 394 |    ]
 395 |   },
 396 |   {
 397 |    "cell_type": "code",
 398 |    "execution_count": 15,
 399 |    "metadata": {
 400 |     "execution": {
 401 |      "iopub.execute_input": "2020-12-06T08:47:22.799526Z",
 402 |      "iopub.status.busy": "2020-12-06T08:47:22.799090Z",
 403 |      "iopub.status.idle": "2020-12-06T08:47:22.801673Z",
 404 |      "shell.execute_reply": "2020-12-06T08:47:22.801383Z"
 405 |     }
 406 |    },
 407 |    "outputs": [
 408 |     {
 409 |      "name": "stderr",
 410 |      "output_type": "stream",
 411 |      "text": [
 412 |       "/home/stefan/projects/pytorch_tutorial/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at  /pytorch/c10/cuda/CUDAFunctions.cpp:100.)\n",
 413 |       "  return torch._C._cuda_getDeviceCount() > 0\n"
 414 |      ]
 415 |     },
 416 |     {
 417 |      "data": {
 418 |       "text/plain": [
 419 |        "device(type='cpu')"
 420 |       ]
 421 |      },
 422 |      "execution_count": 1,
 423 |      "metadata": {},
 424 |      "output_type": "execute_result"
 425 |     }
 426 |    ],
 427 |    "source": [
 428 |     "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
 429 |     "device"
 430 |    ]
 431 |   },
 432 |   {
 433 |    "cell_type": "code",
 434 |    "execution_count": 16,
 435 |    "metadata": {
 436 |     "execution": {
 437 |      "iopub.execute_input": "2020-12-06T08:47:22.805642Z",
 438 |      "iopub.status.busy": "2020-12-06T08:47:22.805321Z",
 439 |      "iopub.status.idle": "2020-12-06T08:47:22.807151Z",
 440 |      "shell.execute_reply": "2020-12-06T08:47:22.806837Z"
 441 |     }
 442 |    },
 443 |    "outputs": [],
 444 |    "source": [
 445 |     "class CharModel(nn.Module):\n",
 446 |     "    def __init__(self, n_vocab, n_embedding, n_hidden):\n",
 447 |     "        super().__init__()\n",
 448 |     "        self.emb = nn.Embedding(n_vocab, n_embedding)\n",
 449 |     "        self.lin_in = nn.Linear(n_embedding, n_hidden)\n",
 450 |     "        \n",
 451 |     "        self.lin_hidden = nn.Linear(n_hidden, n_hidden)\n",
 452 |     "        self.lin_out = nn.Linear(n_hidden, n_vocab)\n",
 453 |     "        \n",
 454 |     "    def forward(self, X):\n",
 455 |     "        c1, c2, c3 = X[:, 0], X[:, 1], X[:, 2]\n",
 456 |     "        \n",
 457 |     "        in1 = F.relu(self.lin_in(self.emb(c1)))\n",
 458 |     "        h = F.tanh(self.lin_hidden(in1))\n",
 459 |     "                   \n",
 460 |     "        in2 = F.relu(self.lin_in(self.emb(c2)))\n",
 461 |     "        h = F.tanh(self.lin_hidden(h + in2))\n",
 462 |     "        \n",
 463 |     "        in3 = F.relu(self.lin_in(self.emb(c3)))\n",
 464 |     "        h = F.tanh(self.lin_hidden(h + in3))\n",
 465 |     "        \n",
 466 |     "        return F.log_softmax(self.lin_out(h), dim=-1)"
 467 |    ]
 468 |   },
 469 |   {
 470 |    "cell_type": "code",
 471 |    "execution_count": 17,
 472 |    "metadata": {
 473 |     "execution": {
 474 |      "iopub.execute_input": "2020-12-06T08:47:22.809256Z",
 475 |      "iopub.status.busy": "2020-12-06T08:47:22.808949Z",
 476 |      "iopub.status.idle": "2020-12-06T08:47:22.811552Z",
 477 |      "shell.execute_reply": "2020-12-06T08:47:22.811233Z"
 478 |     }
 479 |    },
 480 |    "outputs": [],
 481 |    "source": [
 482 |     "n_embedding = 40\n",
 483 |     "n_hidden = 256\n",
 484 |     "\n",
 485 |     "model = CharModel(n_alphabet, n_embedding=40, n_hidden=128)\n",
 486 |     "model = model.to(device)"
 487 |    ]
 488 |   },
 489 |   {
 490 |    "cell_type": "code",
 491 |    "execution_count": 18,
 492 |    "metadata": {
 493 |     "execution": {
 494 |      "iopub.execute_input": "2020-12-06T08:47:22.813741Z",
 495 |      "iopub.status.busy": "2020-12-06T08:47:22.813436Z",
 496 |      "iopub.status.idle": "2020-12-06T08:47:22.815254Z",
 497 |      "shell.execute_reply": "2020-12-06T08:47:22.814935Z"
 498 |     }
 499 |    },
 500 |    "outputs": [],
 501 |    "source": [
 502 |     "optimizer = optim.Adam(model.parameters(), 0.001)\n",
 503 |     "#criterion = nn.CrossEntropyLoss()\n",
 504 |     "criterion = F.nll_loss"
 505 |    ]
 506 |   },
 507 |   {
 508 |    "cell_type": "code",
 509 |    "execution_count": 19,
 510 |    "metadata": {
 511 |     "execution": {
 512 |      "iopub.execute_input": "2020-12-06T08:47:22.818691Z",
 513 |      "iopub.status.busy": "2020-12-06T08:47:22.818372Z",
 514 |      "iopub.status.idle": "2020-12-06T08:47:22.819647Z",
 515 |      "shell.execute_reply": "2020-12-06T08:47:22.819884Z"
 516 |     }
 517 |    },
 518 |    "outputs": [],
 519 |    "source": [
 520 |     "def fit(model, n_epoch=2):\n",
 521 |     "    optimizer = optim.Adam(model.parameters(), 0.001)\n",
 522 |     "    \n",
 523 |     "    for epoch in range(n_epoch):\n",
 524 |     "        print(f\"Epoch {epoch}:\")\n",
 525 |     "        running_loss, correct = 0.0, 0\n",
 526 |     "\n",
 527 |     "        model.train()\n",
 528 |     "        for X, y in train_dl:\n",
 529 |     "            X, y = X.to(device), y.to(device)\n",
 530 |     "            optimizer.zero_grad()\n",
 531 |     "\n",
 532 |     "            y_ = model(X)\n",
 533 |     "            loss = criterion(y_, y)\n",
 534 |     "\n",
 535 |     "            loss.backward()\n",
 536 |     "            optimizer.step()\n",
 537 |     "\n",
 538 |     "            _, y_label_ = torch.max(y_, 1)\n",
 539 |     "            correct += (y_label_ == y).sum().item()\n",
 540 |     "            running_loss += loss.item() * X.shape[0]\n",
 541 |     "\n",
 542 |     "        print(f\"  Train Loss: {running_loss / len(train_dl.dataset):0.4f}\")\n",
 543 |     "        print(f\"  Train Acc:  {correct / len(train_dl.dataset):0.2f}\")"
 544 |    ]
 545 |   },
 546 |   {
 547 |    "cell_type": "code",
 548 |    "execution_count": 20,
 549 |    "metadata": {
 550 |     "execution": {
 551 |      "iopub.execute_input": "2020-12-06T08:47:22.891629Z",
 552 |      "iopub.status.busy": "2020-12-06T08:47:22.885064Z",
 553 |      "iopub.status.idle": "2020-12-06T08:47:44.559163Z",
 554 |      "shell.execute_reply": "2020-12-06T08:47:44.558554Z"
 555 |     }
 556 |    },
 557 |    "outputs": [
 558 |     {
 559 |      "name": "stderr",
 560 |      "output_type": "stream",
 561 |      "text": [
 562 |       "/home/stefan/projects/pytorch_tutorial/.venv/lib/python3.8/site-packages/torch/nn/functional.py:1628: UserWarning: nn.functional.tanh is deprecated. Use torch.tanh instead.\n",
 563 |       "  warnings.warn(\"nn.functional.tanh is deprecated. Use torch.tanh instead.\")\n"
 564 |      ]
 565 |     },
 566 |     {
 567 |      "name": "stdout",
 568 |      "output_type": "stream",
 569 |      "text": [
 570 |       "Epoch 0:\n",
 571 |       "  Train Loss: 2.2354\n",
 572 |       "  Train Acc:  0.37\n",
 573 |       "Epoch 1:\n",
 574 |       "  Train Loss: 1.9220\n",
 575 |       "  Train Acc:  0.44\n"
 576 |      ]
 577 |     }
 578 |    ],
 579 |    "source": [
 580 |     "fit(model, 2)"
 581 |    ]
 582 |   },
 583 |   {
 584 |    "cell_type": "code",
 585 |    "execution_count": 21,
 586 |    "metadata": {
 587 |     "execution": {
 588 |      "iopub.execute_input": "2020-12-06T08:47:44.564413Z",
 589 |      "iopub.status.busy": "2020-12-06T08:47:44.563821Z",
 590 |      "iopub.status.idle": "2020-12-06T08:47:44.565659Z",
 591 |      "shell.execute_reply": "2020-12-06T08:47:44.566157Z"
 592 |     }
 593 |    },
 594 |    "outputs": [],
 595 |    "source": [
 596 |     "def predict(word):\n",
 597 |     "    word_idx = [char2index[c] for c in word]\n",
 598 |     "    word_idx\n",
 599 |     "    with torch.no_grad():\n",
 600 |     "        X = torch.tensor(word_idx).unsqueeze(0).to(device)\n",
 601 |     "        model.eval()\n",
 602 |     "        y_ = model(X).cpu()\n",
 603 |     "    pred = index2char[torch.argmax(y_).item()]\n",
 604 |     "    print(f\"{word} --> '{pred}'\")"
 605 |    ]
 606 |   },
 607 |   {
 608 |    "cell_type": "code",
 609 |    "execution_count": 22,
 610 |    "metadata": {
 611 |     "execution": {
 612 |      "iopub.execute_input": "2020-12-06T08:47:44.569376Z",
 613 |      "iopub.status.busy": "2020-12-06T08:47:44.568799Z",
 614 |      "iopub.status.idle": "2020-12-06T08:47:44.573254Z",
 615 |      "shell.execute_reply": "2020-12-06T08:47:44.572657Z"
 616 |     }
 617 |    },
 618 |    "outputs": [
 619 |     {
 620 |      "name": "stdout",
 621 |      "output_type": "stream",
 622 |      "text": [
 623 |       "the --> ' '\n"
 624 |      ]
 625 |     }
 626 |    ],
 627 |    "source": [
 628 |     "predict(\"the\")"
 629 |    ]
 630 |   },
 631 |   {
 632 |    "cell_type": "code",
 633 |    "execution_count": 23,
 634 |    "metadata": {
 635 |     "execution": {
 636 |      "iopub.execute_input": "2020-12-06T08:47:44.578272Z",
 637 |      "iopub.status.busy": "2020-12-06T08:47:44.577584Z",
 638 |      "iopub.status.idle": "2020-12-06T08:47:44.586743Z",
 639 |      "shell.execute_reply": "2020-12-06T08:47:44.587254Z"
 640 |     }
 641 |    },
 642 |    "outputs": [
 643 |     {
 644 |      "name": "stdout",
 645 |      "output_type": "stream",
 646 |      "text": [
 647 |       "wom --> 'e'\n"
 648 |      ]
 649 |     }
 650 |    ],
 651 |    "source": [
 652 |     "predict(\"wom\")"
 653 |    ]
 654 |   },
 655 |   {
 656 |    "cell_type": "code",
 657 |    "execution_count": 24,
 658 |    "metadata": {
 659 |     "execution": {
 660 |      "iopub.execute_input": "2020-12-06T08:47:44.591272Z",
 661 |      "iopub.status.busy": "2020-12-06T08:47:44.590478Z",
 662 |      "iopub.status.idle": "2020-12-06T08:47:44.594301Z",
 663 |      "shell.execute_reply": "2020-12-06T08:47:44.594786Z"
 664 |     }
 665 |    },
 666 |    "outputs": [
 667 |     {
 668 |      "name": "stdout",
 669 |      "output_type": "stream",
 670 |      "text": [
 671 |       "man --> ' '\n"
 672 |      ]
 673 |     }
 674 |    ],
 675 |    "source": [
 676 |     "predict(\"man\")"
 677 |    ]
 678 |   },
 679 |   {
 680 |    "cell_type": "code",
 681 |    "execution_count": 25,
 682 |    "metadata": {
 683 |     "execution": {
 684 |      "iopub.execute_input": "2020-12-06T08:47:44.599994Z",
 685 |      "iopub.status.busy": "2020-12-06T08:47:44.599344Z",
 686 |      "iopub.status.idle": "2020-12-06T08:47:44.602202Z",
 687 |      "shell.execute_reply": "2020-12-06T08:47:44.602759Z"
 688 |     }
 689 |    },
 690 |    "outputs": [
 691 |     {
 692 |      "name": "stdout",
 693 |      "output_type": "stream",
 694 |      "text": [
 695 |       "hum --> 'a'\n"
 696 |      ]
 697 |     }
 698 |    ],
 699 |    "source": [
 700 |     "predict(\"hum\")"
 701 |    ]
 702 |   },
 703 |   {
 704 |    "cell_type": "code",
 705 |    "execution_count": 26,
 706 |    "metadata": {
 707 |     "execution": {
 708 |      "iopub.execute_input": "2020-12-06T08:47:44.613497Z",
 709 |      "iopub.status.busy": "2020-12-06T08:47:44.612587Z",
 710 |      "iopub.status.idle": "2020-12-06T08:47:44.614830Z",
 711 |      "shell.execute_reply": "2020-12-06T08:47:44.615544Z"
 712 |     }
 713 |    },
 714 |    "outputs": [],
 715 |    "source": [
 716 |     "class CharModel(nn.Module):\n",
 717 |     "    def __init__(self, n_vocab, n_embedding, n_hidden):\n",
 718 |     "        super().__init__()\n",
 719 |     "        self.emb = nn.Embedding(n_vocab, n_embedding)\n",
 720 |     "        self.lin_in = nn.Linear(n_embedding, n_hidden)\n",
 721 |     "        self.lin_hidden = nn.Linear(n_hidden, n_hidden)\n",
 722 |     "        self.lin_out = nn.Linear(n_hidden, n_vocab)\n",
 723 |     "        \n",
 724 |     "    def forward(self, X):\n",
 725 |     "        c1, c2, c3 = X[:, 0], X[:, 1], X[:, 2]\n",
 726 |     "        \n",
 727 |     "        in1 = F.relu(self.lin_in(self.emb(c1)))       \n",
 728 |     "        in2 = F.relu(self.lin_in(self.emb(c2)))\n",
 729 |     "        in3 = F.relu(self.lin_in(self.emb(c3)))\n",
 730 |     "\n",
 731 |     "        h = F.tanh(self.lin_hidden(in1))\n",
 732 |     "        h = F.tanh(self.lin_hidden(h + in2))\n",
 733 |     "        h = F.tanh(self.lin_hidden(h + in3))\n",
 734 |     "        \n",
 735 |     "        return F.log_softmax(self.lin_out(h), dim=-1)"
 736 |    ]
 737 |   },
 738 |   {
 739 |    "cell_type": "code",
 740 |    "execution_count": 27,
 741 |    "metadata": {
 742 |     "execution": {
 743 |      "iopub.execute_input": "2020-12-06T08:47:44.620975Z",
 744 |      "iopub.status.busy": "2020-12-06T08:47:44.620431Z",
 745 |      "iopub.status.idle": "2020-12-06T08:48:12.736384Z",
 746 |      "shell.execute_reply": "2020-12-06T08:48:12.736856Z"
 747 |     }
 748 |    },
 749 |    "outputs": [
 750 |     {
 751 |      "name": "stdout",
 752 |      "output_type": "stream",
 753 |      "text": [
 754 |       "Epoch 0:\n",
 755 |       "  Train Loss: 2.2230\n",
 756 |       "  Train Acc:  0.37\n",
 757 |       "Epoch 1:\n",
 758 |       "  Train Loss: 1.9139\n",
 759 |       "  Train Acc:  0.44\n",
 760 |       "\n",
 761 |       "the --> ' '\n",
 762 |       "wom --> 'e'\n",
 763 |       "man --> ' '\n",
 764 |       "hum --> 'a'\n"
 765 |      ]
 766 |     }
 767 |    ],
 768 |    "source": [
 769 |     "model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=128).to(device)\n",
 770 |     "fit(model)\n",
 771 |     "\n",
 772 |     "print()\n",
 773 |     "predict(\"the\")\n",
 774 |     "predict(\"wom\")\n",
 775 |     "predict(\"man\")\n",
 776 |     "predict(\"hum\")"
 777 |    ]
 778 |   },
 779 |   {
 780 |    "cell_type": "code",
 781 |    "execution_count": 28,
 782 |    "metadata": {
 783 |     "execution": {
 784 |      "iopub.execute_input": "2020-12-06T08:48:12.743238Z",
 785 |      "iopub.status.busy": "2020-12-06T08:48:12.742679Z",
 786 |      "iopub.status.idle": "2020-12-06T08:48:12.744508Z",
 787 |      "shell.execute_reply": "2020-12-06T08:48:12.744957Z"
 788 |     }
 789 |    },
 790 |    "outputs": [],
 791 |    "source": [
 792 |     "class CharModel(nn.Module):\n",
 793 |     "    def __init__(self, n_vocab, n_embedding, n_hidden):\n",
 794 |     "        super().__init__()\n",
 795 |     "        self.emb = nn.Embedding(n_vocab, n_embedding)\n",
 796 |     "        self.lin_in = nn.Linear(n_embedding, n_hidden)\n",
 797 |     "        self.lin_hidden = nn.Linear(n_hidden, n_hidden)\n",
 798 |     "        self.lin_out = nn.Linear(n_hidden, n_vocab)\n",
 799 |     "        # Keep the hidden size so forward() can build the initial state itself.\n",
 800 |     "        self.n_hidden = n_hidden\n",
 801 |     "        \n",
 802 |     "    def forward(self, X):\n",
 803 |     "        c1, c2, c3 = X[:, 0], X[:, 1], X[:, 2]\n",
 804 |     "        \n",
 805 |     "        in1 = F.relu(self.lin_in(self.emb(c1)))       \n",
 806 |     "        in2 = F.relu(self.lin_in(self.emb(c2)))\n",
 807 |     "        in3 = F.relu(self.lin_in(self.emb(c3)))\n",
 808 |     "        # Zero initial state sized by this module (not a notebook global), on the input's device.\n",
 809 |     "        h = torch.zeros(X.shape[0], self.n_hidden, device=X.device)\n",
 810 |     "        h = F.tanh(self.lin_hidden(h + in1))\n",
 811 |     "        h = F.tanh(self.lin_hidden(h + in2))\n",
 812 |     "        h = F.tanh(self.lin_hidden(h + in3))\n",
 813 |     "        \n",
 814 |     "        return F.log_softmax(self.lin_out(h), dim=-1)"
 815 |    ]
 816 |   },
 817 |   {
 818 |    "cell_type": "code",
 819 |    "execution_count": 29,
 820 |    "metadata": {
 821 |     "execution": {
 822 |      "iopub.execute_input": "2020-12-06T08:48:12.748353Z",
 823 |      "iopub.status.busy": "2020-12-06T08:48:12.747800Z",
 824 |      "iopub.status.idle": "2020-12-06T08:48:50.701859Z",
 825 |      "shell.execute_reply": "2020-12-06T08:48:50.701338Z"
 826 |     }
 827 |    },
 828 |    "outputs": [
 829 |     {
 830 |      "name": "stdout",
 831 |      "output_type": "stream",
 832 |      "text": [
 833 |       "Epoch 0:\n",
 834 |       "  Train Loss: 2.0938\n",
 835 |       "  Train Acc:  0.40\n",
 836 |       "Epoch 1:\n",
 837 |       "  Train Loss: 1.8023\n",
 838 |       "  Train Acc:  0.47\n",
 839 |       "\n",
 840 |       "the --> ' '\n",
 841 |       "wom --> 'e'\n",
 842 |       "man --> ' '\n",
 843 |       "hum --> 'a'\n"
 844 |      ]
 845 |     }
 846 |    ],
 847 |    "source": [
 848 |     "model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=n_hidden).to(device)\n",
 849 |     "fit(model)\n",
 850 |     "\n",
 851 |     "print()\n",
 852 |     "predict(\"the\")\n",
 853 |     "predict(\"wom\")\n",
 854 |     "predict(\"man\")\n",
 855 |     "predict(\"hum\")"
 856 |    ]
 857 |   },
 858 |   {
 859 |    "cell_type": "code",
 860 |    "execution_count": 30,
 861 |    "metadata": {
 862 |     "execution": {
 863 |      "iopub.execute_input": "2020-12-06T08:48:50.707961Z",
 864 |      "iopub.status.busy": "2020-12-06T08:48:50.707177Z",
 865 |      "iopub.status.idle": "2020-12-06T08:48:50.709589Z",
 866 |      "shell.execute_reply": "2020-12-06T08:48:50.709092Z"
 867 |     }
 868 |    },
 869 |    "outputs": [],
 870 |    "source": [
 871 |     "class CharModel(nn.Module):\n",
 872 |     "    def __init__(self, n_vocab, n_embedding, n_hidden):\n",
 873 |     "        super().__init__()\n",
 874 |     "        self.emb = nn.Embedding(n_vocab, n_embedding)\n",
 875 |     "        self.lin_in = nn.Linear(n_embedding, n_hidden)\n",
 876 |     "        self.lin_hidden = nn.Linear(n_hidden, n_hidden)\n",
 877 |     "        self.lin_out = nn.Linear(n_hidden, n_vocab)\n",
 878 |     "        # Keep the hidden size so forward() can build the initial state itself.\n",
 879 |     "        self.n_hidden = n_hidden\n",
 880 |     "        \n",
 881 |     "    def forward(self, X):\n",
 882 |     "        h = torch.zeros(X.shape[0], self.n_hidden, device=X.device)\n",
 883 |     "        for i in range(X.shape[1]):\n",
 884 |     "            c = X[:, i]\n",
 885 |     "            in_ = F.relu(self.lin_in(self.emb(c)))\n",
 886 |     "            h = F.tanh(self.lin_hidden(h + in_))\n",
 887 |     "        # Only the hidden state after the last character is decoded.\n",
 888 |     "        return F.log_softmax(self.lin_out(h), dim=-1)"
 889 |    ]
 890 |   },
 891 |   {
 892 |    "cell_type": "code",
 893 |    "execution_count": 31,
 894 |    "metadata": {
 895 |     "execution": {
 896 |      "iopub.execute_input": "2020-12-06T08:48:50.713113Z",
 897 |      "iopub.status.busy": "2020-12-06T08:48:50.712537Z",
 898 |      "iopub.status.idle": "2020-12-06T08:49:28.649837Z",
 899 |      "shell.execute_reply": "2020-12-06T08:49:28.650337Z"
 900 |     }
 901 |    },
 902 |    "outputs": [
 903 |     {
 904 |      "name": "stdout",
 905 |      "output_type": "stream",
 906 |      "text": [
 907 |       "Epoch 0:\n",
 908 |       "  Train Loss: 2.0920\n",
 909 |       "  Train Acc:  0.40\n",
 910 |       "Epoch 1:\n",
 911 |       "  Train Loss: 1.7984\n",
 912 |       "  Train Acc:  0.47\n",
 913 |       "\n",
 914 |       "the --> ' '\n",
 915 |       "wom --> 'a'\n",
 916 |       "man --> ' '\n",
 917 |       "hum --> 'a'\n"
 918 |      ]
 919 |     }
 920 |    ],
 921 |    "source": [
 922 |     "model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=n_hidden).to(device)\n",
 923 |     "fit(model)\n",
 924 |     "\n",
 925 |     "print()\n",
 926 |     "predict(\"the\")\n",
 927 |     "predict(\"wom\")\n",
 928 |     "predict(\"man\")\n",
 929 |     "predict(\"hum\")"
 930 |    ]
 931 |   },
 932 |   {
 933 |    "cell_type": "code",
 934 |    "execution_count": 32,
 935 |    "metadata": {
 936 |     "execution": {
 937 |      "iopub.execute_input": "2020-12-06T08:49:28.653466Z",
 938 |      "iopub.status.busy": "2020-12-06T08:49:28.652870Z",
 939 |      "iopub.status.idle": "2020-12-06T08:49:28.656600Z",
 940 |      "shell.execute_reply": "2020-12-06T08:49:28.657108Z"
 941 |     }
 942 |    },
 943 |    "outputs": [
 944 |     {
 945 |      "name": "stdout",
 946 |      "output_type": "stream",
 947 |      "text": [
 948 |       "the huma --> 'n'\n"
 949 |      ]
 950 |     }
 951 |    ],
 952 |    "source": [
 953 |     "predict(\"the huma\")"
 954 |    ]
 955 |   },
 956 |   {
 957 |    "cell_type": "code",
 958 |    "execution_count": 33,
 959 |    "metadata": {
 960 |     "execution": {
 961 |      "iopub.execute_input": "2020-12-06T08:49:28.660105Z",
 962 |      "iopub.status.busy": "2020-12-06T08:49:28.659516Z",
 963 |      "iopub.status.idle": "2020-12-06T08:49:28.664202Z",
 964 |      "shell.execute_reply": "2020-12-06T08:49:28.663749Z"
 965 |     }
 966 |    },
 967 |    "outputs": [
 968 |     {
 969 |      "name": "stdout",
 970 |      "output_type": "stream",
 971 |      "text": [
 972 |       "those  --> 'o'\n"
 973 |      ]
 974 |     }
 975 |    ],
 976 |    "source": [
 977 |     "predict(\"those \")"
 978 |    ]
 979 |   },
 980 |   {
 981 |    "cell_type": "code",
 982 |    "execution_count": 34,
 983 |    "metadata": {
 984 |     "execution": {
 985 |      "iopub.execute_input": "2020-12-06T08:49:28.667736Z",
 986 |      "iopub.status.busy": "2020-12-06T08:49:28.666915Z",
 987 |      "iopub.status.idle": "2020-12-06T08:49:28.671387Z",
 988 |      "shell.execute_reply": "2020-12-06T08:49:28.670940Z"
 989 |     }
 990 |    },
 991 |    "outputs": [
 992 |     {
 993 |      "name": "stdout",
 994 |      "output_type": "stream",
 995 |      "text": [
 996 |       "those o --> 'f'\n"
 997 |      ]
 998 |     }
 999 |    ],
1000 |    "source": [
1001 |     "predict(\"those o\")"
1002 |    ]
1003 |   },
1004 |   {
1005 |    "cell_type": "code",
1006 |    "execution_count": 35,
1007 |    "metadata": {
1008 |     "execution": {
1009 |      "iopub.execute_input": "2020-12-06T08:49:28.674900Z",
1010 |      "iopub.status.busy": "2020-12-06T08:49:28.674088Z",
1011 |      "iopub.status.idle": "2020-12-06T08:49:28.678752Z",
1012 |      "shell.execute_reply": "2020-12-06T08:49:28.678304Z"
1013 |     }
1014 |    },
1015 |    "outputs": [
1016 |     {
1017 |      "name": "stdout",
1018 |      "output_type": "stream",
1019 |      "text": [
1020 |       "those of  --> 's'\n"
1021 |      ]
1022 |     }
1023 |    ],
1024 |    "source": [
1025 |     "predict(\"those of \")"
1026 |    ]
1027 |   },
1028 |   {
1029 |    "cell_type": "code",
1030 |    "execution_count": 36,
1031 |    "metadata": {
1032 |     "execution": {
1033 |      "iopub.execute_input": "2020-12-06T08:49:28.682279Z",
1034 |      "iopub.status.busy": "2020-12-06T08:49:28.681453Z",
1035 |      "iopub.status.idle": "2020-12-06T08:49:28.685718Z",
1036 |      "shell.execute_reply": "2020-12-06T08:49:28.686154Z"
1037 |     }
1038 |    },
1039 |    "outputs": [
1040 |     {
1041 |      "name": "stdout",
1042 |      "output_type": "stream",
1043 |      "text": [
1044 |       "those of u --> 'p'\n"
1045 |      ]
1046 |     }
1047 |    ],
1048 |    "source": [
1049 |     "predict(\"those of u\")"
1050 |    ]
1051 |   },
1052 |   {
1053 |    "cell_type": "markdown",
1054 |    "metadata": {},
1055 |    "source": [
1056 |     "You can use `nn.Sequential` to make it a bit more readable."
1057 |    ]
1058 |   },
1059 |   {
1060 |    "cell_type": "code",
1061 |    "execution_count": 37,
1062 |    "metadata": {
1063 |     "execution": {
1064 |      "iopub.execute_input": "2020-12-06T08:49:28.692174Z",
1065 |      "iopub.status.busy": "2020-12-06T08:49:28.691515Z",
1066 |      "iopub.status.idle": "2020-12-06T08:49:28.693952Z",
1067 |      "shell.execute_reply": "2020-12-06T08:49:28.693423Z"
1068 |     }
1069 |    },
1070 |    "outputs": [],
1071 |    "source": [
1072 |     "class CharModel(nn.Module):\n",
1073 |     "    def __init__(self, n_vocab, n_embedding, n_hidden):\n",
1074 |     "        super().__init__()\n",
1075 |     "        self.i2e = nn.Sequential(\n",
1076 |     "            nn.Embedding(n_vocab, n_embedding),\n",
1077 |     "            nn.Linear(n_embedding, n_hidden),\n",
1078 |     "            nn.ReLU(),\n",
1079 |     "        )\n",
1080 |     "        self.h2h = nn.Sequential(\n",
1081 |     "            nn.Linear(n_hidden, n_hidden),\n",
1082 |     "            nn.Tanh(),\n",
1083 |     "        )\n",
1084 |     "        self.h2out = nn.Linear(n_hidden, n_vocab)\n",
1085 |     "        \n",
1086 |     "        self.n_hidden = n_hidden\n",
1087 |     "        \n",
1088 |     "    def forward(self, X):\n",
1089 |     "        h = torch.zeros(X.shape[0], n_hidden, requires_grad=True).to(device)\n",
1090 |     "        for i in range(X.shape[1]):\n",
1091 |     "            c = X[:, i]\n",
1092 |     "            h = self.h2h(h + self.i2e(c))\n",
1093 |     "\n",
1094 |     "        return F.log_softmax(self.h2out(h), dim=-1)"
1095 |    ]
1096 |   },
1097 |   {
1098 |    "cell_type": "code",
1099 |    "execution_count": 38,
1100 |    "metadata": {
1101 |     "execution": {
1102 |      "iopub.execute_input": "2020-12-06T08:49:28.697759Z",
1103 |      "iopub.status.busy": "2020-12-06T08:49:28.697090Z",
1104 |      "iopub.status.idle": "2020-12-06T08:50:06.780817Z",
1105 |      "shell.execute_reply": "2020-12-06T08:50:06.781311Z"
1106 |     }
1107 |    },
1108 |    "outputs": [
1109 |     {
1110 |      "name": "stdout",
1111 |      "output_type": "stream",
1112 |      "text": [
1113 |       "Epoch 0:\n",
1114 |       "  Train Loss: 2.0896\n",
1115 |       "  Train Acc:  0.40\n",
1116 |       "Epoch 1:\n",
1117 |       "  Train Loss: 1.7967\n",
1118 |       "  Train Acc:  0.47\n",
1119 |       "\n",
1120 |       "the --> ' '\n",
1121 |       "wom --> 'a'\n",
1122 |       "man --> ' '\n",
1123 |       "hum --> 'a'\n"
1124 |      ]
1125 |     }
1126 |    ],
1127 |    "source": [
1128 |     "model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=n_hidden).to(device)\n",
1129 |     "fit(model)\n",
1130 |     "\n",
1131 |     "print()\n",
1132 |     "predict(\"the\")\n",
1133 |     "predict(\"wom\")\n",
1134 |     "predict(\"man\")\n",
1135 |     "predict(\"hum\")"
1136 |    ]
1137 |   }
1138 |  ],
1139 |  "metadata": {
1140 |   "kernelspec": {
1141 |    "display_name": "Python 3",
1142 |    "language": "python",
1143 |    "name": "python3"
1144 |   },
1145 |   "language_info": {
1146 |    "codemirror_mode": {
1147 |     "name": "ipython",
1148 |     "version": 3
1149 |    },
1150 |    "file_extension": ".py",
1151 |    "mimetype": "text/x-python",
1152 |    "name": "python",
1153 |    "nbconvert_exporter": "python",
1154 |    "pygments_lexer": "ipython3",
1155 |    "version": "3.8.5"
1156 |   }
1157 |  },
1158 |  "nbformat": 4,
1159 |  "nbformat_minor": 2
1160 | }
1161 | 


--------------------------------------------------------------------------------
/notebooks/rnn_from_scratch.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # RNN from scratch with PyTorch
 17 | # An RNN is just a normal NN.
 18 | # It's very easy to implement in PyTorch due to its dynamic nature.
 19 | #
 20 | # We'll build a very simple character based language model.
 21 | #
 22 | # Taken from http://www.fast.ai/
 23 | 
 24 | # %% [markdown]
 25 | # ## Init and helpers
 26 | 
 27 | # %%
 28 | from pathlib import Path
 29 | import numpy as np
 30 | 
 31 | # %% [markdown]
 32 | # ## Data
 33 | 
 34 | # %%
 35 | NIETSCHE_PATH = Path("../data/raw/nietzsche.txt")
 36 | if NIETSCHE_PATH.is_file():
 37 |     print("I already have the data.")
 38 | else:
 39 |     # !wget -o ../data/raw/nietzsche.txt https://s3.amazonaws.com/text-datasets/nietzsche.txt
 40 |         
 41 | with NIETSCHE_PATH.open() as f:
 42 |     data = f.read()
 43 | 
 44 | # %% [markdown]
 45 | # A tweet of Nietzsche:
 46 | 
 47 | # %%
 48 | print(data[:140])
 49 | 
 50 | # %% [markdown]
 51 | # We need to know the alphabet and we add a padding value "\0" to the alphabet.
 52 | 
 53 | # %%
 54 | alphabet = ["\0", *sorted(list(set(data)))]
 55 | n_alphabet = len(alphabet)
 56 | n_alphabet
 57 | 
 58 | # %%
 59 | char2index = {c: i for i, c in enumerate(alphabet)}
 60 | index2char = {i: c for i, c in enumerate(alphabet)}
 61 | 
 62 | # %% [markdown]
 63 | # Convert the data into a list of integers
 64 | 
 65 | # %%
 66 | index = [char2index[c] for c in data]
 67 | 
 68 | # %%
 69 | print(index[:25])
 70 | print("".join(index2char[i] for i in index[:25]))
 71 | 
 72 | # %%
 73 | index[0: 3]
 74 | 
 75 | # %%
 76 | X, y = [], []
 77 | for i in range(len(index) - 4):
 78 |     X.append(index[i : i + 3])
 79 |     y.append(index[i + 3])
 80 |     
 81 | X = np.stack(X)
 82 | y = np.stack(y)
 83 | 
 84 | # %%
 85 | X.shape, y.shape
 86 | 
 87 | # %%
 88 | X[0], y[0]
 89 | 
 90 | # %%
 91 | type(y)
 92 | 
 93 | # %%
 94 | import torch
 95 | from torch.utils.data import DataLoader, Dataset, TensorDataset
 96 | 
 97 | 
 98 | train_ds = TensorDataset(torch.from_numpy(X), torch.from_numpy(y))
 99 | train_dl = DataLoader(train_ds, batch_size=500)
100 | 
101 | # %% [markdown]
102 | # # The model
103 | 
104 | # %%
105 | import torch
106 | import torch.nn as nn
107 | import torch.nn.functional as F
108 | import torch.optim as optim
109 | 
110 | # %%
111 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
112 | device
113 | 
114 | 
115 | # %%
class CharModel(nn.Module):
    """Character-level model that predicts the next character from three inputs.

    The recurrence is unrolled by hand: every input character goes through the
    same embedding and input layer, and one shared hidden layer folds the
    characters into the state one after another.
    """

    def __init__(self, n_vocab, n_embedding, n_hidden):
        """Create the layers.

        Args:
            n_vocab: number of embedding rows and of output classes.
            n_embedding: width of the character embedding.
            n_hidden: width of the hidden state.
        """
        super().__init__()
        self.emb = nn.Embedding(n_vocab, n_embedding)
        self.lin_in = nn.Linear(n_embedding, n_hidden)

        self.lin_hidden = nn.Linear(n_hidden, n_hidden)
        self.lin_out = nn.Linear(n_hidden, n_vocab)

    def forward(self, X):
        """Return log-probabilities over the vocabulary for every row of ``X``.

        Only columns 0, 1 and 2 of ``X`` are read.
        """
        first, second, third = (X[:, pos] for pos in range(3))

        # First character: there is no previous state to add yet.
        state = F.tanh(self.lin_hidden(F.relu(self.lin_in(self.emb(first)))))

        # Remaining characters: previous state plus the projected input.
        for char in (second, third):
            projected = F.relu(self.lin_in(self.emb(char)))
            state = F.tanh(self.lin_hidden(state + projected))

        return F.log_softmax(self.lin_out(state), dim=-1)
138 | 
139 | 
140 | # %%
# Sizes reused by the model cells further down in this notebook.
n_embedding = 40
n_hidden = 256

# NOTE(review): this first model is built with 128 hidden units rather than
# `n_hidden` — confirm that this is intended.
model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=128).to(device)
146 | 
147 | # %%
148 | optimizer = optim.Adam(model.parameters(), 0.001)
149 | #criterion = nn.CrossEntropyLoss()
150 | criterion = F.nll_loss
151 | 
152 | 
153 | # %%
def fit(model, n_epoch=2):
    """Train ``model`` on ``train_dl`` and print loss and accuracy per epoch.

    A fresh Adam optimizer with learning rate 0.001 is created on every call.
    The loss is computed with the module-level ``criterion`` and every batch is
    moved to the module-level ``device`` first.

    Args:
        model: network whose parameters are optimized in place.
        n_epoch: number of passes over ``train_dl``.
    """
    adam = optim.Adam(model.parameters(), 0.001)

    for epoch in range(n_epoch):
        print(f"Epoch {epoch}:")
        loss_sum = 0.0
        n_correct = 0

        model.train()
        for batch_X, batch_y in train_dl:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            adam.zero_grad()
            log_probs = model(batch_X)
            batch_loss = criterion(log_probs, batch_y)
            batch_loss.backward()
            adam.step()

            # Scale by the batch size so that the division by the dataset size
            # below yields a per-sample figure.
            loss_sum += batch_loss.item() * batch_X.shape[0]
            n_correct += (log_probs.argmax(dim=1) == batch_y).sum().item()

        n_samples = len(train_dl.dataset)
        print(f"  Train Loss: {loss_sum / n_samples:0.4f}")
        print(f"  Train Acc:  {n_correct / n_samples:0.2f}")
178 | 
179 | 
180 | # %%
181 | fit(model, 2)
182 | 
183 | 
184 | # %%
def predict(word):
    """Print the character the module-level ``model`` ranks highest after ``word``.

    Every character of ``word`` is looked up in ``char2index``; a character
    missing from that table raises ``KeyError``. The model is switched to
    evaluation mode and stays in it after the call.

    Args:
        word: input text, fed to the model as a single-row batch.
    """
    indices = [char2index[ch] for ch in word]
    batch = torch.tensor(indices).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        log_probs = model(batch).cpu()

    best = torch.argmax(log_probs).item()
    print(f"{word} --> '{index2char[best]}'")
194 | 
195 | 
196 | # %%
197 | predict("the")
198 | 
199 | # %%
200 | predict("wom")
201 | 
202 | # %%
203 | predict("man")
204 | 
205 | # %%
206 | predict("hum")
207 | 
208 | 
209 | # %%
class CharModel(nn.Module):
    """Three-character language model; input projections are computed up front.

    Same computation as the hand-unrolled variant, but the three characters are
    projected first and the hidden-state updates follow as one group.
    """

    def __init__(self, n_vocab, n_embedding, n_hidden):
        """Create the layers.

        Args:
            n_vocab: number of embedding rows and of output classes.
            n_embedding: width of the character embedding.
            n_hidden: width of the hidden state.
        """
        super().__init__()
        self.emb = nn.Embedding(n_vocab, n_embedding)
        self.lin_in = nn.Linear(n_embedding, n_hidden)
        self.lin_hidden = nn.Linear(n_hidden, n_hidden)
        self.lin_out = nn.Linear(n_hidden, n_vocab)

    def forward(self, X):
        """Return log-probabilities over the vocabulary for every row of ``X``.

        Only columns 0, 1 and 2 of ``X`` are read.
        """
        # Input side: the same embedding + linear + ReLU for each character.
        projected = [F.relu(self.lin_in(self.emb(X[:, pos]))) for pos in range(3)]

        # Recurrent side: the first update has no previous state to add.
        state = F.tanh(self.lin_hidden(projected[0]))
        state = F.tanh(self.lin_hidden(state + projected[1]))
        state = F.tanh(self.lin_hidden(state + projected[2]))

        return F.log_softmax(self.lin_out(state), dim=-1)
230 | 
231 | 
232 | # %%
233 | model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=128).to(device)
234 | fit(model)
235 | 
236 | print()
237 | predict("the")
238 | predict("wom")
239 | predict("man")
240 | predict("hum")
241 | 
242 | 
243 | # %%
class CharModel(nn.Module):
    """Three-character language model with an explicit, all-zero initial state.

    Starting from a zero state makes the three hidden-state updates identical,
    which prepares the step to a loop over the characters.
    """

    def __init__(self, n_vocab, n_embedding, n_hidden):
        """Create the layers.

        Args:
            n_vocab: number of embedding rows and of output classes.
            n_embedding: width of the character embedding.
            n_hidden: width of the hidden state.
        """
        super().__init__()
        self.emb = nn.Embedding(n_vocab, n_embedding)
        self.lin_in = nn.Linear(n_embedding, n_hidden)
        self.lin_hidden = nn.Linear(n_hidden, n_hidden)
        self.lin_out = nn.Linear(n_hidden, n_vocab)

        self.n_hidden = n_hidden

    def forward(self, X):
        """Return log-probabilities over the vocabulary for every row of ``X``.

        Only columns 0, 1 and 2 of ``X`` are read.
        """
        c1, c2, c3 = X[:, 0], X[:, 1], X[:, 2]

        in1 = F.relu(self.lin_in(self.emb(c1)))
        in2 = F.relu(self.lin_in(self.emb(c2)))
        in3 = F.relu(self.lin_in(self.emb(c3)))

        # Size the initial state from this instance and create it on the
        # input's device instead of reading module-level globals. It is a
        # constant, so it does not need to track gradients.
        h = torch.zeros(X.shape[0], self.n_hidden, device=X.device)
        h = F.tanh(self.lin_hidden(h + in1))
        h = F.tanh(self.lin_hidden(h + in2))
        h = F.tanh(self.lin_hidden(h + in3))

        return F.log_softmax(self.lin_out(h), dim=-1)
267 | 
268 | 
269 | # %%
270 | model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=n_hidden).to(device)
271 | fit(model)
272 | 
273 | print()
274 | predict("the")
275 | predict("wom")
276 | predict("man")
277 | predict("hum")
278 | 
279 | 
280 | # %%
class CharModel(nn.Module):
    """Character-level recurrent model for inputs of any length.

    The hidden state starts at zero and is updated once per column of the
    input with the same layers; the final state is mapped to log-probabilities
    over the vocabulary.
    """

    def __init__(self, n_vocab, n_embedding, n_hidden):
        """Create the layers.

        Args:
            n_vocab: number of embedding rows and of output classes.
            n_embedding: width of the character embedding.
            n_hidden: width of the hidden state.
        """
        super().__init__()
        self.emb = nn.Embedding(n_vocab, n_embedding)
        self.lin_in = nn.Linear(n_embedding, n_hidden)
        self.lin_hidden = nn.Linear(n_hidden, n_hidden)
        self.lin_out = nn.Linear(n_hidden, n_vocab)

        self.n_hidden = n_hidden

    def forward(self, X):
        """Return log-probabilities over the vocabulary for every row of ``X``.

        Every column of ``X`` is consumed, from left to right.
        """
        # Size the initial state from this instance and create it on the
        # input's device instead of reading module-level globals. It is a
        # constant, so it does not need to track gradients.
        h = torch.zeros(X.shape[0], self.n_hidden, device=X.device)
        for i in range(X.shape[1]):
            c = X[:, i]
            in_ = F.relu(self.lin_in(self.emb(c)))
            h = F.tanh(self.lin_hidden(h + in_))

        return F.log_softmax(self.lin_out(h), dim=-1)
299 | 
300 | 
301 | # %%
302 | model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=n_hidden).to(device)
303 | fit(model)
304 | 
305 | print()
306 | predict("the")
307 | predict("wom")
308 | predict("man")
309 | predict("hum")
310 | 
311 | # %%
312 | predict("the huma")
313 | 
314 | # %%
315 | predict("those ")
316 | 
317 | # %%
318 | predict("those o")
319 | 
320 | # %%
321 | predict("those of ")
322 | 
323 | # %%
324 | predict("those of u")
325 | 
326 | 
327 | # %% [markdown]
328 | # You can use `nn.Sequential` to make it a bit more readable.
329 | 
330 | # %%
class CharModel(nn.Module):
    """Character-level recurrent model assembled from ``nn.Sequential`` parts.

    ``i2e`` turns a batch of character indices into the projected input,
    ``h2h`` performs one hidden-state update and ``h2out`` maps the final state
    to one score per vocabulary entry.
    """

    def __init__(self, n_vocab, n_embedding, n_hidden):
        """Create the layers.

        Args:
            n_vocab: number of embedding rows and of output classes.
            n_embedding: width of the character embedding.
            n_hidden: width of the hidden state.
        """
        super().__init__()
        self.i2e = nn.Sequential(
            nn.Embedding(n_vocab, n_embedding),
            nn.Linear(n_embedding, n_hidden),
            nn.ReLU(),
        )
        self.h2h = nn.Sequential(
            nn.Linear(n_hidden, n_hidden),
            nn.Tanh(),
        )
        self.h2out = nn.Linear(n_hidden, n_vocab)

        self.n_hidden = n_hidden

    def forward(self, X):
        """Return log-probabilities over the vocabulary for every row of ``X``.

        Every column of ``X`` is consumed, from left to right.
        """
        # Size the initial state from this instance and create it on the
        # input's device instead of reading module-level globals. It is a
        # constant, so it does not need to track gradients.
        h = torch.zeros(X.shape[0], self.n_hidden, device=X.device)
        for i in range(X.shape[1]):
            c = X[:, i]
            h = self.h2h(h + self.i2e(c))

        return F.log_softmax(self.h2out(h), dim=-1)
354 | 
355 | 
356 | # %%
357 | model = CharModel(n_alphabet, n_embedding=n_embedding, n_hidden=n_hidden).to(device)
358 | fit(model)
359 | 
360 | print()
361 | predict("the")
362 | predict("wom")
363 | predict("man")
364 | predict("hum")
365 | 


--------------------------------------------------------------------------------
/notebooks/storing_and_loading_models.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # Storing and Loading Models
 17 | #
 18 | # https://pytorch.org/tutorials/beginner/saving_loading_models.html
 19 | 
 20 | # %% [markdown]
 21 | # ## Init, helpers, utils, ...
 22 | 
 23 | # %%
 24 | # %matplotlib inline
 25 | 
 26 | # %%
 27 | import torch
 28 | import torch.nn as nn
 29 | import torch.nn.functional as F
 30 | import torch.optim as optim
 31 | import torchvision
 32 | 
 33 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 34 | 
 35 | # %%
 36 | from pprint import pprint
 37 | import matplotlib.pyplot as plt
 38 | import numpy as np
 39 | from IPython.core.debugger import set_trace
 40 | 
 41 | 
 42 | # %% [markdown]
 43 | # # `state_dict()`
 44 | 
 45 | # %% [markdown]
 46 | # ## `nn.Module.state_dict()`
 47 | # Every `nn.Module` has a state dict that maps each layer to its learnable parameters.
 48 | 
 49 | # %%
 50 | class Net(nn.Module):
 51 |     def __init__(self):
 52 |         super().__init__()
 53 |         self.conv1 = nn.Conv2d(3, 6, 5)
 54 |         self.pool = nn.MaxPool2d(2, 2)
 55 |         self.conv2 = nn.Conv2d(6, 16, 5)
 56 |         self.fc1 = nn.Linear(16 * 5 * 5, 120)
 57 |         self.fc2 = nn.Linear(120, 84)
 58 |         self.fc3 = nn.Linear(84, 10)
 59 | 
 60 |     def forward(self, x):
 61 |         x = self.pool(F.relu(self.conv1(x)))
 62 |         x = self.pool(F.relu(self.conv2(x)))
 63 |         x = x.view(-1, 16 * 5 * 5)
 64 |         x = F.relu(self.fc1(x))
 65 |         x = F.relu(self.fc2(x))
 66 |         x = self.fc3(x)
 67 |         return x
 68 | 
 69 | 
 70 | # %%
 71 | model = Net()
 72 | 
 73 | # %%
 74 | model.state_dict()
 75 | 
 76 | 
 77 | # %%
 78 | def state_dict_info(obj):
 79 |     print(f"{'layer':25} shape")
 80 |     print("===================================================")
 81 |     for k,v in obj.state_dict().items():
 82 |         try:
 83 |             print(f"{k:25} {v.shape}")
 84 |         except AttributeError:
 85 |             print(f"{k:25} {v}")
 86 | 
 87 | 
 88 | # %%
 89 | state_dict_info(model)
 90 | 
 91 | # %% [markdown]
 92 | # ## `optim.Optimizer`
 93 | #
 94 | # Optimizers also have a `state_dict`.
 95 | 
 96 | # %%
 97 | optimizer = optim.Adadelta(model.parameters())
 98 | 
 99 | # %%
100 | state_dict_info(optimizer)
101 | 
102 | # %%
103 | optimizer.state_dict()["state"]
104 | 
105 | # %%
106 | optimizer.state_dict()["param_groups"]
107 | 
108 | # %% [markdown]
109 | # ## Storing and loading `state_dict`
110 | 
111 | # %%
112 | model_file = "model_state_dict.pt"
113 | torch.save(model.state_dict(), model_file)
114 | 
115 | # %%
116 | model = Net()
117 | model.load_state_dict(torch.load(model_file))
118 | 
119 | # %% [markdown]
120 | # ## Storing and loading the full model
121 | 
122 | # %%
123 | model_file = "model_123.pt"
124 | torch.save(model, model_file)
125 | 
126 | # %%
127 | # Only works if code for `Net` is available right now
128 | model = torch.load(model_file)
129 | 
130 | # %% [markdown]
131 | # # Example Checkpointing
132 | # You can store model, optimizer and arbitrary information and reload it.
133 | #
134 | # Example:
135 | # ```python
136 | # torch.save(
137 | #     {
138 | #         'model_state_dict': model.state_dict(),
139 | #         'optimizer_state_dict': optimizer.state_dict(),
140 | #         'epoch': epoch,
141 | #         'loss': loss,
142 | #     },
143 | #     PATH,
144 | # )
145 | # ```
146 | 
147 | # %% [markdown]
148 | # # Exercise
149 | # - Find out what is going to be in the `state` variable of the `state_dict` of an optimizer.
150 | # - Write your own checkpoint functionality.
151 | 


--------------------------------------------------------------------------------
/notebooks/the_end.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# The End\n",
 8 |     "\n",
 9 |     "![https://twitter.com/karpathy/status/1013244313327681536](img/common_mistakes.png)\n",
10 |     "https://twitter.com/karpathy/status/1013244313327681536"
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "markdown",
15 |    "metadata": {},
16 |    "source": [
17 |     "**Goals:**\n",
18 |     "- understand PyTorch concepts\n",
19 |     "- be able to use transfer learning in PyTorch\n",
20 |     "- be aware of some handy tools/libs"
21 |    ]
22 |   },
23 |   {
24 |    "cell_type": "markdown",
25 |    "metadata": {},
26 |    "source": [
27 |     "# Interesting reads\n",
28 |     "\n",
29 |     "- [The road to 1.0: production ready PyTorch](https://pytorch.org/2018/05/02/road-to-1.0.html)\n",
30 |     "  - `torch.jit`\n",
31 |     "  - optimize for mobile\n",
32 |     "  - quantized inference (such as 8-bit inference)\n",
33 |     "  - [caffe2 already merged into pytorch repo](https://github.com/pytorch/pytorch/tree/master/caffe2)\n",
34 |     "  - ONNX - Open Neural Network Exchange\n",
35 |     "\n",
36 |     "- [PyTorch under the hood](https://speakerdeck.com/perone/pytorch-under-the-hood)\n",
37 |     "- PyTorch 1.0 videos"
38 |    ]
39 |   }
40 |  ],
41 |  "metadata": {
42 |   "kernelspec": {
43 |    "display_name": "Python 3",
44 |    "language": "python",
45 |    "name": "python3"
46 |   },
47 |   "language_info": {
48 |    "codemirror_mode": {
49 |     "name": "ipython",
50 |     "version": 3
51 |    },
52 |    "file_extension": ".py",
53 |    "mimetype": "text/x-python",
54 |    "name": "python",
55 |    "nbconvert_exporter": "python",
56 |    "pygments_lexer": "ipython3",
57 |    "version": "3.8.5"
58 |   }
59 |  },
60 |  "nbformat": 4,
61 |  "nbformat_minor": 2
62 | }
63 | 


--------------------------------------------------------------------------------
/notebooks/the_end.py:
--------------------------------------------------------------------------------
 1 | # ---
 2 | # jupyter:
 3 | #   jupytext:
 4 | #     text_representation:
 5 | #       extension: .py
 6 | #       format_name: percent
 7 | #       format_version: '1.3'
 8 | #       jupytext_version: 1.7.1
 9 | #   kernelspec:
10 | #     display_name: Python 3
11 | #     language: python
12 | #     name: python3
13 | # ---
14 | 
15 | # %% [markdown]
16 | # # The End
17 | #
18 | # ![https://twitter.com/karpathy/status/1013244313327681536](img/common_mistakes.png)
19 | # https://twitter.com/karpathy/status/1013244313327681536
20 | 
21 | # %% [markdown]
22 | # **Goals:**
23 | # - understand PyTorch concepts
24 | # - be able to use transfer learning in PyTorch
25 | # - be aware of some handy tools/libs
26 | 
27 | # %% [markdown]
28 | # # Interesting reads
29 | #
30 | # - [The road to 1.0: production ready PyTorch](https://pytorch.org/2018/05/02/road-to-1.0.html)
31 | #   - `torch.jit`
32 | #   - optimize for mobile
33 | #   - quantized inference (such as 8-bit inference)
34 | #   - [caffe2 already merged into pytorch repo](https://github.com/pytorch/pytorch/tree/master/caffe2)
35 | #   - ONNX - Open Neural Network Exchange
36 | #
37 | # - [PyTorch under the hood](https://speakerdeck.com/perone/pytorch-under-the-hood)
38 | # - PyTorch 1.0 videos
39 | 


--------------------------------------------------------------------------------
/notebooks/torch_jit.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# `torch.jit`\n",
  8 |     "\n",
  9 |     "Eager execution is great for development and debugging, but it can be hard to (automatically) optimize the code and deploy it.\n",
 10 |     "\n",
 11 |     "Now there is `torch.jit` with two flavours:\n",
 12 |     "\n",
 13 |     "- `torch.jit.trace` does not record control flow.\n",
 14 |     "- `torch.jit.script` records control flow and creates an intermediate representation that can be optimized; only supports a subset of Python.\n",
 15 |     "\n",
 16 |     "Note: don't forget `model.eval()` and `model.train()`.\n",
 17 |     "\n",
 18 |     "\n",
 19 |     "## Ref and More:\n",
 20 |     "- https://pytorch.org/docs/stable/jit.html\n",
 21 |     "- https://speakerdeck.com/perone/pytorch-under-the-hood\n",
 22 |     "- https://lernapparat.de/fast-lstm-pytorch/"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Init, helpers, utils, ..."
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 1,
 35 |    "metadata": {
 36 |     "execution": {
 37 |      "iopub.execute_input": "2020-11-27T14:31:24.971789Z",
 38 |      "iopub.status.busy": "2020-11-27T14:31:24.969854Z",
 39 |      "iopub.status.idle": "2020-11-27T14:31:25.203359Z",
 40 |      "shell.execute_reply": "2020-11-27T14:31:25.202626Z"
 41 |     }
 42 |    },
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "%load_ext autoreload\n",
 46 |     "%autoreload 2\n",
 47 |     "\n",
 48 |     "%matplotlib inline"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 2,
 54 |    "metadata": {
 55 |     "execution": {
 56 |      "iopub.execute_input": "2020-11-27T14:31:25.206235Z",
 57 |      "iopub.status.busy": "2020-11-27T14:31:25.205811Z",
 58 |      "iopub.status.idle": "2020-11-27T14:31:25.478268Z",
 59 |      "shell.execute_reply": "2020-11-27T14:31:25.477948Z"
 60 |     }
 61 |    },
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "import torch\n",
 65 |     "import torch.nn as nn\n",
 66 |     "import torch.nn.functional as F\n",
 67 |     "import torch.optim as optim\n",
 68 |     "import torchvision"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 3,
 74 |    "metadata": {
 75 |     "execution": {
 76 |      "iopub.execute_input": "2020-11-27T14:31:25.481570Z",
 77 |      "iopub.status.busy": "2020-11-27T14:31:25.481113Z",
 78 |      "iopub.status.idle": "2020-11-27T14:31:25.494485Z",
 79 |      "shell.execute_reply": "2020-11-27T14:31:25.494069Z"
 80 |     }
 81 |    },
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "from pprint import pprint\n",
 85 |     "\n",
 86 |     "import matplotlib.pyplot as plt\n",
 87 |     "import numpy as np\n",
 88 |     "from IPython.core.debugger import set_trace\n",
 89 |     "\n",
 90 |     "import utils  # little helpers\n",
 91 |     "from utils import attr"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "# `torch.jit.trace`"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 4,
104 |    "metadata": {
105 |     "execution": {
106 |      "iopub.execute_input": "2020-11-27T14:31:25.497670Z",
107 |      "iopub.status.busy": "2020-11-27T14:31:25.497372Z",
108 |      "iopub.status.idle": "2020-11-27T14:31:25.504775Z",
109 |      "shell.execute_reply": "2020-11-27T14:31:25.504504Z"
110 |     }
111 |    },
112 |    "outputs": [],
113 |    "source": [
114 |     "def f(x):\n",
115 |     "    if x.item() < 0:\n",
116 |     "        return torch.tensor(0)\n",
117 |     "    else:\n",
118 |     "        return x"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 5,
124 |    "metadata": {
125 |     "execution": {
126 |      "iopub.execute_input": "2020-11-27T14:31:25.507274Z",
127 |      "iopub.status.busy": "2020-11-27T14:31:25.506848Z",
128 |      "iopub.status.idle": "2020-11-27T14:31:25.515959Z",
129 |      "shell.execute_reply": "2020-11-27T14:31:25.515596Z"
130 |     }
131 |    },
132 |    "outputs": [
133 |     {
134 |      "data": {
135 |       "text/plain": [
136 |        "tensor(0)"
137 |       ]
138 |      },
139 |      "execution_count": 1,
140 |      "metadata": {},
141 |      "output_type": "execute_result"
142 |     }
143 |    ],
144 |    "source": [
145 |     "f(torch.tensor(-1))"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": 6,
151 |    "metadata": {
152 |     "execution": {
153 |      "iopub.execute_input": "2020-11-27T14:31:25.518286Z",
154 |      "iopub.status.busy": "2020-11-27T14:31:25.517863Z",
155 |      "iopub.status.idle": "2020-11-27T14:31:25.525322Z",
156 |      "shell.execute_reply": "2020-11-27T14:31:25.525004Z"
157 |     }
158 |    },
159 |    "outputs": [
160 |     {
161 |      "data": {
162 |       "text/plain": [
163 |        "tensor(3)"
164 |       ]
165 |      },
166 |      "execution_count": 1,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "f(torch.tensor(3))"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 7,
178 |    "metadata": {
179 |     "execution": {
180 |      "iopub.execute_input": "2020-11-27T14:31:25.527966Z",
181 |      "iopub.status.busy": "2020-11-27T14:31:25.527466Z",
182 |      "iopub.status.idle": "2020-11-27T14:31:25.541534Z",
183 |      "shell.execute_reply": "2020-11-27T14:31:25.540887Z"
184 |     }
185 |    },
186 |    "outputs": [
187 |     {
188 |      "name": "stderr",
189 |      "output_type": "stream",
190 |      "text": [
191 |       "<ipython-input-1-5385317fa43b>:2: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
192 |       "  if x.item() < 0:\n"
193 |      ]
194 |     }
195 |    ],
196 |    "source": [
197 |     "X = torch.tensor(1)\n",
198 |     "traced = torch.jit.trace(f, X)"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 8,
204 |    "metadata": {
205 |     "execution": {
206 |      "iopub.execute_input": "2020-11-27T14:31:25.543997Z",
207 |      "iopub.status.busy": "2020-11-27T14:31:25.543681Z",
208 |      "iopub.status.idle": "2020-11-27T14:31:25.551831Z",
209 |      "shell.execute_reply": "2020-11-27T14:31:25.551525Z"
210 |     }
211 |    },
212 |    "outputs": [
213 |     {
214 |      "data": {
215 |       "text/plain": [
216 |        "torch.jit.ScriptFunction"
217 |       ]
218 |      },
219 |      "execution_count": 1,
220 |      "metadata": {},
221 |      "output_type": "execute_result"
222 |     }
223 |    ],
224 |    "source": [
225 |     "type(traced)"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": 9,
231 |    "metadata": {
232 |     "execution": {
233 |      "iopub.execute_input": "2020-11-27T14:31:25.554129Z",
234 |      "iopub.status.busy": "2020-11-27T14:31:25.553805Z",
235 |      "iopub.status.idle": "2020-11-27T14:31:25.562407Z",
236 |      "shell.execute_reply": "2020-11-27T14:31:25.562078Z"
237 |     }
238 |    },
239 |    "outputs": [
240 |     {
241 |      "data": {
242 |       "text/plain": [
243 |        "tensor(1)"
244 |       ]
245 |      },
246 |      "execution_count": 1,
247 |      "metadata": {},
248 |      "output_type": "execute_result"
249 |     }
250 |    ],
251 |    "source": [
252 |     "traced(torch.tensor(1))"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": 10,
258 |    "metadata": {
259 |     "execution": {
260 |      "iopub.execute_input": "2020-11-27T14:31:25.564943Z",
261 |      "iopub.status.busy": "2020-11-27T14:31:25.564560Z",
262 |      "iopub.status.idle": "2020-11-27T14:31:25.572611Z",
263 |      "shell.execute_reply": "2020-11-27T14:31:25.572278Z"
264 |     }
265 |    },
266 |    "outputs": [
267 |     {
268 |      "data": {
269 |       "text/plain": [
270 |        "graph(%0 : Long(requires_grad=0, device=cpu)):\n",
271 |        "  return (%0)"
272 |       ]
273 |      },
274 |      "execution_count": 1,
275 |      "metadata": {},
276 |      "output_type": "execute_result"
277 |     }
278 |    ],
279 |    "source": [
280 |     "traced.graph"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "code",
285 |    "execution_count": 11,
286 |    "metadata": {
287 |     "execution": {
288 |      "iopub.execute_input": "2020-11-27T14:31:25.575102Z",
289 |      "iopub.status.busy": "2020-11-27T14:31:25.574627Z",
290 |      "iopub.status.idle": "2020-11-27T14:31:25.582783Z",
291 |      "shell.execute_reply": "2020-11-27T14:31:25.582382Z"
292 |     }
293 |    },
294 |    "outputs": [
295 |     {
296 |      "data": {
297 |       "text/plain": [
298 |        "tensor(-1)"
299 |       ]
300 |      },
301 |      "execution_count": 1,
302 |      "metadata": {},
303 |      "output_type": "execute_result"
304 |     }
305 |    ],
306 |    "source": [
307 |     "traced(torch.tensor(-1))"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "markdown",
312 |    "metadata": {},
313 |    "source": [
314 |     "## Storing and restoring"
315 |    ]
316 |   },
317 |   {
318 |    "cell_type": "code",
319 |    "execution_count": 12,
320 |    "metadata": {
321 |     "execution": {
322 |      "iopub.execute_input": "2020-11-27T14:31:25.585530Z",
323 |      "iopub.status.busy": "2020-11-27T14:31:25.585143Z",
324 |      "iopub.status.idle": "2020-11-27T14:31:25.606206Z",
325 |      "shell.execute_reply": "2020-11-27T14:31:25.605544Z"
326 |     }
327 |    },
328 |    "outputs": [],
329 |    "source": [
330 |     "traced.save(\"traced.pt\")"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "code",
335 |    "execution_count": 13,
336 |    "metadata": {
337 |     "execution": {
338 |      "iopub.execute_input": "2020-11-27T14:31:25.608723Z",
339 |      "iopub.status.busy": "2020-11-27T14:31:25.608368Z",
340 |      "iopub.status.idle": "2020-11-27T14:31:25.733329Z",
341 |      "shell.execute_reply": "2020-11-27T14:31:25.731998Z"
342 |     }
343 |    },
344 |    "outputs": [
345 |     {
346 |      "name": "stdout",
347 |      "output_type": "stream",
348 |      "text": [
349 |       "traced.pt: Zip archive data, at least v?[0] to extract\r\n"
350 |      ]
351 |     }
352 |    ],
353 |    "source": [
354 |     "!file traced.pt"
355 |    ]
356 |   },
357 |   {
358 |    "cell_type": "code",
359 |    "execution_count": 14,
360 |    "metadata": {
361 |     "execution": {
362 |      "iopub.execute_input": "2020-11-27T14:31:25.737533Z",
363 |      "iopub.status.busy": "2020-11-27T14:31:25.737240Z",
364 |      "iopub.status.idle": "2020-11-27T14:31:25.745978Z",
365 |      "shell.execute_reply": "2020-11-27T14:31:25.745553Z"
366 |     }
367 |    },
368 |    "outputs": [],
369 |    "source": [
370 |     "g = torch.jit.load(\"traced.pt\")"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "code",
375 |    "execution_count": 15,
376 |    "metadata": {
377 |     "execution": {
378 |      "iopub.execute_input": "2020-11-27T14:31:25.749203Z",
379 |      "iopub.status.busy": "2020-11-27T14:31:25.748303Z",
380 |      "iopub.status.idle": "2020-11-27T14:31:25.757664Z",
381 |      "shell.execute_reply": "2020-11-27T14:31:25.757985Z"
382 |     }
383 |    },
384 |    "outputs": [
385 |     {
386 |      "data": {
387 |       "text/plain": [
388 |        "tensor(1)"
389 |       ]
390 |      },
391 |      "execution_count": 1,
392 |      "metadata": {},
393 |      "output_type": "execute_result"
394 |     }
395 |    ],
396 |    "source": [
397 |     "g(torch.tensor(1))"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "code",
402 |    "execution_count": 16,
403 |    "metadata": {
404 |     "execution": {
405 |      "iopub.execute_input": "2020-11-27T14:31:25.761208Z",
406 |      "iopub.status.busy": "2020-11-27T14:31:25.760655Z",
407 |      "iopub.status.idle": "2020-11-27T14:31:25.769043Z",
408 |      "shell.execute_reply": "2020-11-27T14:31:25.769360Z"
409 |     }
410 |    },
411 |    "outputs": [
412 |     {
413 |      "data": {
414 |       "text/plain": [
415 |        "tensor(-1)"
416 |       ]
417 |      },
418 |      "execution_count": 1,
419 |      "metadata": {},
420 |      "output_type": "execute_result"
421 |     }
422 |    ],
423 |    "source": [
424 |     "g(torch.tensor(-1))"
425 |    ]
426 |   },
427 |   {
428 |    "cell_type": "markdown",
429 |    "metadata": {},
430 |    "source": [
431 |     "# `torch.jit.script`"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": 17,
437 |    "metadata": {
438 |     "execution": {
439 |      "iopub.execute_input": "2020-11-27T14:31:25.772118Z",
440 |      "iopub.status.busy": "2020-11-27T14:31:25.771699Z",
441 |      "iopub.status.idle": "2020-11-27T14:31:25.780525Z",
442 |      "shell.execute_reply": "2020-11-27T14:31:25.780141Z"
443 |     }
444 |    },
445 |    "outputs": [
446 |     {
447 |      "data": {
448 |       "text/plain": [
449 |        "True"
450 |       ]
451 |      },
452 |      "execution_count": 1,
453 |      "metadata": {},
454 |      "output_type": "execute_result"
455 |     }
456 |    ],
457 |    "source": [
458 |     "bool(torch.tensor(1) < 2)"
459 |    ]
460 |   },
461 |   {
462 |    "cell_type": "code",
463 |    "execution_count": 18,
464 |    "metadata": {
465 |     "execution": {
466 |      "iopub.execute_input": "2020-11-27T14:31:25.784867Z",
467 |      "iopub.status.busy": "2020-11-27T14:31:25.783207Z",
468 |      "iopub.status.idle": "2020-11-27T14:31:25.792360Z",
469 |      "shell.execute_reply": "2020-11-27T14:31:25.792027Z"
470 |     }
471 |    },
472 |    "outputs": [],
473 |    "source": [
474 |     "@torch.jit.script\n",
475 |     "def f(x):\n",
476 |     "    if bool(x < 0):\n",
477 |     "        result = torch.zeros(1)\n",
478 |     "    else:\n",
479 |     "        result = x\n",
480 |     "    return result"
481 |    ]
482 |   },
483 |   {
484 |    "cell_type": "markdown",
485 |    "metadata": {},
486 |    "source": [
487 |     "This is TorchScript, which supports only a subset of Python."
488 |    ]
489 |   },
490 |   {
491 |    "cell_type": "code",
492 |    "execution_count": 19,
493 |    "metadata": {
494 |     "execution": {
495 |      "iopub.execute_input": "2020-11-27T14:31:25.794784Z",
496 |      "iopub.status.busy": "2020-11-27T14:31:25.794427Z",
497 |      "iopub.status.idle": "2020-11-27T14:31:25.803364Z",
498 |      "shell.execute_reply": "2020-11-27T14:31:25.803087Z"
499 |     }
500 |    },
501 |    "outputs": [
502 |     {
503 |      "data": {
504 |       "text/plain": [
505 |        "tensor([0.])"
506 |       ]
507 |      },
508 |      "execution_count": 1,
509 |      "metadata": {},
510 |      "output_type": "execute_result"
511 |     }
512 |    ],
513 |    "source": [
514 |     "f(torch.tensor(-1))"
515 |    ]
516 |   },
517 |   {
518 |    "cell_type": "code",
519 |    "execution_count": 20,
520 |    "metadata": {
521 |     "execution": {
522 |      "iopub.execute_input": "2020-11-27T14:31:25.806024Z",
523 |      "iopub.status.busy": "2020-11-27T14:31:25.805284Z",
524 |      "iopub.status.idle": "2020-11-27T14:31:25.813152Z",
525 |      "shell.execute_reply": "2020-11-27T14:31:25.813381Z"
526 |     }
527 |    },
528 |    "outputs": [
529 |     {
530 |      "data": {
531 |       "text/plain": [
532 |        "tensor(1)"
533 |       ]
534 |      },
535 |      "execution_count": 1,
536 |      "metadata": {},
537 |      "output_type": "execute_result"
538 |     }
539 |    ],
540 |    "source": [
541 |     "f(torch.tensor(1))"
542 |    ]
543 |   },
544 |   {
545 |    "cell_type": "code",
546 |    "execution_count": 21,
547 |    "metadata": {
548 |     "execution": {
549 |      "iopub.execute_input": "2020-11-27T14:31:25.815516Z",
550 |      "iopub.status.busy": "2020-11-27T14:31:25.815182Z",
551 |      "iopub.status.idle": "2020-11-27T14:31:25.823160Z",
552 |      "shell.execute_reply": "2020-11-27T14:31:25.822776Z"
553 |     }
554 |    },
555 |    "outputs": [
556 |     {
557 |      "data": {
558 |       "text/plain": [
559 |        "torch.jit.ScriptFunction"
560 |       ]
561 |      },
562 |      "execution_count": 1,
563 |      "metadata": {},
564 |      "output_type": "execute_result"
565 |     }
566 |    ],
567 |    "source": [
568 |     "type(f)"
569 |    ]
570 |   },
571 |   {
572 |    "cell_type": "code",
573 |    "execution_count": 22,
574 |    "metadata": {
575 |     "execution": {
576 |      "iopub.execute_input": "2020-11-27T14:31:25.825962Z",
577 |      "iopub.status.busy": "2020-11-27T14:31:25.825098Z",
578 |      "iopub.status.idle": "2020-11-27T14:31:25.833882Z",
579 |      "shell.execute_reply": "2020-11-27T14:31:25.833453Z"
580 |     }
581 |    },
582 |    "outputs": [
583 |     {
584 |      "data": {
585 |       "text/plain": [
586 |        "graph(%x.1 : Tensor):\n",
587 |        "  %8 : None = prim::Constant()\n",
588 |        "  %2 : int = prim::Constant[value=0]() # <ipython-input-1-5b977b5b82b7>:3:16\n",
589 |        "  %5 : int = prim::Constant[value=1]() # <ipython-input-1-5b977b5b82b7>:4:29\n",
590 |        "  %3 : Tensor = aten::lt(%x.1, %2) # <ipython-input-1-5b977b5b82b7>:3:12\n",
591 |        "  %4 : bool = aten::Bool(%3) # <ipython-input-1-5b977b5b82b7>:3:7\n",
592 |        "  %result : Tensor = prim::If(%4) # <ipython-input-1-5b977b5b82b7>:3:4\n",
593 |        "    block0():\n",
594 |        "      %7 : int[] = prim::ListConstruct(%5)\n",
595 |        "      %result.1 : Tensor = aten::zeros(%7, %8, %8, %8, %8) # <ipython-input-1-5b977b5b82b7>:4:17\n",
596 |        "      -> (%result.1)\n",
597 |        "    block1():\n",
598 |        "      -> (%x.1)\n",
599 |        "  return (%result)"
600 |       ]
601 |      },
602 |      "execution_count": 1,
603 |      "metadata": {},
604 |      "output_type": "execute_result"
605 |     }
606 |    ],
607 |    "source": [
608 |     "f.graph"
609 |    ]
610 |   },
611 |   {
612 |    "cell_type": "markdown",
613 |    "metadata": {},
614 |    "source": [
615 |     "## Storing and restoring"
616 |    ]
617 |   },
618 |   {
619 |    "cell_type": "code",
620 |    "execution_count": 23,
621 |    "metadata": {
622 |     "execution": {
623 |      "iopub.execute_input": "2020-11-27T14:31:25.836435Z",
624 |      "iopub.status.busy": "2020-11-27T14:31:25.836068Z",
625 |      "iopub.status.idle": "2020-11-27T14:31:25.843708Z",
626 |      "shell.execute_reply": "2020-11-27T14:31:25.843266Z"
627 |     }
628 |    },
629 |    "outputs": [],
630 |    "source": [
631 |     "torch.jit.save(f, \"scripted.pt\")"
632 |    ]
633 |   },
634 |   {
635 |    "cell_type": "code",
636 |    "execution_count": 24,
637 |    "metadata": {
638 |     "execution": {
639 |      "iopub.execute_input": "2020-11-27T14:31:25.847133Z",
640 |      "iopub.status.busy": "2020-11-27T14:31:25.846686Z",
641 |      "iopub.status.idle": "2020-11-27T14:31:25.963596Z",
642 |      "shell.execute_reply": "2020-11-27T14:31:25.962539Z"
643 |     }
644 |    },
645 |    "outputs": [
646 |     {
647 |      "name": "stdout",
648 |      "output_type": "stream",
649 |      "text": [
650 |       "scripted.pt: Zip archive data, at least v?[0] to extract\r\n"
651 |      ]
652 |     }
653 |    ],
654 |    "source": [
655 |     "!file scripted.pt"
656 |    ]
657 |   },
658 |   {
659 |    "cell_type": "code",
660 |    "execution_count": 25,
661 |    "metadata": {
662 |     "execution": {
663 |      "iopub.execute_input": "2020-11-27T14:31:25.968061Z",
664 |      "iopub.status.busy": "2020-11-27T14:31:25.967602Z",
665 |      "iopub.status.idle": "2020-11-27T14:31:25.979424Z",
666 |      "shell.execute_reply": "2020-11-27T14:31:25.978497Z"
667 |     }
668 |    },
669 |    "outputs": [],
670 |    "source": [
671 |     "g = torch.jit.load(\"scripted.pt\")"
672 |    ]
673 |   },
674 |   {
675 |    "cell_type": "code",
676 |    "execution_count": 26,
677 |    "metadata": {
678 |     "execution": {
679 |      "iopub.execute_input": "2020-11-27T14:31:25.982405Z",
680 |      "iopub.status.busy": "2020-11-27T14:31:25.982021Z",
681 |      "iopub.status.idle": "2020-11-27T14:31:25.992205Z",
682 |      "shell.execute_reply": "2020-11-27T14:31:25.991823Z"
683 |     }
684 |    },
685 |    "outputs": [
686 |     {
687 |      "data": {
688 |       "text/plain": [
689 |        "tensor([0.])"
690 |       ]
691 |      },
692 |      "execution_count": 1,
693 |      "metadata": {},
694 |      "output_type": "execute_result"
695 |     }
696 |    ],
697 |    "source": [
698 |     "g(torch.tensor(-1))"
699 |    ]
700 |   },
701 |   {
702 |    "cell_type": "code",
703 |    "execution_count": 27,
704 |    "metadata": {
705 |     "execution": {
706 |      "iopub.execute_input": "2020-11-27T14:31:25.995114Z",
707 |      "iopub.status.busy": "2020-11-27T14:31:25.994647Z",
708 |      "iopub.status.idle": "2020-11-27T14:31:26.005153Z",
709 |      "shell.execute_reply": "2020-11-27T14:31:26.004702Z"
710 |     }
711 |    },
712 |    "outputs": [
713 |     {
714 |      "data": {
715 |       "text/plain": [
716 |        "tensor(1)"
717 |       ]
718 |      },
719 |      "execution_count": 1,
720 |      "metadata": {},
721 |      "output_type": "execute_result"
722 |     }
723 |    ],
724 |    "source": [
725 |     "g(torch.tensor(1))"
726 |    ]
727 |   },
728 |   {
729 |    "cell_type": "markdown",
730 |    "metadata": {},
731 |    "source": [
732 |     "## Subclassing `torch.jit.ScriptModule`\n",
733 |     "If you work with `nn.Module` replace it by `torch.jit.ScriptModule` (see [[tutorial]](https://pytorch.org/tutorials/beginner/deploy_seq2seq_hybrid_frontend_tutorial.html) for more).\n",
734 |     "\n",
735 |     "```python\n",
736 |     "class MyModule(torch.jit.ScriptModule):\n",
737 |     "    def __init__(self):\n",
738 |     "        super().__init__()\n",
739 |     "        \n",
740 |     "    def forward(self, x):\n",
741 |     "        # ...\n",
742 |     "        return x\n",
743 |     "```"
744 |    ]
745 |   },
746 |   {
747 |    "cell_type": "markdown",
748 |    "metadata": {},
749 |    "source": [
750 |     "# PyTorch and C++\n",
751 |     "\n",
752 |     "PyTorch offers a very nice(!) C++ interface which is very close to Python."
753 |    ]
754 |   },
755 |   {
756 |    "cell_type": "markdown",
757 |    "metadata": {},
758 |    "source": [
759 |     "## Loading traced models from C++"
760 |    ]
761 |   },
762 |   {
763 |    "cell_type": "markdown",
764 |    "metadata": {},
765 |    "source": [
766 |     "```c++\n",
767 |     "#include <torch/script.h>\n",
768 |     "\n",
769 |     "int main(int argc, const char* argv[]) {\n",
770 |     "    auto module = torch::jit::load(\"scripted.pt\");\n",
771 |     "    // data ...\n",
772 |     "    module.forward(data);\n",
773 |     "}\n",
774 |     "```"
775 |    ]
776 |   }
777 |  ],
778 |  "metadata": {
779 |   "kernelspec": {
780 |    "display_name": "Python 3",
781 |    "language": "python",
782 |    "name": "python3"
783 |   },
784 |   "language_info": {
785 |    "codemirror_mode": {
786 |     "name": "ipython",
787 |     "version": 3
788 |    },
789 |    "file_extension": ".py",
790 |    "mimetype": "text/x-python",
791 |    "name": "python",
792 |    "nbconvert_exporter": "python",
793 |    "pygments_lexer": "ipython3",
794 |    "version": "3.8.5"
795 |   }
796 |  },
797 |  "nbformat": 4,
798 |  "nbformat_minor": 2
799 | }
800 | 


--------------------------------------------------------------------------------
/notebooks/torch_jit.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # `torch.jit`
 17 | #
 18 | # Eager execution is great for development and debugging, but it can be hard to (automatically) optimize the code and deploy it.
 19 | #
 20 | # Now there is `torch.jit` with two flavours:
 21 | #
 22 | # - `torch.jit.trace` does not record control flow.
 23 | # - `torch.jit.script` records control flow and creates an intermediate representation that can be optimized; only supports a subset of Python.
 24 | #
 25 | # Note: don't forget `model.eval()` and `model.train()`.
 26 | #
 27 | #
 28 | # ## Ref and More:
 29 | # - https://pytorch.org/docs/stable/jit.html
 30 | # - https://speakerdeck.com/perone/pytorch-under-the-hood
 31 | # - https://lernapparat.de/fast-lstm-pytorch/
 32 | 
 33 | # %% [markdown]
 34 | # ## Init, helpers, utils, ...
 35 | 
 36 | # %%
 37 | # %load_ext autoreload
 38 | # %autoreload 2
 39 | 
 40 | # %matplotlib inline
 41 | 
 42 | # %%
 43 | import torch
 44 | import torch.nn as nn
 45 | import torch.nn.functional as F
 46 | import torch.optim as optim
 47 | import torchvision
 48 | 
 49 | # %%
 50 | from pprint import pprint
 51 | 
 52 | import matplotlib.pyplot as plt
 53 | import numpy as np
 54 | from IPython.core.debugger import set_trace
 55 | 
 56 | import utils  # little helpers
 57 | from utils import attr
 58 | 
 59 | 
 60 | # %% [markdown]
 61 | # # `torch.jit.trace`
 62 | 
 63 | # %%
 64 | def f(x):
 65 |     if x.item() < 0:
 66 |         return torch.tensor(0)
 67 |     else:
 68 |         return x
 69 | 
 70 | 
 71 | # %%
 72 | f(torch.tensor(-1))  # negative input: f takes its `x.item() < 0` branch
 73 | 
 74 | # %%
 75 | f(torch.tensor(3))  # non-negative input: f hands x back
 76 | 
 77 | # %%
 78 | X = torch.tensor(1)  # example input used for tracing
 79 | traced = torch.jit.trace(f, X)  # f is run once on X and that run is recorded
 80 | 
 81 | # %%
 82 | type(traced)
 83 | 
 84 | # %%
 85 | traced(torch.tensor(1))
 86 | 
 87 | # %%
 88 | traced.graph  # the saved notebook output shows a graph that only returns its input
 89 | 
 90 | # %%
 91 | traced(torch.tensor(-1))  # saved notebook output: tensor(-1), whereas eager f gives tensor(0)
 92 | 
 93 | # %% [markdown]
 94 | # ## Storing and restoring
 95 | 
 96 | # %%
 97 | traced.save("traced.pt")
 98 | 
 99 | # %%
100 | # !file scripted.pt
101 | 
102 | # %%
103 | g = torch.jit.load("traced.pt")
104 | 
105 | # %%
106 | g(torch.tensor(1))
107 | 
108 | # %%
109 | g(torch.tensor(-1))
110 | 
111 | # %% [markdown]
112 | # # `torch.jit.script`
113 | 
114 | # %%
115 | bool(torch.tensor(1) < 2)  # the comparison gives a tensor; bool() turns it into a Python bool
116 | 
117 | 
118 | # %%
119 | @torch.jit.script
120 | def f(x):  # redefines f; this version is compiled by torch.jit.script
121 |     if bool(x < 0):  # explicit bool(): `x < 0` itself is a Tensor
122 |         result = torch.zeros(1)  # note: torch.zeros(1), not torch.tensor(0) as in the first f
123 |     else:
124 |         result = x
125 |     return result
126 | 
127 | 
128 | # %% [markdown]
129 | # This is TorchScript, which supports only a subset of Python.
130 | 
131 | # %%
132 | f(torch.tensor(-1))  # `x < 0` branch: result is torch.zeros(1)
133 | 
134 | # %%
135 | f(torch.tensor(1))  # else branch: x is returned
136 | 
137 | # %%
138 | type(f)
139 | 
140 | # %%
141 | f.graph  # the saved notebook output contains a prim::If node with both branches
142 | 
143 | # %% [markdown]
144 | # ## Storing and restoring
145 | 
146 | # %%
147 | torch.jit.save(f, "scripted.pt")  # function-style save; the traced example used the .save() method
148 | 
149 | # %%
150 | # !file scripted.pt
151 | 
152 | # %%
153 | g = torch.jit.load("scripted.pt")  # rebinds g, replacing the traced function loaded earlier
154 | 
155 | # %%
156 | g(torch.tensor(-1))  # saved notebook output: tensor([0.])
157 | 
158 | # %%
159 | g(torch.tensor(1))  # saved notebook output: tensor(1)
160 | 
161 | # %% [markdown]
162 | # ## Subclassing `torch.jit.ScriptModule`
163 | # If you work with `nn.Module` replace it by `torch.jit.ScriptModule` (see [[tutorial]](https://pytorch.org/tutorials/beginner/deploy_seq2seq_hybrid_frontend_tutorial.html) for more).
164 | #
165 | # ```python
166 | # class MyModule(torch.jit.ScriptModule):
167 | #     def __init__(self):
168 | #         super().__init__()
169 | #         
170 | #     def forward(self, x):
171 | #         # ...
172 | #         return x
173 | # ```
174 | 
175 | # %% [markdown]
176 | # # PyTorch and C++
177 | #
178 | # PyTorch offers a very nice(!) C++ interface which is very close to Python.
179 | 
180 | # %% [markdown]
181 | # ## Loading traced models from C++
182 | 
183 | # %% [markdown]
184 | # ```c++
185 | # #include <torch/script.h>
186 | #
187 | # int main(int argc, const char* argv[]) {
188 | #     auto module = torch::jit::load("scripted.pt");
189 | #     // data ...
190 | #     module.forward(data);
191 | # }
192 | # ```
193 | 


--------------------------------------------------------------------------------
/notebooks/transfer_learning.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # Transfer learning with PyTorch
 17 | # We're going to train a neural network to classify dogs and cats.
 18 | 
 19 | # %% [markdown]
 20 | # ## Init, helpers, utils, ...
 21 | 
 22 | # %%
 23 | # %matplotlib inline
 24 | 
 25 | # %%
 26 | import torch
 27 | import torch.nn as nn
 28 | import torch.nn.functional as F
 29 | import torch.optim as optim
 30 | import torchvision
 31 | 
 32 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 33 | DEVICE
 34 | 
 35 | # %%
 36 | from pprint import pprint
 37 | 
 38 | import matplotlib.pyplot as plt
 39 | import numpy as np
 40 | from IPython.core.debugger import set_trace
 41 | 
 42 | 
 43 | # %%
 44 | # # %load my_train_helper.py
 45 | def get_trainable(model_params):
 46 |     return (p for p in model_params if p.requires_grad)
 47 | 
 48 | 
 49 | def get_frozen(model_params):
 50 |     return (p for p in model_params if not p.requires_grad)
 51 | 
 52 | 
 53 | def all_trainable(model_params):
 54 |     return all(p.requires_grad for p in model_params)
 55 | 
 56 | 
 57 | def all_frozen(model_params):
 58 |     return all(not p.requires_grad for p in model_params)
 59 | 
 60 | 
 61 | def freeze_all(model_params):
 62 |     for param in model_params:
 63 |         param.requires_grad = False
 64 | 
 65 | 
 66 | 
 67 | # %% [markdown] toc-hr-collapsed=true
 68 | # # The Data - DogsCatsDataset
 69 | 
 70 | # %% [markdown]
 71 | # ## Transforms
 72 | 
 73 | # %%
 74 | from torchvision import transforms
 75 | 
 76 | _image_size = 224  # side length used by both crop transforms below
 77 | _mean = [0.485, 0.456, 0.406]  # per-channel mean handed to Normalize
 78 | _std = [0.229, 0.224, 0.225]  # per-channel std handed to Normalize
 79 | 
 80 | 
 81 | train_trans = transforms.Compose([  # training pipeline: random crop, flip and colour jitter
 82 |     transforms.Resize(256),  # some images are pretty small
 83 |     transforms.RandomCrop(_image_size),
 84 |     transforms.RandomHorizontalFlip(),
 85 |     transforms.ColorJitter(.3, .3, .3),
 86 |     transforms.ToTensor(),
 87 |     transforms.Normalize(_mean, _std),
 88 | ])
 89 | val_trans = transforms.Compose([  # validation pipeline: centre crop, no random transforms
 90 |     transforms.Resize(256),
 91 |     transforms.CenterCrop(_image_size),
 92 |     transforms.ToTensor(),
 93 |     transforms.Normalize(_mean, _std),
 94 | ])
 95 | 
 96 | # %% [markdown]
 97 | # ## Dataset
 98 | #
 99 | # The implementation of the dataset does not really matter here; we simply use torchvision's `ImageFolder`.
100 | 
101 | # %%
102 | from torchvision.datasets.folder import ImageFolder
103 | 
104 | # %%
105 | train_ds = ImageFolder("dogscats/training_set/", transform=train_trans)  # relative path
106 | val_ds = ImageFolder("dogscats/test_set/", transform=val_trans)  # the test set doubles as validation set
107 | 
108 | batch_size = 32
109 | n_classes = 2  # dogs and cats
110 | 
111 | # %% [markdown]
112 | # Use the following if you want to use the full dataset:
113 | 
114 | # %%
115 | # train_ds = DogsCatsDataset("../data/raw", "train", transform=train_trans)
116 | # val_ds = DogsCatsDataset("../data/raw", "valid", transform=val_trans)
117 | 
118 | # batch_size = 128
119 | # n_classes = 2
120 | 
121 | # %%
122 | len(train_ds), len(val_ds)
123 | 
124 | # %% [markdown]
125 | # ## DataLoader
126 | # Batch loading for datasets with multi-processing and different sample strategies.
127 | 
128 | # %%
129 | from torch.utils.data import DataLoader
130 | 
131 | 
132 | train_dl = DataLoader(
133 |     train_ds,
134 |     batch_size=batch_size,
135 |     shuffle=True,  # training batches are drawn in shuffled order
136 |     num_workers=16,  # NOTE(review): hard-coded worker count; may exceed the cores of the machine
137 | )
138 | 
139 | val_dl = DataLoader(
140 |     val_ds,
141 |     batch_size=batch_size,
142 |     shuffle=False,  # fixed order for validation
143 |     num_workers=16,
144 | )
145 | 
146 | # %% [markdown]
147 | # # The Model
148 | # PyTorch offers quite a few [pre-trained networks](https://pytorch.org/docs/stable/torchvision/models.html) such as:
149 | # - AlexNet
150 | # - VGG
151 | # - ResNet
152 | # - SqueezeNet
153 | # - DenseNet
154 | # - Inception v3
155 | #
156 | # And there are more available via [pretrained-models.pytorch](https://github.com/Cadene/pretrained-models.pytorch):
157 | # - NASNet,
158 | # - ResNeXt,
159 | # - InceptionV4,
160 | # - InceptionResnetV2, 
161 | # - Xception, 
162 | # - DPN,
163 | # - ...
164 | #
165 | # We'll use a simple resnet18 model:
166 | 
167 | # %%
168 | from torchvision import models
169 | 
170 | model = models.resnet18(pretrained=True)
171 | 
172 | # %%
173 | model
174 | 
175 | # %%
176 | import torchsummary
177 | 
178 | torchsummary.summary(model, (3, 224, 224), device="cpu")
179 | 
180 | # %%
181 | nn.Linear(2, 1, bias=True)
182 | 
183 | # %%
184 | # Freeze all parameters manually
185 | for param in model.parameters():
186 |     param.requires_grad = False
187 | 
188 | # %%
189 | # Or use our convenient functions from before
190 | freeze_all(model.parameters())
191 | assert all_frozen(model.parameters())
192 | 
193 | # %% [markdown]
194 | # Replace the last layer with a linear layer. New layers have `requires_grad = True`.
195 | 
196 | # %%
197 | model.fc = nn.Linear(512, n_classes)  # 512 has to equal the in_features of the layer being replaced
198 | 
199 | # %%
200 | assert not all_frozen(model.parameters())  # the fresh fc layer is trainable again
201 | 
202 | 
203 | # %%
204 | def get_model(n_classes=2):
205 |     model = models.resnet18(pretrained=True)
206 |     freeze_all(model.parameters())
207 |     model.fc = nn.Linear(512, n_classes)
208 |     model = model.to(DEVICE)
209 |     return model
210 | 
211 | 
212 | model = get_model()
213 | 
214 | # %% [markdown]
215 | # # The Loss
216 | 
217 | # %%
218 | criterion = nn.CrossEntropyLoss()
219 | 
220 | # %% [markdown]
221 | # # The Optimizer
222 | 
223 | # %%
224 | optimizer = torch.optim.Adam(
225 |     get_trainable(model.parameters()),  # only the params that still require grad are optimized
226 |     lr=0.001,
227 |     # momentum=0.9,  # NOTE: an SGD argument; torch.optim.Adam does not take it
228 | )
229 | 
230 | # %% [markdown]
231 | # # The Train Loop
232 | 
233 | # %%
234 | N_EPOCHS = 1
235 | 
236 | for epoch in range(N_EPOCHS):
237 |     
238 |     # Train
239 |     model.train()  # IMPORTANT: put the model into training mode
240 |     
241 |     total_loss, n_correct, n_samples = 0.0, 0, 0  # running sums for this epoch
242 |     for batch_i, (X, y) in enumerate(train_dl):
243 |         X, y = X.to(DEVICE), y.to(DEVICE)
244 |         
245 |         optimizer.zero_grad()  # clear gradients left from the previous batch
246 |         y_ = model(X)
247 |         loss = criterion(y_, y)
248 |         loss.backward()
249 |         optimizer.step()
250 |         
251 |         # Statistics
252 |         print(
253 |             f"Epoch {epoch+1}/{N_EPOCHS} |"
254 |             f"  batch: {batch_i} |"
255 |             f"  batch loss:   {loss.item():0.3f}"
256 |         )
257 |         _, y_label_ = torch.max(y_, 1)  # index of the largest output along dim 1 = predicted label
258 |         n_correct += (y_label_ == y).sum().item()
259 |         total_loss += loss.item() * X.shape[0]  # weight by batch size; divided by n_samples below
260 |         n_samples += X.shape[0]
261 |     
262 |     print(
263 |         f"Epoch {epoch+1}/{N_EPOCHS} |"
264 |         f"  train loss: {total_loss / n_samples:9.3f} |"
265 |         f"  train acc:  {n_correct / n_samples * 100:9.3f}%"
266 |     )
267 |     
268 |     
269 |     # Eval
270 |     model.eval()  # IMPORTANT: put the model into evaluation mode
271 |     
272 |     total_loss, n_correct, n_samples = 0.0, 0, 0  # reset the running sums for validation
273 |     with torch.no_grad():  # IMPORTANT: no gradient tracking while validating
274 |         for X, y in val_dl:
275 |             X, y = X.to(DEVICE), y.to(DEVICE)
276 |                     
277 |             y_ = model(X)
278 |         
279 |             # Statistics
280 |             _, y_label_ = torch.max(y_, 1)
281 |             n_correct += (y_label_ == y).sum().item()
282 |             loss = criterion(y_, y)
283 |             total_loss += loss.item() * X.shape[0]
284 |             n_samples += X.shape[0]
285 | 
286 |     
287 |     print(
288 |         f"Epoch {epoch+1}/{N_EPOCHS} |"
289 |         f"  valid loss: {total_loss / n_samples:9.3f} |"
290 |         f"  valid acc:  {n_correct / n_samples * 100:9.3f}%"
291 |     )
292 | 
293 | 
294 | # %% [markdown]
295 | # # Exercise
296 | # - Create your own module which takes any of the existing pre-trained models as a backbone and adds a problem-specific head.
297 | 
298 | # %%
299 | class Net(nn.Module):  # exercise skeleton: backbone and head are left for the reader
300 |     def __init__(self, backbone: nn.Module, n_classes: int):
301 |         super().__init__()
302 |         # self.backbone  (TODO: keep the backbone that was passed in)
303 |         # self.head = init_head(n_classes)
304 |         
305 |     def forward(self, x):
306 |         # TODO: for now the input is returned untouched
307 |         return x
308 | 


--------------------------------------------------------------------------------
/notebooks/visualize_model_loss_optimizer.py:
--------------------------------------------------------------------------------
  1 | # ---
  2 | # jupyter:
  3 | #   jupytext:
  4 | #     text_representation:
  5 | #       extension: .py
  6 | #       format_name: percent
  7 | #       format_version: '1.3'
  8 | #       jupytext_version: 1.7.1
  9 | #   kernelspec:
 10 | #     display_name: Python 3
 11 | #     language: python
 12 | #     name: python3
 13 | # ---
 14 | 
 15 | # %% [markdown]
 16 | # # Software vs Machine Learning
 17 | #
 18 | # ![](img/software_vs_ml.png)
 19 | #
 20 | # ![](img/ml_debt.jpg)
 21 | 
 22 | # %% [markdown]
 23 | # # Widget to visualize linear regression, error, and loss
 24 | 
 25 | # %%
 26 | import numpy as np
 27 | import altair as alt
 28 | import pandas as pd
 29 | import ipywidgets
 30 | 
 31 | 
 32 | # %%
 33 | def f(x, slope: float, bias: float):
 34 |     """A simple linear model."""
 35 |     return x * slope + bias
 36 | 
 37 | 
 38 | # %%
 39 | def err2(pred, true):
 40 |     return (true - pred) ** 2
 41 | 
 42 | def mse(pred, true):
 43 |     return np.mean(err2(pred, true))
 44 | 
 45 | 
 46 | # %%
 47 | n = 20
 48 | std = 4
 49 | 
 50 | x = np.linspace(-10, 10, 20)
 51 | noise = np.random.normal(0, 2, size=n)
 52 | 
 53 | y = f(x, slope=1.3, bias=5) + noise
 54 | 
 55 | data = pd.DataFrame({"x": x, "y": y})
 56 | 
 57 | # %%
 58 | slope_dom = np.linspace(-2, 4.5, 66)  # candidate slopes for the loss curve
 59 | slope_losses = {  # maps each candidate slope to the MSE on (x, y), bias fixed at 5
 60 |     _slope: mse(f(x, _slope, bias=5), y)
 61 |     for _slope in slope_dom
 62 | }
 63 | df_slope_losses = pd.DataFrame({  # the same mapping as a two-column frame for plotting
 64 |     "slope": slope_losses.keys(),
 65 |     "loss": slope_losses.values(),
 66 | })
 67 | 
 68 | # %%
 69 | alt.renderers.enable('altair_viewer')
 70 | 
 71 | 
 72 | # %%
 73 | def show_lin_reg(
 74 |     slope: float,
 75 |     bias: float,
 76 |     show_pred=True,
 77 |     show_err=True,
 78 |     show_err2=False,
 79 |     show_loss_landscape=False,
 80 | ):
 81 |     
 82 |     pred = x * slope + bias
 83 |     
 84 |     data["pred"] = pred
 85 |     data["err"] = y - pred
 86 |     data["err2"] = (y - pred) ** 2
 87 |     data["x2"] = x - data["err"]
 88 | 
 89 |     mse = np.mean(data['err2'])
 90 |     mae = np.mean(np.abs(data['err']))
 91 |     
 92 |     chart = (
 93 |         alt.Chart(data)
 94 |         .mark_point()
 95 |         .encode(x="x", y="y")
 96 |         .properties(title=f"Lin Reg | MSE: {mse:5.01f} | MAE: {mae:5.02f}")
 97 |     )
 98 |     if show_pred:
 99 |         chart += (
100 |             alt.Chart(data)
101 |             .mark_line()
102 |             .encode(x="x", y="pred")
103 |         )
104 |     if show_err:
105 |         chart += (
106 |             alt.Chart(data)
107 |             .mark_line()
108 |             .encode(x="x", y="y", y2="pred")
109 | 
110 |         )
111 |     if show_err2:
112 |         chart += (
113 |             alt.Chart(data)
114 |             .mark_rect(fill="none", stroke="red")
115 |             .encode(x="x", y="y", x2="x2", y2="pred")
116 | 
117 |         )
118 |     
119 | 
120 |     if not show_loss_landscape:
121 |         return chart
122 |     
123 |     _chart_loss = (
124 |         alt.Chart(df_slope_losses)
125 |         .mark_line()
126 |         .encode(x="slope", y="loss")
127 |         .properties(title="Loss Landscape (slope)")
128 |     )
129 |     _chart_loss_hl = (
130 |         alt.Chart(pd.DataFrame({"x": [slope], "y": [0], "y2": [400]}))
131 |         .mark_line()
132 |         .encode(x="x", y="y", y2="y2")
133 |     )
134 |     return chart | (_chart_loss + _chart_loss_hl)
135 | 
136 | # %%
137 | # show_lin_reg(
138 | #     slope=.3,
139 | #     bias=8,
140 | #     show_pred=True,
141 | #     show_err=True,
142 | #     show_err2=False,
143 | # )   
144 | 
145 | # %%
146 | ipywidgets.interact(  # interactive front end for show_lin_reg
147 |     show_lin_reg,
148 |     slope=(-2.0, 2.0),  # (min, max) bounds for the slope control
149 |     bias=(-8.0, 8.0),  # (min, max) bounds for the bias control
150 |     show_pred=True,
151 |     show_err=False,  # starts off, unlike the function's own default of True
152 | )
153 | 
154 | # %% [markdown]
155 | # ## Linear regression - more formally
156 | #
157 | # 0. Data
158 | #
159 | #
160 | # 1. Model:
161 | #   - $f(X) = X \beta = \hat y$
162 | #
163 | #
164 | # 2. Loss / criterion:
165 | #   - $ err_i = y_i - f(X_i)$
166 | #   - $MSE = \frac{1}{n} \sum_{i=1}^{n} err_i^2$
167 | #
168 | #
169 | # 3. Optimize:
170 | #   - minimizing the MSE yields the optimal $\hat\beta$ (after doing some math)
171 | #   - $\hat\beta = (X^TX)^{-1}X^Ty$
172 | #   - (or, more generally, use gradient descent to optimize the parameters)
173 | 


--------------------------------------------------------------------------------
/notebooks/working_with_data.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # ---
  3 | # jupyter:
  4 | #   jupytext:
  5 | #     text_representation:
  6 | #       extension: .py
  7 | #       format_name: percent
  8 | #       format_version: '1.3'
  9 | #       jupytext_version: 1.7.1
 10 | #   kernelspec:
 11 | #     display_name: Python 3
 12 | #     language: python
 13 | #     name: python3
 14 | # ---
 15 | 
 16 | # %% [markdown]
 17 | # # Working with Data: `Dataset`, `DataLoader`, `Sampler`, and `Transforms`
 18 | #
 19 | # These basic concepts make it easy to work with large data.
 20 | 
 21 | # %% [markdown]
 22 | # ## Init, helpers, utils, ...
 23 | 
 24 | # %%
 25 | # %matplotlib inline
 26 | 
 27 | # %%
 28 | import torch
 29 | import torch.nn as nn
 30 | import torch.nn.functional as F
 31 | import torch.optim as optim
 32 | import torchvision
 33 | 
 34 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 35 | 
 36 | # %%
 37 | from pprint import pprint
 38 | 
 39 | import matplotlib.pyplot as plt
 40 | import numpy as np
 41 | from IPython.core.debugger import set_trace
 42 | 
 43 | # %% [markdown]
 44 | # # Dataset
 45 | # It's easy to create your `Dataset`,
 46 | # but PyTorch comes with some
 47 | # [built-in datasets](https://pytorch.org/docs/stable/torchvision/datasets.html):
 48 | #
 49 | # - MNIST
 50 | # - Fashion-MNIST
 51 | # - KMNIST
 52 | # - EMNIST
 53 | # - FakeData
 54 | # - COCO
 55 | #   - Captions
 56 | #   - Detection
 57 | # - LSUN
 58 | # - ImageFolder
 59 | # - DatasetFolder
 60 | # - Imagenet-12
 61 | # - CIFAR
 62 | # - STL10
 63 | # - SVHN
 64 | # - PhotoTour
 65 | # - SBU
 66 | # - Flickr
 67 | # - VOC
 68 | # - Cityscapes
 69 | #
 70 | # `Dataset` gives you information about the number of samples (implement `__len__`) and gives you the sample at a given index (implement `__getitem__`).
 71 | # It's a nice and simple abstraction to work with data.
 72 | 
 73 | # %%
 74 | from torch.utils.data import Dataset
 75 | 
 76 | # %% [markdown]
 77 | # ```python
 78 | # class Dataset(object):
 79 | #     def __getitem__(self, index):
 80 | #         raise NotImplementedError
 81 | #
 82 | #     def __len__(self):
 83 | #         raise NotImplementedError
 84 | #
 85 | #     def __add__(self, other):
 86 | #         return ConcatDataset([self, other])
 87 | # ```
 88 | 
 89 | # %% [markdown]
 90 | # The `ImageFolder` dataset is quite useful and follows the usual conventions for folder layouts:
 91 | #
 92 | # ```
 93 | # root/dog/xxx.png
 94 | # root/dog/xxy.png
 95 | # root/dog/xxz.png
 96 | #
 97 | # root/cat/123.png
 98 | # root/cat/nsdf3.png
 99 | # root/cat/asd932_.png
100 | # ```
101 | 
102 | # %% [markdown]
103 | # ## Example: dogs and cats dataset
104 | # Please download the dataset from
105 | # https://www.kaggle.com/chetankv/dogs-cats-images
106 | # and place it in the `notebooks/` folder.
107 | 
108 | # %%
109 | # !tree -d dogscats/
110 | 
111 | # %%
112 | from torchvision.datasets.folder import ImageFolder
113 | 
114 | train_ds = ImageFolder("dogscats/training_set/")
115 | 
116 | # %%
117 | train_ds
118 | 
119 | # %%
120 | # the __len__ method
121 | len(train_ds)
122 | 
123 | # %%
124 | # the __getitem__ method
125 | train_ds[0]
126 | 
127 | # %%
128 | train_ds[0][0]
129 | 
130 | # %%
131 | train_ds[0][1]
132 | 
133 | # %% [markdown]
134 | # Optionally, some datasets offer convenience functions and attributes.
135 | # This is not enforced by the interface! Don't rely on it!
136 | 
137 | # %%
138 | train_ds.classes
139 | 
140 | # %%
141 | train_ds.class_to_idx
142 | 
143 | # %%
144 | train_ds.imgs
145 | 
146 | # %%
147 | 
148 | # %%
149 | import random  # NOTE(review): not used in this cell -- the indices below come from np.random
150 | 
151 | rand_idx = np.random.randint(0, len(train_ds), 4)  # four random indices into the dataset
152 | for i in rand_idx:
153 |     img, label_id = train_ds[i]  # __getitem__ returns an (image, label id) pair
154 |     print(label_id, train_ds.classes[label_id], i)  # label id, class name, sample index
155 |     display(img)
156 | 
157 | # %% [markdown]
158 | # # `torchvision.transforms`
159 | #
160 | # Common image transformations that can be composed/chained [[docs]](https://pytorch.org/docs/stable/torchvision/transforms.html).
161 | 
162 | # %%
163 | from torchvision import transforms
164 | 
165 | # %%
166 | _image_size = 224  # side length of the random crop below
167 | _mean = [0.485, 0.456, 0.406]  # per-channel mean passed to Normalize
168 | _std = [0.229, 0.224, 0.225]  # per-channel std passed to Normalize
169 | 
170 | 
171 | trans = transforms.Compose([  # steps are listed in the order they are composed
172 |     transforms.Resize(256),
173 |     transforms.RandomCrop(_image_size),
174 |     transforms.RandomHorizontalFlip(),
175 |     transforms.ColorJitter(.3, .3, .3),
176 |     transforms.ToTensor(),  # converts the image to a tensor before normalization
177 |     transforms.Normalize(_mean, _std),
178 | ])
179 | 
180 | trans(train_ds[7074][0])  # try the pipeline on one image; index 7074 assumes the dataset is at least that large
181 | 
182 | # %% [markdown]
183 | # ## `torchvision.transforms.functional`
184 | #
185 | # >Functional transforms give you fine-grained control of the transformation pipeline. As opposed to the transformations above, functional transforms don’t contain a random number generator for their parameters. That means you have to specify/generate all parameters, but you can reuse the functional transform. For example, you can apply a functional transform to multiple images like this:
186 | # >
187 | # > https://pytorch.org/docs/stable/torchvision/transforms.html#functional-transforms
188 | #
189 | # ```python
190 | # import torchvision.transforms.functional as TF
191 | # import random
192 | #
193 | # def my_segmentation_transforms(image, segmentation):
194 | #     if random.random() > 0.5:
195 | #         angle = random.randint(-30, 30)
196 | #         image = TF.rotate(image, angle)
197 | #         segmentation = TF.rotate(segmentation, angle)
198 | #     # more transforms ...
199 | #     return image, segmentation
200 | # ```
201 | 
202 | # %% [markdown]
203 | # Ref:
204 | # - https://pytorch.org/docs/stable/torchvision/transforms.html
205 | # - https://pytorch.org/docs/stable/torchvision/transforms.html#functional-transforms
206 | # - https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
207 | # - https://github.com/mdbloice/Augmentor
208 | # - https://github.com/aleju/imgaug
209 | #
210 | # Shout-out:
211 | # - High-performance image augmentation with pillow-simd [[github]](https://github.com/uploadcare/pillow-simd) [[benchmark]](http://python-pillow.org/pillow-perf/)
212 | # - Improving Deep Learning Performance with AutoAugment [[blog]](https://ai.googleblog.com/2018/06/improving-deep-learning-performance.html) [[paper]](https://arxiv.org/abs/1805.09501) [[pytorch implementation]](https://github.com/DeepVoltaire/AutoAugment)
213 | 
214 | # %% [markdown]
215 | # # Dataloader
216 | # The `DataLoader` class offers batch loading of datasets with multi-processing and different sample strategies [[docs]](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader).
217 | #
218 | # The signature looks something like this:
219 | # ```python
220 | # DataLoader(
221 | #     dataset,
222 | #     batch_size=1,
223 | #     shuffle=False,
224 | #     sampler=None,
225 | #     batch_sampler=None,
226 | #     num_workers=0,
227 | #     collate_fn=default_collate,
228 | #     pin_memory=False,
229 | #     drop_last=False,
230 | #     timeout=0,
231 | #     worker_init_fn=None
232 | # )
233 | # ```
234 | 
235 | # %%
236 | from torch.utils.data import DataLoader
237 | 
238 | # %%
239 | train_ds = ImageFolder("dogscats/training_set/", transform=trans)  # same folder as before, now with the transform pipeline attached
240 | train_dl = DataLoader(
241 |     train_ds,
242 |     batch_size=2,  # two samples per batch
243 |     shuffle=True,
244 |     num_workers=4,  # load batches with four worker processes
245 | )
246 | 
247 | # %%
248 | train_iter = iter(train_dl)
249 | X, y = next(train_iter)
250 | 
251 | # %%
252 | print("X:", X.shape)
253 | print("y:", y.shape)
254 | 
255 | # %% [markdown]
256 | # Note that I passed `trans`, which returns `torch.Tensor`, not pillow images.
257 | # DataLoader expects tensors, numbers, dicts or lists.
258 | 
259 | # %%
260 | _train_ds = ImageFolder("dogscats/test_set/", transform=trans)  # NOTE(review): named _train_* but reads the test_set folder -- confirm intent
261 | _train_dl = DataLoader(_train_ds, batch_size=2, shuffle=True)
262 | 
263 | 
264 | # %% [markdown]
265 | # ## `collate_fn`
266 | # The `collate_fn` argument of `DataLoader` allows you to customize how single datapoints are put together into a batch.
267 | # `collate_fn` is a simple callable that gets a list of datapoints (i.e. what `dataset.__getitem__` returns).
268 | 
269 | # %% [markdown]
270 | # Example of a custom `collate_fn`
271 | # (taken from [here](https://discuss.pytorch.org/t/how-to-create-a-dataloader-with-variable-size-input/8278/3)):
272 | 
273 | # %%
274 | def my_collate_fn(list_of_x_y):
275 |     data = [item[0] for item in list_of_x_y]
276 |     target = [item[1] for item in list_of_x_y]
277 |     target = torch.LongTensor(target)
278 |     return [data, target]
279 | 
280 | 
281 | # %% [markdown]
282 | # # Sampler
283 | # `Sampler` defines **how** to sample from the dataset [[docs]](https://pytorch.org/docs/stable/data.html#torch.utils.data.sampler.Sampler).
284 | #
285 | # Examples:
286 | # - `SequentialSampler`
287 | # - `RandomSampler`
288 | # - `SubsetRandomSampler`
289 | # - `WeightedRandomSampler`
290 | #
291 | # Write your own by simply implementing `__iter__` to iterate over the indices of the dataset.
292 | #
293 | # ```python
294 | # class Sampler(object):
295 | #     def __init__(self, data_source):
296 | #         pass
297 | #
298 | #     def __iter__(self):
299 | #         raise NotImplementedError
300 | #
301 | #     def __len__(self):
302 | #         raise NotImplementedError
303 | # ```
304 | 
305 | # %% [markdown]
306 | # # Recap
307 | # - `Dataset`: get one datapoint
308 | # - `transforms`: composable transformations
309 | # - `DataLoader`: combine single datapoints into batches (plus multi processing and more)
310 | # - `Sampler`: **how** to sample from a dataset
311 | #
312 | # **Simple but extensible interfaces**
313 | 
314 | # %% [markdown]
315 | # # Exercise
316 | # Go out and play:
317 | #
318 | # - Maybe extend the `DogsCatsDataset` such that you can specify the size of dataset, i.e. the number of samples.
319 | # - Maybe try the `Subset` [[docs]](https://pytorch.org/docs/stable/data.html#torch.utils.data.Subset) to create smaller datasets.
320 | # - Maybe create `SubsetFraction` where you can specify the size of the dataset (between 0. and 1.).
321 | # - Maybe write a custom collate function for the `DogsCatsDataset` that turns it into a dataset appropriate to use in an autoencoder setting.
322 | 
323 | # %%
324 | def autoencoder_collate_fn(list_of_x_y):
325 |     # TODO implement me
326 |     pass
327 | 
328 | 
329 | # %%
330 | class MyDataSet(Dataset):
331 |     def __init__(self):
332 |         super().__init__()
333 |         # TODO implement me
334 |     
335 |     def __len__(self):
336 |         # TODO implement me
337 |         pass
338 |     
339 |     def __getitem__(self, idx):
340 |         # TODO implement me
341 |         pass
342 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | torch==1.7
 2 | torchvision
 3 | jupyterlab
 4 | jupytext
 5 | notebook
 6 | ipywidgets
 7 | matplotlib
 8 | scikit-learn  # PyPI distribution name; the old "sklearn" alias is deprecated and no longer installs
 9 | altair
10 | altair_viewer
11 | vega_datasets
12 | 


--------------------------------------------------------------------------------