├── .github
    └── workflows
    │   ├── ci.yml
    │   └── label.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── generate.py
├── how-to-guides
    ├── 01-installation.ipynb
    ├── 02-convert-pytorch-to-ignite.ipynb
    ├── 03-time-profiling.ipynb
    ├── 04-fastai-lr-finder.ipynb
    ├── 05-gradient-accumulation.ipynb
    ├── 06-data-iterator.ipynb
    ├── 07-cross-validation.ipynb
    ├── 08-custom-events.ipynb
    ├── 09-switch-data-training.ipynb
    ├── 10-loggers.ipynb
    ├── 11-load-checkpoint.ipynb
    └── assets
    │   ├── clearml-dashboard.png
    │   └── convert-pytorch2ignite.gif
├── tests
    └── test_generate.py
└── tutorials
    ├── advanced
        └── 01-collective-communication.ipynb
    ├── assets
        ├── all-gather.png
        ├── all-reduce.png
        └── broadcast.png
    ├── beginner
        ├── 01-getting-started.ipynb
        └── 02-transformers-text-classification.ipynb
    └── intermediate
        ├── 01-cifar10-distributed.ipynb
        ├── 02-Machine_Translation_using_PyTorch_Ignite.ipynb
        ├── 03-reinforcement-learning.ipynb
        └── cifar10-distributed.py


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: ci
 2 | on:
 3 |   push:
 4 |     paths:
 5 |       - "generate.py"
 6 |       - ".github/workflows/ci.yml"
 7 | 
 8 |   pull_request:
 9 |     paths:
10 |       - "generate.py"
11 |       - ".github/workflows/ci.yml"
12 | 
13 | jobs:
14 |   Run-Tests:
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/checkout@v2
18 |       - name: Install dependencies
19 |         run: |
20 |           pip install pytest
21 |       - name: Run tests
22 |         run: |
23 |           pytest tests -vvv
24 | 


--------------------------------------------------------------------------------
/.github/workflows/label.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will triage pull requests and apply a label based on the
 2 | # paths that are modified in the pull request.
 3 | #
 4 | # To use this workflow, you will need to set up a .github/labeler.yml
 5 | # file with configuration.  For more information, see:
 6 | # https://github.com/actions/labeler
 7 | 
 8 | name: Labeler
 9 | on: [pull_request]
10 | 
11 | jobs:
12 |   label:
13 | 
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       contents: read
17 |       pull-requests: write
18 | 
19 |     steps:
20 |     - uses: actions/labeler@v2
21 |       with:
22 |         repo-token: "${{ secrets.GITHUB_TOKEN }}"
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # How to Guide
132 | # Ignore generated files
133 | how-to-guides/MNIST/
134 | *.csv


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guide
 2 | 
 3 | This project is a community effort, and everyone is welcome to contribute!
 4 | 
 5 | If you are interested in contributing to PyTorch-Ignite's examples, there are many ways to help out. Your contributions may fall into the following categories:
 6 | 
 7 | 1. It helps us very much if you could report issues you’re facing with:
 8 | 
 9 |    - Executing the Jupyter notebooks, scripts or rendering assets.
10 |    - Understanding the language written in any of the notebooks that can be simplified.
11 | 
12 | 2. You would like to add more examples. These fall into two categories:
13 |    1. Tutorials: Something general which has emphasis on explanation and has self-contained end-to-end code which showcases an Ignite concept or concepts. These are meant for learning purposes when exploring the library. See cifar10 tutorial on distributed training: https://pytorch-ignite.ai/tutorials/cifar10-distributed/ for an example.
14 |    2. How-to guides: These are very specific and more code-based. They are used to answer a specific question like how to use FastaiLR finder with Ignite or how to do cross validation. Comparing it with the `cifar10` tutorial above, if we were to make a how to guide it could go like: how to train a model using multiple gpus with ignite.
15 | 
16 | Please refer to [README.md](https://github.com/pytorch-ignite/examples#readme) on how to generate Jupyter notebooks with built-in frontmatter.
17 | 
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2021, PyTorch-Ignite Organization
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PyTorch-Ignite Examples, tutorials and how-to guides
 2 | 
 3 | Notebooks in this repo require the essential frontmatter in order to be
 4 | rendered on the [website](https://pytorch-ignite.ai).
 5 | 
 6 | To contribute a notebook, please use the [`generate.py`](./generate.py)
 7 | script to generate it.
 8 | 
 9 | **Usage:**
10 | 
11 | ```sh
12 | # python generate.py <notebook-names>...
13 | python generate.py yolo
14 | # > Generated /workspace/yolo.ipynb
15 | ```
16 | 
17 | Alternatively, you can run `generate.py` on your existing notebooks to add the required frontmatter cell to them.
18 | ```sh
19 | # If your completed tutorial is present in /workspace/yolo.ipynb
20 | python generate.py yolo
21 | # > Added frontmatter to /workspace/yolo.ipynb
22 | ```
23 | This will add the necessary frontmatter cell at the top of the notebook. Then open the notebook and update the placeholder values.
24 | 
25 | See more in [`generate.py`](./generate.py).
26 | 


--------------------------------------------------------------------------------
/generate.py:
--------------------------------------------------------------------------------
  1 | # Generate plain notebooks with the required frontmatter defined
  2 | 
  3 | # Usage:
  4 | #   $ python generate.py <notebook-names>... [-h]
  5 | #
  6 | #   Generate plain notebooks with the required frontmatter defined.
  7 | #
  8 | # Positional arguments:
  9 | #   notebook_names        Notebooks to generate
 10 | #
 11 | # Options:
 12 | #   -h, --help            show this help message and exit
 13 | #
 14 | # Example:
 15 | #   python generate.py {data-iterator,fastai-lr-finder,gradient-accumulation,installation}
 16 | 
 17 | import json
 18 | import os
 19 | from argparse import ArgumentParser
 20 | from datetime import datetime
 21 | 
 22 | today = datetime.now().strftime('%Y-%m-%d')
 23 | 
 24 | notebook = {
 25 |   'nbformat': 4,
 26 |   'nbformat_minor': 0,
 27 |   'metadata': {
 28 |     'kernelspec': {
 29 |       'display_name': 'Python 3',
 30 |       'name': 'python3',
 31 |     },
 32 |     'accelerator': 'GPU',
 33 |   },
 34 |   'cells': [
 35 |     {
 36 |       'cell_type': 'markdown',
 37 |       'metadata': {},
 38 |       'source': [
 39 |         '<!-- ---\n',
 40 |         'title: <required-title>\n',
 41 |         f'date: {today}\n',
 42 |         'downloads: true\n',
 43 |         'weight: <required-weight> See: https://github.com/pytorch-ignite/examples/issues/30\n',
 44 |         'summary: <use either this or the `<!--more-->` tag below to provide summary for this notebook, '
 45 |         'and delete the other>\n'
 46 |         'tags:\n',
 47 |         '  - <required-tag>\n',
 48 |         '--- -->\n',
 49 |         '\n',
 50 |         '# title-placeholder\n',
 51 |         '\n',
 52 |         '<If you are not using the `summary` variable above, use this space to '
 53 |         'provide a summary for this notebook.>\n',
 54 |         '<Otherwise, delete the `<!--more-->` below.>',
 55 |         '\n',
 56 |         '<!--more-->',
 57 |       ]
 58 |     }
 59 |   ]
 60 | }
 61 | 
 62 | if __name__ == '__main__':
 63 |   cwd = os.getcwd()
 64 |   parser = ArgumentParser(
 65 |     'generate',
 66 |     '$ python generate.py <notebook-names>... [-h]',
 67 |     'Generate plain notebooks with the required frontmatter defined.'
 68 |   )
 69 |   parser.add_argument(
 70 |     'notebook_names',
 71 |     help='Notebooks to generate',
 72 |     nargs='+',
 73 |   )
 74 |   args = parser.parse_args()
 75 |   for name in args.notebook_names:
 76 |     if not name.endswith('.ipynb'):
 77 |       name = name + ".ipynb"
 78 | 
 79 |     if os.path.isfile(name):
 80 |       with open(name) as fp:
 81 |         content = json.load(fp)
 82 |       if len(content['cells']) > 0 and content['cells'][0] == notebook['cells'][0]:
 83 |         print(f'Frontmatter cell already exists in {os.path.join(cwd, name)}. Exiting')
 84 | 
 85 |       else:
 86 |         for key, value in content.items():
 87 |           if key != 'cells':
 88 |             content[key] = notebook[key]
 89 |           else:
 90 |             content[key] = notebook[key] + content[key]
 91 | 
 92 |         with open(name, mode='w') as f:
 93 |           f.write(json.dumps(content, indent=2))
 94 |           print(f'Added frontmatter to {os.path.join(cwd, name)}')
 95 | 
 96 |     else:
 97 |       with open(name, 'w') as fp:
 98 |         json.dump(notebook, fp, indent=2)
 99 |         print(f'Generated {os.path.join(cwd, name)}')
100 | 


--------------------------------------------------------------------------------
/how-to-guides/01-installation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "d13be020",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<!-- ---\n",
  9 |     "title: How to install PyTorch-Ignite\n",
 10 |     "date: 2021-08-04\n",
 11 |     "weight: 1\n",
 12 |     "downloads: true\n",
 13 |     "sidebar: true\n",
 14 |     "summary: Install PyTorch-Ignite from pip, conda, source or use pre-built docker images\n",
 15 |     "tags:\n",
 16 |     "  - installation\n",
 17 |     "  - pip\n",
 18 |     "  - docker images\n",
 19 |     "  - conda\n",
 20 |     "  - nightly\n",
 21 |     "--- -->\n",
 22 |     "# How to install PyTorch-Ignite"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "id": "513c70bb",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "From [pip](https://pypi.org/project/pytorch-ignite/):\n",
 31 |     "\n",
 32 |     "``` shell\n",
 33 |     "pip install pytorch-ignite\n",
 34 |     "```\n",
 35 |     "\n",
 36 |     "From [conda](https://anaconda.org/pytorch/ignite):\n",
 37 |     "\n",
 38 |     "``` shell\n",
 39 |     "conda install ignite -c pytorch\n",
 40 |     "```\n",
 41 |     "\n",
 42 |     "From source:\n",
 43 |     "\n",
 44 |     "``` shell\n",
 45 |     "pip install git+https://github.com/pytorch/ignite\n",
 46 |     "```"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "id": "7df69584",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "## Nightly releases\n",
 55 |     "\n",
 56 |     "From pip:\n",
 57 |     "\n",
 58 |     "``` shell\n",
 59 |     "pip install --pre pytorch-ignite\n",
 60 |     "```\n",
 61 |     "\n",
 62 |     "From conda (please install the [pytorch nightly\n",
 63 |     "release](https://anaconda.org/pytorch-nightly/pytorch) instead of the\n",
 64 |     "stable version as a dependency):\n",
 65 |     "\n",
 66 |     "``` shell\n",
 67 |     "conda install ignite -c pytorch-nightly\n",
 68 |     "```"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "id": "a0f0118c",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "\n",
 77 |     "## Docker Images\n",
 78 |     "\n",
 79 |     "### Using pre-built images\n",
 80 |     "\n",
 81 |     "Pull a pre-built docker image from [our Docker\n",
 82 |     "Hub](https://hub.docker.com/u/pytorchignite) using :\n",
 83 |     "\n",
 84 |     "``` shell\n",
 85 |     "docker pull IMAGE_NAME\n",
 86 |     "```\n",
 87 |     "\n",
 88 |     "Available pre-built images are :\n",
 89 |     "\n",
 90 |     "|               |                                           Base                                           |                                             Horovod                                              |                                            MS DeepSpeed                                            |\n",
 91 |     "|:-------------:|:----------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------:|\n",
 92 |     "|     Base      |        [`pytorchignite/base:latest`](https://hub.docker.com/r/pytorchignite/base)        |        [`pytorchignite/hvd-base:latest`](https://hub.docker.com/r/pytorchignite/hvd-base)        |                                                 X                                                  |\n",
 93 |     "|    Vision     |      [`pytorchignite/vision:latest`](https://hub.docker.com/r/pytorchignite/vision)      |      [`pytorchignite/hvd-vision:latest`](https://hub.docker.com/r/pytorchignite/hvd-vision)      |                                                 X                                                  |\n",
 94 |     "|      NLP      |         [`pytorchignite/nlp:latest`](https://hub.docker.com/r/pytorchignite/nlp)         |         [`pytorchignite/hvd-nlp:latest`](https://hub.docker.com/r/pytorchignite/hvd-nlp)         |                                                 X                                                  |\n",
 95 |     "|  NVIDIA Apex  |        [`pytorchignite/apex:latest`](https://hub.docker.com/r/pytorchignite/apex)        |        [`pytorchignite/hvd-apex:latest`](https://hub.docker.com/r/pytorchignite/hvd-apex)        |        [`pytorchignite/msdp-apex:latest`](https://hub.docker.com/r/pytorchignite/msdp-apex)        |\n",
 96 |     "| Apex + Vision | [`pytorchignite/apex-vision:latest`](https://hub.docker.com/r/pytorchignite/apex-vision) | [`pytorchignite/hvd-apex-vision:latest`](https://hub.docker.com/r/pytorchignite/hvd-apex-vision) | [`pytorchignite/msdp-apex-vision:latest`](https://hub.docker.com/r/pytorchignite/msdp-apex-vision) |\n",
 97 |     "|  Apex + NLP   |    [`pytorchignite/apex-nlp:latest`](https://hub.docker.com/r/pytorchignite/apex-nlp)    |    [`pytorchignite/hvd-apex-nlp:latest`](https://hub.docker.com/r/pytorchignite/hvd-apex-nlp)    |    [`pytorchignite/msdp-apex-nlp:latest`](https://hub.docker.com/r/pytorchignite/msdp-apex-nlp)    |\n",
 98 |     "\n",
 99 |     "and run it with Docker v19.03+ :\n",
100 |     "\n",
101 |     "``` shell\n",
102 |     "docker run --gpus all -it -v $PWD:/workspace/project --network=host --shm-size 16G IMAGE_NAME\n",
103 |     "```\n",
104 |     "\n",
105 |     "For more details, [check out our\n",
106 |     "GitHub](https://github.com/pytorch/ignite/tree/master/docker)."
107 |    ]
108 |   }
109 |  ],
110 |  "metadata": {
111 |   "kernelspec": {
112 |    "display_name": "Python 3 (ipykernel)",
113 |    "language": "python",
114 |    "name": "python3"
115 |   },
116 |   "language_info": {
117 |    "codemirror_mode": {
118 |     "name": "ipython",
119 |     "version": 3
120 |    },
121 |    "file_extension": ".py",
122 |    "mimetype": "text/x-python",
123 |    "name": "python",
124 |    "nbconvert_exporter": "python",
125 |    "pygments_lexer": "ipython3",
126 |    "version": "3.10.4"
127 |   }
128 |  },
129 |  "nbformat": 4,
130 |  "nbformat_minor": 5
131 | }
132 | 


--------------------------------------------------------------------------------
/how-to-guides/02-convert-pytorch-to-ignite.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "id": "xo0JaCAvVI64"
  7 |    },
  8 |    "source": [
  9 |     "<!-- ---\n",
 10 |     "title: How to convert pure PyTorch code to Ignite\n",
 11 |     "weight: 2\n",
 12 |     "downloads: true\n",
 13 |     "sidebar: true\n",
 14 |     "tags:\n",
 15 |     "  - training loop\n",
 16 |     "  - validation loop\n",
 17 |     "  - metrics\n",
 18 |     "  - checkpoints\n",
 19 |     "  - progress bar\n",
 20 |     "  - logging\n",
 21 |     "--- -->\n",
 22 |     "# How to convert pure PyTorch code to Ignite "
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {
 28 |     "id": "CXNZ4XPeV8_I"
 29 |    },
 30 |    "source": [
 31 |     "In this guide, we will show how PyTorch code components can be converted into compact and flexible PyTorch-Ignite code. \n",
 32 |     "\n",
 33 |     "<!--more-->\n",
 34 |     "\n",
 35 |     "![Convert PyTorch to Ignite](assets/convert-pytorch2ignite.gif)"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "Since Ignite focuses on the training and validation pipeline, the code for models, datasets, optimizers, etc will remain user-defined and in pure PyTorch."
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {
 49 |     "id": "L6zvxAsVjP-Z"
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "model = ...\n",
 54 |     "train_loader = ...\n",
 55 |     "val_loader = ...\n",
 56 |     "optimizer = ...\n",
 57 |     "criterion = ..."
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {
 63 |     "id": "2EmmpiTX6huF"
 64 |    },
 65 |    "source": [
 66 |     "## Training Loop to `trainer`\n",
 67 |     "\n",
 68 |     "A typical PyTorch training loop processes a single batch of data, passes it through the `model`, calculates `loss`, etc as below:\n",
 69 |     "\n",
 70 |     "```python\n",
 71 |     "for batch in train_loader:\n",
 72 |     "    model.train()\n",
 73 |     "    inputs, targets = batch\n",
 74 |     "    optimizer.zero_grad()\n",
 75 |     "    outputs = model(inputs)\n",
 76 |     "    loss = criterion(outputs, targets)\n",
 77 |     "    loss.backward()\n",
 78 |     "    optimizer.step()\n",
 79 |     "```"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {
 85 |     "id": "zDkeEWz58hCJ"
 86 |    },
 87 |    "source": [
 88 |     "To convert the above code into Ignite we need to move the code or steps taken to process a single batch of data while training under a function (`train_step()` below). This function will take `engine` and `batch` (current batch of data) as arguments and can return any data (usually the loss) that can be accessed via `engine.state.output`. We pass this function to `Engine` which creates a `trainer` object."
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {
 95 |     "id": "lkWiJVuvh-LC"
 96 |    },
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "from ignite.engine import Engine\n",
100 |     "\n",
101 |     "\n",
102 |     "def train_step(engine, batch):\n",
103 |     "    model.train()\n",
104 |     "    inputs, targets = batch\n",
105 |     "    optimizer.zero_grad()\n",
106 |     "    outputs = model(inputs)\n",
107 |     "    loss = criterion(outputs, targets)\n",
108 |     "    loss.backward()\n",
109 |     "    optimizer.step()\n",
110 |     "    return loss.item()\n",
111 |     "\n",
112 |     "\n",
113 |     "trainer = Engine(train_step)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {
119 |     "id": "4MWJzKK8-AiC"
120 |    },
121 |    "source": [
122 |     "There are other [helper methods](https://pytorch.org/ignite/engine.html#helper-methods-to-define-supervised-trainer-and-evaluator) that directly create the `trainer` object without writing a custom function for some common use cases like [supervised training](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html#ignite.engine.create_supervised_trainer) and [truncated backprop through time](https://pytorch.org/ignite/contrib/engines.html#ignite.contrib.engines.tbptt.create_supervised_tbptt_trainer)."
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {
128 |     "id": "cocfuUFZ8okw"
129 |    },
130 |    "source": [
131 |     "## Validation Loop to `evaluator`\n",
132 |     "\n",
133 |     "The validation loop typically makes predictions (`y_pred` below) on the `val_loader` batch by batch and uses them to calculate evaluation metrics (Accuracy, Intersection over Union, etc) as below:\n",
134 |     "\n",
135 |     "```python\n",
136 |     "model.eval()\n",
137 |     "num_correct = 0\n",
138 |     "num_examples = 0\n",
139 |     "\n",
140 |     "for batch in val_loader:\n",
141 |     "    x, y = batch\n",
142 |     "    y_pred = model(x)\n",
143 |     "\n",
144 |     "    correct = torch.eq(torch.round(y_pred).type(y.type()), y).view(-1)\n",
145 |     "    num_correct = torch.sum(correct).item()\n",
146 |     "    num_examples = correct.shape[0]\n",
147 |     "    print(f\"Epoch: {epoch},  Accuracy: {num_correct / num_examples}\")\n",
148 |     "```"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {
154 |     "id": "N0ETiWo9E0D4"
155 |    },
156 |    "source": [
157 |     "We will convert this to Ignite in two steps by separating the validation and metrics logic.\n",
158 |     "\n",
159 |     "We will move the model evaluation logic under another function (`validation_step()` below) which receives the same parameters as `train_step()` and processes a single batch of data to return some output (usually the predicted and actual value which can be used to calculate metrics) stored in `engine.state.output`. Another instance (called `evaluator` below) of `Engine` is created by passing the `validation_step()` function."
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": null,
165 |    "metadata": {
166 |     "id": "zv2kceT0CS-L"
167 |    },
168 |    "outputs": [],
169 |    "source": [
170 |     "def validation_step(engine, batch):\n",
171 |     "    model.eval()\n",
172 |     "    with torch.no_grad():\n",
173 |     "        x, y = batch\n",
174 |     "        y_pred = model(x)\n",
175 |     "\n",
176 |     "    return y_pred, y\n",
177 |     "    \n",
178 |     "    \n",
179 |     "evaluator = Engine(validation_step)"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {
185 |     "id": "EAIBqfFm8oqS"
186 |    },
187 |    "source": [
188 |     "Similar to the training loop, there are [helper methods](https://pytorch.org/ignite/engine.html#helper-methods-to-define-supervised-trainer-and-evaluator) to avoid writing this custom evaluation function like [`create_supervised_evaluator`](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_evaluator.html#ignite.engine.create_supervised_evaluator).\n",
189 |     "\n",
190 |     "**Note**: You can create different evaluators for training, validation, and testing if they serve different purposes. A common practice is to have two separate evaluators for training and validation, since the results of the validation evaluator are helpful in determining the best model to save after training."
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {
196 |     "id": "4t4PsYXn8ost"
197 |    },
198 |    "source": [
199 |     "## Switch to built-in Metrics\n",
200 |     "\n",
201 |     "Then we can replace the code for calculating metrics like accuracy and instead use several [out-of-the-box metrics](https://pytorch.org/ignite/metrics.html#complete-list-of-metrics) that Ignite provides or write a custom one (refer [here](https://pytorch.org/ignite/metrics.html#how-to-create-a-custom-metric)). The metrics will be computed using the `evaluator`'s output. Finally, we attach these metrics to the `evaluator` by providing a key name (\"accuracy\" below) so they can be accessed via `engine.state.metrics[key_name]`."
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {
208 |     "id": "iUVAOP6kFdA-"
209 |    },
210 |    "outputs": [],
211 |    "source": [
212 |     "from ignite.metrics import Accuracy\n",
213 |     "\n",
214 |     "Accuracy().attach(evaluator, \"accuracy\")"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "markdown",
219 |    "metadata": {
220 |     "id": "WnGK925N5AR7"
221 |    },
222 |    "source": [
223 |     "## Organizing code into Events and Handlers\n",
224 |     "\n",
225 |     "Next, we need to identify any code that is triggered when an event occurs. Examples of events can be the start of an iteration, completion of an epoch, or even the start of backprop. We already provide some predefined events (complete list [here](https://pytorch.org/ignite/generated/ignite.engine.events.Events.html#ignite.engine.events.Events)) however we can also create custom ones (refer [here](https://pytorch-ignite.ai/concepts/02-events-and-handlers#custom-events)). We move the event-specific code to different handlers (named functions, lambdas, class functions) which are attached to these events and executed whenever a specific event happens. Here are some common handlers:"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "markdown",
230 |    "metadata": {
231 |     "id": "uZIdI39b-rB4"
232 |    },
233 |    "source": [
234 |     "### Running `evaluator`\n",
235 |     "\n",
236 |     "We can convert the code that runs the `evaluator` on the training/validation/test dataset every `validate_every` epochs:\n",
237 |     "\n",
238 |     "```python\n",
239 |     "if epoch % validate_every == 0:\n",
240 |     "    # Validation logic\n",
241 |     "```\n",
242 |     "\n",
243 |     "by attaching a handler to a built-in event `EPOCH_COMPLETED` like:"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {
250 |     "id": "62Z6RmfJVn7s"
251 |    },
252 |    "outputs": [],
253 |    "source": [
254 |     "from ignite.engine import Events\n",
255 |     "\n",
256 |     "validate_every = 10\n",
257 |     "\n",
258 |     "\n",
259 |     "@trainer.on(Events.EPOCH_COMPLETED(every=validate_every))\n",
260 |     "def run_validation():\n",
261 |     "    evaluator.run(val_loader)"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "markdown",
266 |    "metadata": {
267 |     "id": "7bkte_sKb-vr"
268 |    },
269 |    "source": [
270 |     "### Logging metrics\n",
271 |     "\n",
272 |     "Similarly, we can log the validation metrics in another handler or combine it with the above handler."
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": null,
278 |    "metadata": {
279 |     "id": "ZExU6_CscHyf"
280 |    },
281 |    "outputs": [],
282 |    "source": [
283 |     "@trainer.on(Events.EPOCH_COMPLETED(every=validate_every))\n",
284 |     "def log_validation():\n",
285 |     "    metrics = evaluator.state.metrics\n",
286 |     "    print(f\"Epoch: {trainer.state.epoch},  Accuracy: {metrics['accuracy']}\")"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "markdown",
291 |    "metadata": {
292 |     "id": "sRgDrTgi5AU_"
293 |    },
294 |    "source": [
295 |     "### Progress Bar\n",
296 |     "\n",
297 |     "We use a built-in wrapper around `tqdm` called [`ProgressBar()`](https://pytorch.org/ignite/generated/ignite.contrib.handlers.tqdm_logger.html#module-ignite.contrib.handlers.tqdm_logger)."
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": null,
303 |    "metadata": {
304 |     "id": "0j79aG7ddmk6"
305 |    },
306 |    "outputs": [],
307 |    "source": [
308 |     "from ignite.contrib.handlers import ProgressBar\n",
309 |     "\n",
310 |     "ProgressBar().attach(trainer)"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "markdown",
315 |    "metadata": {
316 |     "id": "vkqMcVnA5AZ3"
317 |    },
318 |    "source": [
319 |     "### Checkpointing\n",
320 |     "\n",
321 |     "Instead of saving all models every `checkpoint_every` epochs:\n",
322 |     "```python\n",
323 |     "if epoch % checkpoint_every == 0:\n",
324 |     "    checkpoint(model, optimizer, \"checkpoint_dir\")\n",
325 |     "```\n",
326 |     "\n",
327 |     "we can smartly save the best `n_saved` models (depending on `evaluator.state.metrics`), and the state of `optimizer` and `trainer` via the built-in [`Checkpoint()`](https://pytorch.org/ignite/generated/ignite.handlers.checkpoint.Checkpoint.html#checkpoint).\n"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "code",
332 |    "execution_count": null,
333 |    "metadata": {
334 |     "id": "VAkDj1fpoSij"
335 |    },
336 |    "outputs": [],
337 |    "source": [
338 |     "from ignite.handlers import Checkpoint\n",
339 |     "\n",
340 |     "checkpoint_every = 5\n",
341 |     "checkpoint_dir = ...\n",
342 |     "\n",
343 |     "\n",
344 |     "checkpointer = Checkpoint(\n",
345 |     "    to_save={'model': model, 'optimizer': optimizer, 'trainer': trainer},\n",
346 |     "    save_handler=checkpoint_dir, n_saved=2\n",
347 |     ")\n",
348 |     "trainer.add_event_handler(\n",
349 |     "    Events.EPOCH_COMPLETED(every=checkpoint_every), checkpointer\n",
350 |     ")"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "markdown",
355 |    "metadata": {
356 |     "id": "WbByMD6xYpgM"
357 |    },
358 |    "source": [
359 |     "## Run for a number of epochs\n",
360 |     "\n",
361 |     "Finally, instead of:\n",
362 |     "```python\n",
363 |     "max_epochs = ...\n",
364 |     "\n",
365 |     "for epoch in range(max_epochs):\n",
366 |     "```\n",
367 |     "we begin training on `train_loader` via:\n",
368 |     "```python\n",
369 |     "trainer.run(train_loader, max_epochs)\n",
370 |     "```"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "markdown",
375 |    "metadata": {},
376 |    "source": [
377 |     "An end-to-end example implementing the above principles can be found [here](https://pytorch-ignite.ai/tutorials/getting-started/#complete-code)."
378 |    ]
379 |   }
380 |  ],
381 |  "metadata": {
382 |   "colab": {
383 |    "collapsed_sections": [],
384 |    "name": "convert-pytorch-to-ignite.ipynb",
385 |    "provenance": [],
386 |    "toc_visible": true
387 |   },
388 |   "kernelspec": {
389 |    "display_name": "Python 3 (ipykernel)",
390 |    "language": "python",
391 |    "name": "python3"
392 |   },
393 |   "language_info": {
394 |    "codemirror_mode": {
395 |     "name": "ipython",
396 |     "version": 3
397 |    },
398 |    "file_extension": ".py",
399 |    "mimetype": "text/x-python",
400 |    "name": "python",
401 |    "nbconvert_exporter": "python",
402 |    "pygments_lexer": "ipython3",
403 |    "version": "3.10.4"
404 |   }
405 |  },
406 |  "nbformat": 4,
407 |  "nbformat_minor": 4
408 | }
409 | 


--------------------------------------------------------------------------------
/how-to-guides/04-fastai-lr-finder.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "id": "5w-QlZE9mvdY"
  7 |    },
  8 |    "source": [
  9 |     "<!-- ---\n",
 10 |     "title: How to use FastaiLRFinder with Ignite\n",
 11 |     "date: 2021-08-02\n",
 12 |     "weight: 4\n",
 13 |     "downloads: true\n",
 14 |     "sidebar: true\n",
 15 |     "tags:\n",
 16 |     "  - lr finder\n",
 17 |     "--- -->\n",
 18 |     "\n",
 19 |     "# How to use FastaiLRFinder with Ignite"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "This how-to guide demonstrates how we can leverage the [`FastaiLRFinder`](https://pytorch.org/ignite/generated/ignite.handlers.lr_finder.FastaiLRFinder.html) handler to find an optimal learning rate to train our model with. We will compare the results produced with and without the handler for a better understanding.\n",
 27 |     "\n",
 28 |     "<!--more-->\n",
 29 |     "\n",
 30 |     "In this example, we will be using a [ResNet18](https://pytorch.org/vision/stable/models.html#torchvision.models.resnet18) model on the [MNIST](https://pytorch.org/vision/stable/datasets.html#torchvision.datasets.MNIST) dataset. The base code is the same as used in the [Getting Started Guide](https://pytorch-ignite.ai/tutorials/getting-started/)."
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {
 36 |     "id": "L_wmAdFgmvdx"
 37 |    },
 38 |    "source": [
 39 |     "## Basic Setup"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 1,
 45 |    "metadata": {
 46 |     "id": "lMphyBmmmvdw",
 47 |     "pycharm": {
 48 |      "is_executing": false
 49 |     }
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "import torch\n",
 54 |     "import torch.nn as nn\n",
 55 |     "from torch.utils.data import DataLoader\n",
 56 |     "from torchvision.datasets import MNIST\n",
 57 |     "from torchvision.models import resnet18\n",
 58 |     "from torchvision.transforms import Compose, Normalize, ToTensor\n",
 59 |     "\n",
 60 |     "from ignite.engine import create_supervised_trainer, create_supervised_evaluator\n",
 61 |     "from ignite.metrics import Accuracy, Loss\n",
 62 |     "from ignite.handlers import FastaiLRFinder"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 22,
 68 |    "metadata": {
 69 |     "id": "eZeKOgKymvdx"
 70 |    },
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
 74 |     "\n",
 75 |     "\n",
 76 |     "class Net(nn.Module):\n",
 77 |     "    def __init__(self):\n",
 78 |     "        super(Net, self).__init__()\n",
 79 |     "\n",
 80 |     "        self.model = resnet18(num_classes=10)\n",
 81 |     "        self.model.conv1 = nn.Conv2d(\n",
 82 |     "            1, 64, kernel_size=3, padding=1, bias=False\n",
 83 |     "        )\n",
 84 |     "\n",
 85 |     "    def forward(self, x):\n",
 86 |     "        return self.model(x)\n",
 87 |     "\n",
 88 |     "\n",
 89 |     "model = Net().to(device)\n",
 90 |     "\n",
 91 |     "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n",
 92 |     "\n",
 93 |     "train_loader = DataLoader(\n",
 94 |     "    MNIST(download=True, root=\".\", transform=data_transform, train=True),\n",
 95 |     "    batch_size=128,\n",
 96 |     "    shuffle=True,\n",
 97 |     ")\n",
 98 |     "\n",
 99 |     "test_loader = DataLoader(\n",
100 |     "    MNIST(download=True, root=\".\", transform=data_transform, train=False),\n",
101 |     "    batch_size=256,\n",
102 |     "    shuffle=False,\n",
103 |     ")\n",
104 |     "\n",
105 |     "\n",
106 |     "model = Net().to(device)\n",
107 |     "optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-06)\n",
108 |     "criterion = nn.CrossEntropyLoss()"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {
114 |     "id": "b04erc67G8IK"
115 |    },
116 |    "source": [
117 |     "We will first train the model with a fixed learning rate (lr) of 1e-06 and inspect our results. Let's save the initial state of the model and the optimizer to restore them later for comparison."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 23,
123 |    "metadata": {
124 |     "id": "HglaeKrqFlkY"
125 |    },
126 |    "outputs": [],
127 |    "source": [
128 |     "init_model_state = model.state_dict()\n",
129 |     "init_opt_state = optimizer.state_dict()"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "metadata": {},
135 |    "source": [
136 |     "## Without LR Finder"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 24,
142 |    "metadata": {
143 |     "colab": {
144 |      "base_uri": "https://localhost:8080/"
145 |     },
146 |     "id": "dKihuuH4A-sH",
147 |     "outputId": "699eebb6-7446-40cd-9b91-ebdaea07eaeb"
148 |    },
149 |    "outputs": [
150 |     {
151 |      "data": {
152 |       "text/plain": [
153 |        "State:\n",
154 |        "\titeration: 1407\n",
155 |        "\tepoch: 3\n",
156 |        "\tepoch_length: 469\n",
157 |        "\tmax_epochs: 3\n",
158 |        "\toutput: 0.5554001927375793\n",
159 |        "\tbatch: <class 'list'>\n",
160 |        "\tmetrics: <class 'dict'>\n",
161 |        "\tdataloader: <class 'torch.utils.data.dataloader.DataLoader'>\n",
162 |        "\tseed: <class 'NoneType'>\n",
163 |        "\ttimes: <class 'dict'>"
164 |       ]
165 |      },
166 |      "execution_count": 24,
167 |      "metadata": {
168 |       "tags": []
169 |      },
170 |      "output_type": "execute_result"
171 |     }
172 |    ],
173 |    "source": [
174 |     "trainer = create_supervised_trainer(model, optimizer, criterion, device=device)\n",
175 |     "\n",
176 |     "trainer.run(train_loader, max_epochs=3)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 25,
182 |    "metadata": {
183 |     "colab": {
184 |      "base_uri": "https://localhost:8080/"
185 |     },
186 |     "id": "mophHZUkICKI",
187 |     "outputId": "378c7aac-b3c8-49ff-8be1-acc547730c12"
188 |    },
189 |    "outputs": [
190 |     {
191 |      "name": "stdout",
192 |      "output_type": "stream",
193 |      "text": [
194 |       "{'Accuracy': 0.8655, 'Loss': 0.602867822265625}\n"
195 |      ]
196 |     }
197 |    ],
198 |    "source": [
199 |     "evaluator = create_supervised_evaluator(\n",
200 |     "    model, metrics={\"Accuracy\": Accuracy(), \"Loss\": Loss(criterion)}, device=device\n",
201 |     ")\n",
202 |     "evaluator.run(test_loader)\n",
203 |     "\n",
204 |     "print(evaluator.state.metrics)"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "metadata": {
210 |     "id": "U_EHmN2bmvd2"
211 |    },
212 |    "source": [
213 |     "Let's see how we can achieve better results by using the [`FastaiLRFinder`](https://pytorch.org/ignite/generated/ignite.handlers.lr_finder.FastaiLRFinder.html) handler. But first, let's restore the initial state of the model and optimizer so we can re-train them from scratch. "
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 26,
219 |    "metadata": {
220 |     "id": "CTGJPVI6mvd2"
221 |    },
222 |    "outputs": [],
223 |    "source": [
224 |     "model.load_state_dict(init_model_state)\n",
225 |     "optimizer.load_state_dict(init_opt_state)"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "markdown",
230 |    "metadata": {},
231 |    "source": [
232 |     "## With LR Finder"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "markdown",
237 |    "metadata": {
238 |     "id": "fLaAAE05GIFh"
239 |    },
240 |    "source": [
241 |     "When attached to the `trainer`, this handler follows the same procedure used by [fastai](https://docs.fast.ai/callback.schedule.html#LRFinder). The model is trained for `num_iter` iterations while the learning rate is increased from `start_lr` (defaults to the initial value specified by the optimizer, here 1e-06) to the upper bound called `end_lr`. This increase can be linear (`step_mode=\"linear\"`) or exponential (`step_mode=\"exp\"`). The default `step_mode` is exponential, which is recommended for larger learning rate ranges, while linear provides good results for small ranges."
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": 27,
247 |    "metadata": {
248 |     "id": "qEA0T0af3iU1"
249 |    },
250 |    "outputs": [],
251 |    "source": [
252 |     "lr_finder = FastaiLRFinder()\n",
253 |     "\n",
254 |     "# To restore the model's and optimizer's states after running the LR Finder\n",
255 |     "to_save = {\"model\": model, \"optimizer\": optimizer}\n",
256 |     "\n",
257 |     "with lr_finder.attach(trainer, to_save, end_lr=1e-02) as trainer_with_lr_finder:\n",
258 |     "    trainer_with_lr_finder.run(train_loader)"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "metadata": {
264 |     "id": "lzhRX59cRDXO"
265 |    },
266 |    "source": [
267 |     "Let's plot how the learning rate changes within our specified range and print the suggested learning rate."
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 28,
273 |    "metadata": {
274 |     "colab": {
275 |      "base_uri": "https://localhost:8080/",
276 |      "height": 300
277 |     },
278 |     "id": "oN0VkPapmvd5",
279 |     "outputId": "b2f3fc64-046e-43a4-cd7a-e604003b8c6f"
280 |    },
281 |    "outputs": [
282 |     {
283 |      "data": {
284 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd3zU9f3A8dfnsncIWZAACRBGCBsDsgQnikKtoqJYRxVnbW1rqz8VR21rp1ZLnbXiKuJGRXFBlSUEZIMQwkiYCWTv3L1/f9wlJpBxCTcy3s/HI4/kvuve901y7/tsIyIopZTquizeDkAppZR3aSJQSqkuThOBUkp1cZoIlFKqi9NEoJRSXZwmAqWU6uJ8vR1Aa0VHR0tSUpK3w1BKqQ5l/fr1eSIS09i+DpcIkpKSyMjI8HYYSinVoRhj9je1T6uGlFKqi9NEoJRSXZwmAqWU6uLc2kZgjJkG/APwAV4UkcdP2t8bWABEOo65V0SWtPZ5qqurycnJoaKiwgVRK9U5BAYGkpiYiJ+fn7dDUe2c2xKBMcYHmA+cB+QA64wxi0Vke73DHgAWicgzxphUYAmQ1NrnysnJISwsjKSkJIwxLoheqY5NRDh+/Dg5OTkkJyd7OxzVzrmzaigdyBSRLBGpAhYCM086RoBwx88RwKG2PFFFRQXdu3fXJKCUgzGG7t27aylZOcWdiSAByK73OMexrb6HgTnGmBzspYGfNXYhY8xcY0yGMSYjNze30SfTJKBUQ/o/0T4cLapgb16pt8Nolrcbi2cDL4tIInAR8Kox5pSYROR5ERkjImNiYhodD9E6IrBmDbz3nv27rsmglHKxdftO8Pq3+xn7hy+5+KlvKKuq8XZITXJnIjgI9Kr3ONGxrb6fAosARGQ1EAhEuzEmWLIEeveG886D66+3f+/d2769k9i4cSNL2vB6pkyZ0uhgvaa2n2znzp2MGDGCkSNHsmfPnlY/v2qdP/7xj/Tv35+BAweydOlSb4ej6rHahFnPrub+97YCUFplZem2I16OqmnuTATrgBRjTLIxxh+4Clh80jEHgHMAjDGDsSeCxut+XGHJErj8csjJgZISKCqyf8/JsW/vJMmgrYmgtaxWa4PH77//Ppdffjnfffcd/fr1a/F8EcFms7ksnpoa733i8vRzb9++nYULF7Jt2zY+/fRTbr/99lN+H8p7snJL6n5edMuZJEQG8fHmLpgIRKQGuBNYCuzA3jtomzHmUWPMDMdhvwJuNsZsAv4LXC/uWjtTBObOhfLyxveXl8Mtt7Spmqi0tJTp06czfPhw0tLSePPNNwH7dBh5eXkAZGRkMGXKFAByc3M577zzGDJkCDfddBN9+vSpO+53v/sdAwcOZOLEicyePZu//vWvAOzZs4dp06YxevRoJk2axM6dOwF46623SEtLY/jw4UyePJmqqirmzZvHm2++yYgRI3jzzTcpLS3lxhtvJD09nZEjR/LBBx84XnI5V111FYMHD+bSSy+lvKl7U09oaCi/+tWvGD58OKtXr67bvmTJEp588kmeeeYZpk6dCsDf//530tLSSEtL48knnwRg3759DBw4kJ/85CekpaWRnZ3d4PpJSUn85je/YejQoaSnp5OZmQnAhx9+yNixYxk5ciTnnnsuR48eBeDhhx/m2muvZcKECVx77bXs27ePSZMmMWrUKEaNGsWqVasAWL58OWeddRYzZ86kb9++3Hvvvbz++uukp6czdOjQZkswe/bsYdy4cQwdOpQHHniA0NDQumtOmjSJGTNmkJqaSkVFBTfccANDhw5l5MiRLFu2DICXX36ZO++8s+56F198McuXL6+7n3fffTdDhgzhnHPOobYN7KmnniI1NZVhw4Zx1VVXnRLTBx98wFVXXUVAQADJycn079+ftWvXtvj7U56x9VAhAEt/MZn05CjSk6PYerDQy1E1Q0Q61Nfo0a
PlZNu3bz9l2ylWrxYJDRWxv9U3/hUaKrJmTcvXOsnbb78tN910U93jgoICERHp06eP5ObmiojIunXr5KyzzhIRkTvuuEP+8Ic/iIjIJ598IoDk5ubK2rVrZfjw4VJeXi5FRUXSv39/+ctf/iIiImeffbbs2rVLRETWrFkjU6dOFRGRtLQ0ycnJERGR/Px8ERH5z3/+I3fccUddPPfdd5+8+uqrdcekpKRISUmJ/O1vf5MbbrhBREQ2bdokPj4+sm7dulNe31lnnVW3HZA333yz0fvw0EMP1cWbkZEhaWlpUlJSIsXFxZKamiobNmyQvXv3ijFGVq9e3eg1+vTpI4899piIiCxYsECmT58uIiInTpwQm80mIiIvvPCC/PKXv6x7zlGjRklZWZmIiJSWlkp5ebmIiOzatUtq/16WLVsmERERcujQIamoqJCePXvKvHnzRETkySeflJ///OeNxiMiMn36dHnjjTdEROSZZ56RkJCQumsGBwdLVlaWiIj89a9/rbufO3bskF69ekl5efkpv4/p06fLsmXL6u7na6+9JiIijzzySN1xPXr0kIqKChH54fda3x133FH3OxURufHGG+Wtt9465Tin/jeUyz2yeJsMfGCJVNdYRUTkha/3SJ/ffiR5xRVeiwnIkCbeV73dWOw5hw+DpYWXa7HAodb3YB06dCiff/45v/3tb/nmm2+IiIho9vgVK1bUfcqbNm0a3bp1A2DlypXMnDmTwMBAwsLCuOSSSwAoKSlh1apVzJo1ixEjRnDLLbdw+PBhACZMmMD111/PCy+80GTVwGeffcbjjz/OiBEjmDJlChUVFRw4cICvv/6aOXPmADBs2DCGDRvW4mv18fHhsssua/G4FStWcOmllxISEkJoaCg//vGP+eabbwDo06cP48aNa/Lc2bNn132vLXXk5ORwwQUXMHToUP7yl7+wbdu2uuNnzJhBUFAQYB9cePPNNzN06FBmzZrF9u0/DFs544wz6NGjBwEBAfTr14/zzz8fsP/+9u3b12Q8q1evZtasWQBcffXVDfalp6fX9dNfsWJF3f0cNGgQffr0YdeuXc3eJ4vFwpVXXgnAnDlzWLFiBWD/fVxzzTW89tpr+Pp2uLkhu7yN2fmk9YzA18f+npPaw95LftuhIm+G1aSukwh69ICW6qNtNujZs9WXHjBgABs2bKirOnj00UcB8PX1rasDP53+3DabjcjISDZu3Fj3tWPHDgCeffZZHnvsMbKzsxk9ejTHjx8/5XwR4Z133qk798CBAwwePLhNsQQGBuLj49Pm1wIQEhLS7P763R5rf/7Zz37GnXfeyZYtW3juueca3M/613viiSeIi4tj06ZNZGRkUFVVVbcvICCg7meLxVL32GKxtLmOv6XXAg3/DqD5v4Xa1/vxxx9zxx13sGHDBs4444xT4ktISGhQrZaTk0NCwsm9s5U3lFdZ2ZxTyJikqLptQ3pGYAx8seOoFyNrWtdJBGPHQguf1ImMhPT0Vl/60KFDBAcHM2fOHO655x42bNgA2Ou7169fD8A777xTd/yECRNYtGgRYP+0np+fX7f9ww8/pKKigpKSEj766CMAwsPDSU5O5q233gLsb+ybNm0C7PXXY8eO5dFHHyUmJobs7GzCwsIoLi6ue74LLriAp59+GnG0f3z33XcATJ48mTfeeAOArVu3snnz5la/9qZMmjSJ999/n7KyMkpLS3nvvfeYNGmSU+fWtrG8+eabnHnmmQAUFhbWvdEtWLCgyXMLCwvp0aMHFouFV1991SUNqOPGjav7/S1cuLDJ4yZNmsTrr78OwK5duzhw4AADBw4kKSmJjRs3YrPZyM7OblCXb7PZePvttwF44403mDhxYt1xU6dO5U9/+hOFhYWUlJQ0eK4ZM2awcOFCKisr2bt3L7t37ya9DX+7yvW+y86nxiakJ3er2xYR7Mc1Y3vzyur97Dpa3MzZ3tF1EoEx8Pzz4KhCOEVQEDz3nP24VtqyZQ
vp6emMGDGCRx55hAceeACAhx56iJ///OeMGTOmwafohx56iM8++4y0tDTeeust4uPjCQsL44wzzmDGjBkMGzaMCy+8kKFDh9ZVM73++uv8+9//Zvjw4QwZMqSuwfeee+5h6NChpKWlMX78eIYPH87UqVPZvn17XWPxgw8+SHV1NcOGDWPIkCE8+OCDANx2222UlJQwePBg5s2bx+jRo1v92psyatQorr/+etLT0xk7diw33XQTI0eOdOrc/Px8hg0bxj/+8Q+eeOIJwN4oPGvWLEaPHk10dNM9jG+//XYWLFjA8OHD2blzp1Of2Fvy5JNP8ve//51hw4aRmZnZZNXf7bffjs1mY+jQoVx55ZW8/PLLBAQEMGHCBJKTk0lNTeWuu+5i1KhRdeeEhISwdu1a0tLS+Oqrr5g3bx5Wq5U5c+bUNTrfddddREZGNniuIUOGcMUVV5Camsq0adOYP3/+aZfUlGtsO2iv/hnZq1uD7deM7QPA7qMlp5zjdU01HrTXrzY3Ftf6+GORxER7w3B4uP17YqJ9u4dUVFRIdXW1iIisWrVKhg8fXrevuLhYROyNnqNHj5b169d7LK72oH4De3tRWlpa11D93//+V2bMmOGya9c2PLuLNhZ73tNf7pI+v/1IqhwNxbVyiyukz28/kpdX7vVKXDTTWNz1WqEuuggOHIC1a+0Nwz172quDPDgc/8CBA1xxxRXYbDb8/f154YUX6vbNnTuX7du3U1FRwXXXXdfg06PyjvXr13PnnXciIkRGRvLSSy95OyTVjlkdzUGWk95TugX7YzGQW1zphaia1/USAdjf9MeO9drTp6Sk1NXTn6y2zr6raq73jrv9/ve/r2uHqTVr1izuv//+ujYZVzu57l91fFZHW5zlpM+WPhZD99AA8ko0EbiNiOgkW+q03H///dx///3eDsNlROfQ8gr7e1Hjk/5Ft9NE0CkaiwMDAzl+/Lj+4SvlII71CAIDA70dSpdjtQk+TXwojQ7116ohd0lMTCQnJ4empqhWqiuqXaFMeZZVBMvJ9UIOMWEBZOW2vympO0Ui8PPz01WYlFLtgsip7QO1YkIDyC2pbHdV2Z2iakgppdqL5quGAqiqsVFU0b7WJtBEoJRSLmRroWoIaHcNxpoIlFLKhWw2OWUMQa3oUHsiaG8NxpoIlFLKhawi+DRRIogO8we0RKCUUp2arYXGYoA8LREopVTn1VzVULdgf3wshlwtESilVOdla6ZqyGIxRIX4k1dc1eh+b9FEoJRSLmS1nTrhXH0x7XCaCU0ESinlQvbuo03vjw4L4Ji2ESilVOdlk6YHlAEkRAaRk1/mwYhapolAKaVcyNpMYzFAUvdg8suqKSyr9mBUzdNEoJRSLtTcyGKAPt3ty6fuP9F+Jp/TRKCUUi5ks9Fs1VBytD0R7DvefqqH3JoIjDHTjDHfG2MyjTH3NrL/CWPMRsfXLmNMgTvjUUopd7M6FqZpSu+oYAD257WfEoHbpqE2xvgA84HzgBxgnTFmsYhsrz1GRO6ud/zPgJHuikcppTxBmhlHABDk70OvqCA25RR6MKrmubNEkA5kikiWiFQBC4GZzRw/G/ivG+NRSim3a6mxGOCcQXF8szuX0sr2MR21OxNBApBd73GOY9spjDF9gGTgqyb2zzXGZBhjMnQVMqVUe2YVmm0sBpiWFk9ljY35yzLbxRK77aWx+CrgbRGxNrZTRJ4XkTEiMiYmJsbDoSmllPNEBJ8WFh9LT4rix6MS+NfyPTz60fbmD/YAdyaCg0Cveo8THdsacxVaLaSU6gScqRqyWAx/mzWcH49KYMGqfZRXNfoZ2GPcmQjWASnGmGRjjD/2N/vFJx9kjBkEdANWuzEWpZTyCKut+XEEtYwxTBsSj01g++EiD0TWNLclAhGpAe4ElgI7gEUiss0Y86gxZka9Q68CFkp7qChTSqnTJNL8OIL6hiVGArAlx7s9593WfRRARJYAS07aNu+kxw+7MwallP
Ikqwh+TpQIAOLCA4gODWDzQe92JW0vjcVKKdUp2KTlNoJaxhiGJUawVROBUkp1HjZb8wPKTjY0IYLMYyWUVXlvTIEmAqWUciFrK0oEYE8ENoHth7zXYKyJQCmlXMjWwgplJxuWGAHAun357gqpRZoIlFLKhextBM4fHxseSHpyFP9ekUWJl6ac0ESglFIu1Nzi9U35zQUDySupYsnmw26KqnmaCJRSyoWcHVBW3+g+3UiIDOKz7UfdFFXzNBEopZQL2aR1bQRg70Z6Xqp9RtLKGs9PN6GJQCmlXMjmxKRzjUntEU5ljY1jRZWuD6oFmgiUUsqF2lI1BBAXEQjAkaIKV4fUIk0ESinlQjYnZh9tTHy4IxEUaiJQSqkOzdaKSefqq00ER7VEoJRSHZtV2lY1FB7kS6CfRUsESinV0UkrB5TVMsYQFx7I0WJtLFZKqQ7N2spJ5+qLCw/kqJYIlFKqY3NmqcqmxIcHaq8hpZTq6KQNA8pqxUfYE4GnF2zURKCUUi5kFcGnje+sceGBVNXYKCirdm1QLdBEoJRSLnQ6VUNx4QGA5weVaSJQSikXEqFN3UfBe2MJNBEopZQLWUXaNKAM7FVDoIlAKaU6tNYuTFNfXN00E54dS6CJQCmlXERETqtqyN/XQnSoP4cLy10cWfM0ESillItYbfZun21tLAZI6h5CVm6pq0JyilsTgTFmmjHme2NMpjHm3iaOucIYs90Ys80Y84Y741FKqZN9vSuXT7e6ZolIRx5o88higP6xoWTmlrgkHme5LREYY3yA+cCFQCow2xiTetIxKcB9wAQRGQL8wl3xAJRXeX7lH6VU+/av5Zk8+uF2l1zLJqdfIugfG8qJ0ipOlFa5JCZnuLNEkA5kikiWiFQBC4GZJx1zMzBfRPIBROSYq4MQEX7z9iaGPryUwfM+5dU1+139FEqpDqygrJpDhRUcc0FPnR8SQduv0S82FICnv9rtsRHG7kwECUB2vcc5jm31DQAGGGNWGmPWGGOmNXYhY8xcY0yGMSYjNzfXqSfPPFZC5rESXvgmi0UZORRX1ADwyOJtfLLlMBc//Q0PvL/FK+uDKqXaj9pRvN9lF5z2tWrbCE6naii1RzgA/1m5j805hacdkzN8PfIszT9/CjAFSAS+NsYMFZEGvxEReR54HmDMmDEtpsgaq42Z/1xBqaMq6KKh8Zw7OI7yait/+HgHt72+AYCtB4uIDg3gF+cOcOVrUkp1IAXl9iqYjdkFXDAk/rSuZbPZv59O1VBceCDv3Daey55ZxYYD+QzvFXlaMTnDnYngINCr3uNEx7b6coBvRaQa2GuM2YU9Maxr7sLlVVaC/H2a3P/FjqOUVlmJCQvgslGJ/Ozs/oQE2F/q5JQYNuUUML5fNA++v5Vn/7eHCf2jOSMpqvWvUCnVoVVUW6motr97b3JBicAVVUMAo/t0Iz48kI0uiMkZ7qwaWgekGGOSjTH+wFXA4pOOeR97aQBjTDT2qqKs5i6aX1rFkIc+5d0NOZworeLllXvJPFaCzfZDQeHVNftJiAxizX3ncO+Fg+qSAECvqGAuHtaTqBB/HpqRSs+IIK57aS3r9+e74jUrpTqQ2mqhID8fNucU1lXttJVVTr9qqNbI3pGs35/vkXYCt5UIRKTGGHMnsBTwAV4SkW3GmEeBDBFZ7Nh3vjFmO2AF7hGR481dN6egnB4Cv1y0CWPs83oA+FoMKXFhHDheSmmVlXsuGNjiLyM2LJCFc8cxc/5K/vbZ97xx87jTft1KqY6jtlpofL/ufLnzGHtySxgQF9bm69V+IG3rgLL6pg6M5ZOtR8jYn+/2Ggu3jiMQkSUiMkBE+onI7x3b5jmSAGL3SxFJFZGhIrKwpWtGhfgzY3hP/HwM149P4sWfjGF2em+uH59ETFgAE1OiGd+vO7PTezsVY2x4IDNHJLB27wkKy+2fDmqsNo/PB66U8rz8Uvv//FkDYwDYcpqNs7UFitNpI6h18fAehAX68u
pq9/d09HZjcaslRAbx1OyRVFuH4+eY9Pvc1LjTuuZ5qbE8+789PPj+VgbGh/Hs8j3cclZf7jw7xRUhK6XaqUJHiWBEr0j8fAy7j53eQK66qiEXJIJgf18uG5XI69/uJ68klejQgNO+ZlM67BQTfm1d+aERI3t146cTk/lk62H+svR7iitr+OeyTAo9vDiEUsqz8h3/49GhASR1DyHzNBNBbdWQC/IAAHPG9aHaKrz/3cn9bFyrwyYCV7JYDA9enMqWhy/guwfP46OfTaTaKvz4mZVUVOs4A6U6q9rG4shgP/rHhrLnNKd2sLmwsRjso4zjwwPZfrjIJddriiaCegL9fOgW4k9aQgT/nD2SPbmlfL79qLfDUkq5SWF5NX4+hiA/H1JiQ9l/vPS0Pvy5YkDZyfrFhrDHzZPQaSJowgVD4ukREciijGxtOFaqkyquqCYs0A9jDP1iQ7EJ7Dve9jfd2sZi46q6IaBfTChZx0rc+j6kiaAJFou9V9I3u/P489LvdSoKpTqh4ooawgPtfWZSYu3dRk+nncDmwsbiWv1jQymurCG32H2L1WgiaMbNk/py2ahEnlm+h5++nKHJQKlOprZEANA3JgRjYPfRtieCH6qGXBIeYC8RAG6dmloTQTMsFsPfrhjO4z8eyorMPBauzW75JKVUh1FcUUOYo0QQ6OdD76jg03rDrS0RuLJqqHdUMAA5J9y3apkmAidceUYv0pOi+NfyzAYNSYsysln2vctnzlZKeUj9RADQPyaUPadTNeSYdM6VVUPxEYFYDOTkl7nsmifrcAPKvMEYwy/OS+HqF77lsY+3syWnkMSoYD7ebF/V6JUb05k8IKbueBFx6ScCpZR7FNWrGgJ7ffw3u/OosdrwbUP9jqu7j4J9zFR8eCA5Be4rEWgicNL4ftFM7B/Na2sOALApp5C+MSEUlVdzxxsbuH1Kf5K6B5NXWsU/vtjF4z8edtojnpVS7nVKiSA2lCqrjez8cpKjQ1p9Pau4dkBZrcRuweTkayJoF56ZM4o/fbqTC4bEM6ZPFP6+Fp77eg9//vR7/vTpzgbH3vrael75aTrj+0V7KVqlVHOsNqGksuaUEgHYew61JRHY3DCOACChWxBr955w6TXr00TQCmGBfjz2o6ENtt0wPpluwf6cPSiWXUeLWbBqP7dP7cev39rE1S98y9zJfbn73AHNrp+glPK8kkr7qoXhJ5UIAHYfK+a8NpToXTnpXH2J3YL4YGM51VabS6fXqaWNxacpyN+H2em9iQsPZFJKDC9eN4ZRvbvxfxcOBuD5r7P4bPuRBufUWG18ueMoizKytUuqUl5SXGGfXqJ+1VBYoB/x4YFt7kJaVWNvLfZ1cYmgd1QwNsFt1UNaInCTcwbH8sSVw7n7zU088P5Wqq3CoYJyjpdUsnxXLvuP23sAfJt1gr9dMdzL0SrV9dSuYx5er2oI7DORrt17ok2dPmp79vSMDHJNkA59HWMJsnLbVmXVEi0RuIkxhktHJnJ+ahzFFTX8+q1NPPHFLv67NpuSihqeuWYUcyf35Z0NOazYneftcJXqcorKa0sEDRPBpAHRHCwoZ+G61k8vs/9EGX4+xvWJwPHmn+WmOYe0ROBm9144iPTkKGLCAhgYH0ZydAgWY/DzsXD24FiWbDnMHz/ZwYf9JrpkVSOllHPKHGOCggMatt9NTrF3Bb/v3S0E+/swc0SC09fcf7yUXt2CXd5Y3C3En27BfmTluWd0sZYI3KxvTCg3TerLzBEJDIoPJ8DXp66xJ8DXh3suGMi2Q0Vc+swqXlm9r67XQX5plRejVqrzq63P9z+p8bVXVDDPzhkNwD+/ymxVqWBfXhm9uwe7Lsh6+saEum0WUk0EXnbJsJ5cPbY3BWVVzPtgG498uI0lWw4z6rHPWZmpVUZKuUulIxEE+p36NjgtLZ5HZw5h97ESpxtoRYQDJ8pI6u76OnyAgfFh7DhUVDefkStpIvAyi8Xwh0uHsvzXU/jpxG
QWrN7P7a9vQAR+//EOSh1d3JRSrvVDiaDxrt0jekUCsNnJdYyLKmooqawhwcXtA7XOSOpGcWUNO4+4fpEapxKBMSbEGGNx/DzAGDPDGOPX0nnKecbYV0mbf/UozhoQwy1n9WXnkSJuWpBRV12klHKd2q7bAY2UCAAGxYfj72Nhc06BU9fLK7FPEx0T5p61hcf0iQIgY1++y6/tbGPx18AkY0w34DNgHXAlcI3LI+ripg/rwfRhPQBI7h7Cve9u4dGPtnP/9MFuGUiiVFfVVBtBLX9fC4N7hDldIshzrBfQPdTfNQGeJLFbELFhAWzKdi4xtYaz7yxGRMqAHwP/EpFZwBCXR6MauPKMXswZ15uXV+3j6S93ezscpTqVukTg2/TbYEpcmNM9dfJK7B08okPdUyIwxtA/NpSsPNc3GDudCIwxZ2IvAXzs2KZzJriZMYbHfjSUmSN68uzXWXx3IJ+jRRXUWG3eDk2pDq+2sTigmUTQOyqYo0WVTq1jfLzUXiJwVyIASI4OYa8XE8EvgPuA90RkmzGmL7CspZOMMdOMMd8bYzKNMfc2sv96Y0yuMWaj4+um1oXfNcy7OJX48EBufHkdEx7/iqtf+FYbkZU6TVU1NiyGZqeb7uPoCnrgRMtrAeQVV2IxEBXinqohsCeCwvJql3cvdyoRiMj/RGSGiPzJ0WicJyJ3NXeOMcYHmA9cCKQCs40xqY0c+qaIjHB8vdjaF9AVdA8N4MXrxlBebaXGJqzdd4LfL9nh7bCU6tCqrLZmq4Xgh9XBaqeEaU5uSRVRIf4uH0xWX+30Eq6uHnK219AbxphwY0wIsBXYboy5p4XT0oFMEckSkSpgITDz9MLtugbEhfH2reP55jdTmTu5L298e4CntN1AqTarrLYS4Nt8DXcfx5gAp0oEJZV0D3FftRBAkiMR7PNGIgBSRaQI+BHwCZAMXNvCOQlA/UV+cxzbTnaZMWazMeZtY0yvxi5kjJlrjMkwxmTk5uY6GXLnk5YQQa+oYH47bRAXDInj+a+znKq7VEqdypkSQbdgP0IDfDlwvOU33rySSqLD3FctBNRNX+HqdgJnE4GfY9zAj4DFIlINuKJz+4dAkogMAz4HFjR2kIg8LyJjRGRMTExMY4d0KT4Ww9xm5XsAACAASURBVDVj+1BSWcNyXTNZqTaprLE121AM9g4bvaOC2e9EieBYUSVxYYGuCq9R/r4WenULYq8Tiak1nE0EzwH7gBDga2NMH6Cl4W0Hgfqf8BMd2+qIyHERqXQ8fBEY7WQ8Xd6Z/boTHx7IXQs38uI3WW7pSaBUZ1ZZ03KJAOwNxgdaaCOw2oQjRRX0iHRvIgB79dBeF8855Gxj8VMikiAiF4ndfmBqC6etA1KMMcnGGH/gKmBx/QOMMT3qPZwBaAuok/x8LLx7+3hSe4Tz2Mc7uOgf35B5rITvjxRTY7Vx+TOr+NH8lSzKyOZYcYW3w1Wq3amqsTU5mKy+3lH29YKbm+Mnr6QSq02Ij3DP9BL1JUeHsO94aaunyG6OUyOLjTERwEPAZMem/wGPAk0OuRORGmPMncBS7GMOXnJ0PX0UyBCRxcBdxpgZQA1wAri+rS+kK+oZGcSCG9J5dc0+nv4qk5n/XEFpVcM2g43ZBYQG+PL6TWMZ7pg7RSllTwQBfi0Ph+rdPZgqq40jRRVNziN0qMA+MV3PCPeXCJKjQyirsnKsuJK4cOeeb+vB5kdHO1s19BJQDFzh+CoC/tPSSSKyREQGiEg/Efm9Y9s8RxJARO4TkSEiMlxEporIzuavqE4WEezHnWensHDuOIIDfOnu6MOcHB1C5u8vZNEtZ9ItxI+fLsjgSKGWDJSqVVljJcCJEkGfKHtPnf3N1MvX/m/FeygRQOsWqbnl1fXN7nd2rqF+InJZvcePGGM2Oh2FcruRvbux9v/OwRhTN/mVr4+F9OQoXrruDGbOX8kvF23kxevGEOyv6xEpVVVjIySg5f+FXlH2UkDOiXLo1/
gxhxyJoKeHqoYA9h0v5cx+3Z06p6Cs+QFozpYIyo0xE2sfGGMmAO5ZRVm1We36qtGhAQ2GuafEhXHXOSms2nOc8Y9/pSUDpXB0H3WiRFBb/XKkqOn/myOF5QT6WYgMdv+kzD0jgvD3tTjdQaSqxnZKlfHJnP1oeCvwiqOtACAfuM7Jc1U7cPOkvkQE+XH/e1uY9exKfh1ewMWx4JOQAGPHQisX6Vaqo6usdq7XUKCfD1Eh/hxu5gPUocIKekQEtXqx+7awWAxJ3YOdTgSFjrWZm+NUIhCRTcBwY0y443GRMeYXwGanIlFe52MxzE7vTa/Vyxnw0K8JLivG6u+LjwEiI+G55+Cii7wdplIeU2VteRxBrfjwQI42UyI4XFBODw+0D9RKjg5xetnKwvKW5yVq1QT3IlLkGGEM8MvWnKvagSVLmHjfrcQW5hJaXYF/aQmUlEBODlx+OSxZ4u0IlfIYZ0sEAD0iApstERwprPBIQ3GtlNgw9uWVUuLE5JP5ZS2XCE5npROtS+hIRGDuXChvommnvBxuucV+nFJdgL1E4Nxs+vERgRwpbPx/x2oTjhZXeqShuNb4ft2psQnfZh1v8dgCNycCfcfoSL79FgpbWGmpoADWrvVMPEp5WZWTI4vBXjWUX1bd6NxeucW1g8k8VyIY1acbgX4Wvt7V8txrLfUYghYSgTGm2BhT1MhXMdDT6aiV9x0+DJbm/+jFYoFDhzwUkFLeVVljdToRJHSzf9pvbDrqQ46SQk8PTC9RK9DPh3MHx/HftdktrqnsTGNxs3dBRMJEJLyRrzAR0c7oHUmPHmBrfmWzispqqmLjPBSQUt5jswnVVnG6sXhU724ArN134pR9hwscg8nCPVc1BPDYj9Lw9TG8u+Fgs8cVlFW3uEaCrobeVYwdCxERzR6S7x/CM2XRHgpIKe+psra8XnF9fboHExce0Gid/PdHirCYHwZ6eUpksD8pcWHsOlrc7HH5ZVVEBjU/vkETQVdhDDz/PAQ1/qlFgoL44NZ5zF++h8xjzi3WrVRHVduAGhbo3AAwYwxjk7uzdu+pJYItBwtJiQ0jyN/zy7gPiA1ldwv/r3kllXRrYflMTQRdyUUXwdtvQ2IihIZCeLj9e2Ii5u23ufzBuQT6Wfi/d7dga2amRaU6utp5g2qXonTGiF6RHCuubDCeQETYcrCQtITmS9vukhIXSm5xZbMNwpnHSujbQmlFE0FXc9FFcOAAfPEFvPyy/fuBA3DRRcSEBXD/9MGs3XeCm1/J4IONzdc9KtVR1S492acViWBYov3NfkvOD73vjhZVkldSxdCEcNcG6KSU2DCAJksFlTVW9h0vY0BcWLPX0QbfrsgYe5tBI64Y04tDBRX848vdfPX9Mcb3iyYmzL3rsCrlaQdOlOFjMXW9gZyR2jMci4HNBws5N9XeqaK2ZNEvNtQtcbYkJc7+vLuOFnNGUtQp+/fmlWK1Sd1xTdESgWrAGMPd5w3g87snIwKPf7KT977LobJG10ZWncf+42X0jAzEz4lJ52oF+/syuEc4b2Vkc9Cx/kBOvv17YjfnSxau1DMiiGB/H3YfbbxEsMuxvaUSgSYC1aiUuDDSk6J4Z0MOd7+5iTte/46qmua7nyrVUew/UVa3zkBr/OmyYRwvqWLBqn3AD4nAk2MI6rNYDCmxoew+1njPoYOO+Pp0bz5RaSJQTXr95rGsu/9cHr4klS92HOWnC9bx8ebDjY6uVKojKS6vbtOU0WkJEQzvFVHXeygnv4y48ACnp6pwh/6xYU2WCArLq/H3sRDUwkpsmghUk/x8LMSEBXD9hGR+N3MIKzLzuOONDdz7zmaXrpeqlKc5u3B9Y85IimLrwULKqmrIyS/3WrVQrUHxYRwrrqxbLrO+wvIqIoL9WpweWxOBcsq1Zyax5K5JTB/Wg/c3HuLGl9fxzvocaqxaXaQ6ntZMQX2ycX3tE76lzlvK6qzjJLaiwdkdzh9ib7helJF9yr6CsuoWB5
OB9hpSrTC4RzhPXzWSpO7BvLbmAMu+z+VQQTk/OyfF26Ep1SpVNc6tTtaYif2jmZQSzTe78xjVO5LLRye6OLrW6dM9hLHJUTz5xW5sNuGX5w+s21dQ5lwVmJYIVKtYLIZ7LhjExnnnMbF/NAvXZevgM9XhtGbm0ZNZLIbnrx3DRz+byLu3T2BSSoyLo2u9p2aP5OxBsTy9LLPBJHQF5dVEOFEi0ESg2sQYwxVn9OJgQTlPfbWb/NKWp7pVqr2osrY9EQAE+ft4bTRxY+LCA3nyqhEALNv5w9TUReXVRAQ1P70EaCJQp+GitHimDIzhyS92M/nPy/hmd8tzoyvlbVabYLUJ/j7e6+njDuGBfiR2C2rQlbSgrMr7VUPGmGnGmO+NMZnGmHubOe4yY4wYY8a4Mx7lWr4+Fl74yRheuTGd7qH+/HXp994OSakW1Y6HOZ0SQXs1oF5X0qoaG6VVVqcai912J4wxPsB84EIgFZhtjElt5Lgw4OfAt+6KRbmPn4+FyQNimDOuD5tyClucElcpb+vMiSAlLoysvBKqrba6BWkivFwiSAcyRSRLRKqAhcDMRo77HfAnoOmVoVW7N2NET0IDfLnmxW/ZcbjI2+Eo1aRKq31AZGdMBAPiQqm2CvuPl1JYbm+383ZjcQJQv2NrjmNbHWPMKKCXiHzsxjiUB8SGBfLObePxMYbZL6xh68EW1kdWyktqSwQBbew+2p7Vzim062gJRworAYgObXnSSK/dCWOMBfg78Csnjp1rjMkwxmTk5mqDZHs1MD6MRbecSYi/L7e9vl6nolDtUmeuGuoXE4oxsPtoCdsO2T+MDe7R8hTZ7rwTB4Fe9R4nOrbVCgPSgOXGmH3AOGBxYw3GIvK8iIwRkTExMd7vs6ua1rt7MH++fBjZJ8p54vNd3g5HqVO0dpnKjiTI34de3YLZdayYrYeKSIgMIqqF1cnAvYlgHZBijEk2xvgDVwGLa3eKSKGIRItIkogkAWuAGSKS4caYlAdM6B/NNWN789zXWXy+/ai3w1GqgboSQSesGgJ7O8GuI8VsO1jIkJ7OLZjjtjshIjXAncBSYAewSES2GWMeNcbMcNfzqvbhoUuGMCg+jAfe31LXe0Gp9qAzVw0BjO4Txe5jJWTllTK2b3enznHrXEMisgRYctK2eU0cO8WdsSjP8ve18OfLh/Gj+Sv5/cfbmdA/mnMGxxEaoNNbKe/q7Ing5knJnCitxM/HwvXjk5w6R/8rldsMS4zk5kl9ee7rLBZl5HD9+CQenjHE22GpLq6yE7cRgH2g5/3TTxmy1azOeSdUu3H3eQPqFv3+bNsRnaBOeV1nbyNoC70Tyq0C/Xz44I4J/OuaURwqrODprzK9HZLq4jp71VBb6J1QbmeM4cK0eGaO6MnTX+3meEmlt0NSXZiWCE6ld0J5hDGG26f0p8YmfLT5sLfDUV1YZx5H0FZ6J5THDIwPI7VHOAtW7aOyRkcddyXvbsjhj5/s8HYYgFYNNUbvhPKoe6YNJCuvlH+v2OvtUJSHiAi/XLSJ5/6Xxao9ed4ORxNBI/ROKI+aOjCW81PjePrLTA4XliOivYg6u60Hf5iN9tn/ZXkxEru6qiFtI6ijd0J53IMXp1JltXHmH79izr+/xapdSju1FZn2UsDs9N6szMzjeEkly3Ye89r0I5XaWHwKvRPK43pFBXN+ahwAKzOP869lmTq+oBPbk1tCbFgA143vg9UmfLzlMDe8vI6bX8kgK7fE4/FU1djw8zFYLMbjz91eaSJQXvH4ZcN47trRnDs4lr99vosrn19NaWWNt8NSbrAnt4R+MaEMig9nQFxogyVNn/3fHo/HU1Vj09LASfRuKK+ICPLjgiHxvPCTMfxu5hDW7cvn4y3arbSzERH2HCuhf2woADOG96Sowp7wx/WN4rPtR6l21Nl7SpXVqg3FJ9G7obzKGMOccX1I6h7M2+tz2HaokPIq7VraWeSWVFJUUUO/mBAArh7bh2vH9e
HeCwdx44RkCsqqWb3nuEdjqqqxEeDr49HnbO80ESivq00Ga/eeYPpTK/hLvaoD1XHZbMID720FYETvbgBEhfjzux+lcetZ/Zg8IIYQfx+WeLgkWFppJThAE0F9mghUu/DTicn84twUAD7cfEi7lXYC/16xl8+2H2XexamM6BV5yv5APx/OGRzH0m1HqPFg9VBpVY1Oh34STQSqXTDG8ItzB/Dny4eRW1zJun353g5JnaY31h5gfL/u3DAhqcljzh8SR35ZNVsOFnosrtLKGkL8NRHUp4lAtSsXpsUTHx7Ife9upqJa2wo6qoKyKvbmlTIxJRpjmu6meaZjBa1VHmwnKKm0EqJVQw1oIlDtSligH3+dNZw9uaUMevBT7nxjAwVlVa2+js0mrN9/QquYvGRzjv0T/ojEU6uE6useGsCg+DA+236UsirPdB8uq6ohRKuGGtBEoNqdiSnR3H3uAEb1juTTrUf4x5e7W32NdzbkcNkzq3l3w0E3RKhasjmnAIA0x6JEzZmd3ptN2QU8vHibu8MCHFVDmgga0ESg2qWfn5vCu7dPYMbwnixal01ucSWbsgu4+ZUMth8qavK8oopqPth4kN++sxmAP36yk6KKak+FrRyOFlXSLdiP8EC/Fo+9bnwSl45M4PPtRz0y3UhJpTYWn0wTgWrXbp3Sj9IqK2f8/gtmzl/J59uP8pOXvuXt9TkNjjtUUM7M+SsZ94cv+fnCjdjE3t5wvLSSRz/crvMZeVhxRTXhQS0ngVpTBsaQX1bNJkdJwl2sNqGi2kawv7YR1KdpUbVrA+LC+N3MIbyz4SDnD4ljYFwYT32Vya/f2sSnWw8zsX80hwor+P5IMZuyCxjRK5LfThuEiDCqTzf6fZXJP5dl0jsqmLvOSfH2y+kyiipqCAt0/u3lrAExBPn5MO+Drbx1y3iC3PRGXepoh9ASQUN6N1S7d+2ZSVx7ZlLd4ykDY5m/LJMFq/bxxY5j+FoMNTbhhglJPHTJkAbn/vqCgWzKKeDNddncObW/TjTmIUXl1U5VC9WKDPbnqdkjufmVDBas3setZ/VzS1y181lpG0FDejdUh+NjMdx1Tgq3ntWPvXml9IsJ4UhRBXHhgY0ef9moRH7x5kbW7TvBWEd3ReVexRU1JEUHt+qc81LjOGtADM/9bw/XnZnkllJBaaW9S7JWDTXk1jYCY8w0Y8z3xphMY8y9jey/1RizxRiz0RizwhiT6s54VOfi72thYHwYvj4WErsF49fEjJLnD4kj2N+H9zdqDyJPKapoXYmg1m1T+pFfVs2Hmw45fU5VjY0PNh6kxInZa2tLBFo11JDbEoExxgeYD1wIpAKzG3mjf0NEhorICODPwN/dFY/quoL9fZk2JJ6PNh926s1Cnb6i8tY1FtcamxzFwLgwXl97oNH9x4ormPfBVtbuPVG3bcGqffx84UYueOJrih09xESE/3tvCwtW7WtwvlYNNc6dJYJ0IFNEskSkClgIzKx/gIjU7wcYAmjXDuUWPxmfREllDY9/skMHmblZjdVGaZW1VY3FtYwxXDY6gU3ZBezLKz1l/x2vb+CV1fv5x5e7AHsvoJdW2te/PlRYzh+W7OS1NftJvm8Jb3x7gIcWb6tboxig1DGzrZYIGnJnIkgAsus9znFsa8AYc4cxZg/2EsFdboxHdWEjekVyw/hkXltzgLsWbuREaetHKyvn1Ja62lI1BHDxsJ4A/OhfK9lbLxlkHiupm4Nq3b58yqpqOHCijMOFFfz5smHMndSX/649wAPvb8VioFdUEABf7PhhScySSnuJQdsIGvL6OAIRmS8i/YDfAg80dowxZq4xJsMYk5Gbm+vZAFWn8cD0wdx1dn8+3XqYP32y09vhdFpF5fZE0JYSAUDPyCCevHIEBWXVPFdvBbMPNx3CYuAfV42gqsbGFzuOsTfPvtRlv9gQ7rlgINed2YdbzurLpofOZ/mvp9IjIpA319k/j24/VMT8ZXvoFuxHfETjHQ
u6KncmgoNAr3qPEx3bmrIQ+FFjO0TkeREZIyJjYmJiXBii6kosFsMvzx/I1em9eWdDDgcLyr0dUqdUO5K7LW0EtX40MoGrzujF+xsPkpNfBsC3e4+TlhDBxcN6khwdwgtfZ5GVay8xJEeH4utj4ZGZadx34WDCAv3wsRguH53I17tzWbbzGJc/u4pjRRXMv2YUwTr7aAPuTATrgBRjTLIxxh+4Clhc/wBjTP0RPtOB1k8qo1QrzT2rH8bQ4NOmcp26RNDGqqFat03ph5/Fwt1vbqSqxsZ3BwoY0ycKH4vhpxOT2XKwkA83HSI80JduwY0/15xxfYgPD+SGl9dRVmVl8Z0TGd8v+rTi6ozclhZFpMYYcyewFPABXhKRbcaYR4EMEVkM3GmMOReoBvKB69wVj1K1EiKDuHx0Iq9/e4Diihr25JZw77RBjOzdjWqb7bTfwLq6onJ7Imhr1VCtPt1D+NX5A3j4w+0MeOATAM5Isq90dsmwnjy8eBubcgoZ3iuyyamu48IDee/2Cfx7RRZJ0SEkRYecVkydlVvLRyKyBFhy0rZ59X7+uTufX6mm3D89ldziKt77zl5befWL3+JjMVhtwqD4MCb2j2bDgXxmjenFRUN7EHEa1RxdzdGiSgBiwwNO+1qXjkrk4Q+3A/aBgZMH2KuGI4L9mDools+3H2V4CzOcxkcEcv90HaLUHNPRutKNGTNGMjIyvB2G6gREhOXf5xIZ7MfSbUfx8zEE+vmwbOcxNhzIp/48dX++bBhXnNGL4opq9uWVMdSJ6ZW7qj99upMXvs5i12MXumRKj29259It2J+0hIb3/HhJJfuOlzI8MRLfJgYTqh8YY9aLyJjG9mmLieqyjDFMHRQLwEjH4uoAd0ztj80mHDhRxq2vrcfXx/Cbdzbz+toDbMouwBh497bxDc5RPzhaaJ/uw1XzOk1KabyDSPfQALqHnn6pQ7WD7qNKtUcWiyEpOoRPfzGZd2+bwM2Tkil0rJQW6OvDwx9u14FpTbDP+6Rv0B2JlgiUaoG/r4X7p6dy34WDycor5du9x7n/va2syTrBmf10EruTHSmqYFB8mLfDUK2gJQKlnGSxGPrHhnLZqERiwwJ4aPFWyh1TFqgf1FYNqY5DE4FSrRTo58NfZw1n19ESXluz39vheNTxkkp2Hy1ucn9RRTWlVVbiNRF0KJoIlGqDyQNiSE+K4pU1+7rUMpj3vbuF8574mj8s2dHo/mU7jwEwLDHSk2Gp06SJQKk2unFiEtknyuvGInRmIsKqzDyWfW9/o3/+6yzufnMjd7y+gUP1pup4/7uD9IgIZGxylLdCVW2giUCpNrpgSDzDEyP422ffU1HdudsKPtt+lKtf/JZqq/C3WcM5d3As7313kKXbjnDpv1ayL68Uq01Yk3WC81PjdEnQDkYTgVJtZIzh3gsHc7iw4pQFUDqb2hk8ASalRPPctWNYctckPr5rEhXVNn7zzmb25pVSXm1lSIIOtutoNBEodRrO7NedcX2jWLguu9ONK7DZhJdW7OWaF9fw1c5j3HJWX5b+YjKx4YH4WAypPcMZGB/GPRcMZO3eEzz4/lYAhvQM93LkqrU0ESh1mi4e1pO9eaV830xvmo7oX8szefSj7azMPA7A3El9GdjI+IDZ6b2ZPqwHq7OO4+djSInVMQQdjQ4oU+o0XTAknkc/2s4tr67nrVvPJDas43edPFhQzlNfZjJ9WA8mp0QTGuDX5HQOPhbDk1eOAKCssgZ/X/182dFoIlDqNMWEBfDKjenMefFbfvfRDuZO6kvfmJAOvUD6f1bsxSrC/100mITIoBaP9/OxMP/qUZ2ueqyr0NStlAuM69udq9J78eGmQ1zyzxXc9+4Wb4fUZqv3HOeV1fu5ZFgPp5JAfU2tC6Dat477kUWpdubhS4YwOSWGfy3fw4ebD3HgRBlRIf78+7oxHeoN8t8rsuge6s8jM9O8HYryEC0RKOUivj4Wzh8Sz3+uP4OeEUFszC
7gq53HWP59rrdDa5VDBRWk9gjXxXi6EE0ESrlYtxB/ltw1iWfnjKZHRCB/Wfo91Vabt8Ny2uHCcnpEdvwGb+U8TQRKuUFEsB/T0uJ5eMYQth8u4v73tlDgWM+gPauotpJfVk2PiNa1DaiOTROBUm50wZB4bpncl0UZOUz80zKWbjvCZc+s4rsD+XXHHC+p9GKEDR0prADQ2UO7GE0ESrnZvRcO4tWfphPs78Mtr65n/f58frVoE8eKK3j92/2MfuwLFq49QFZuCdknyrwa66FC+wRyPSI0EXQl2mtIKTczxjApJYa3bx3PE1/sYk9uCZtzCkn//ZcA+PkYHlq8DWOgotrGX2cN5/LRiS557mNFFYQH+RHo5+PU8bUlgh6t7DaqOjZNBEp5SO/uwTxx5QiqamyszMxjx5EifC2G81LjueK51ZworSI5OoR5H2zFZhOuOKPXKdeorLES4Ovcm3q11cZFT60gLjyARbec6dQAt9oppbVqqGvRRKCUh/n7Wpg6KJapg2Lrtr13+3gqqm2EBfpy13+/4zfvbMZiMVw2KoFqq7App4C/f7aL77Lz+fiuSfSLCW3xeVbvOU5eSSV5JZX8fOF3zL9mVItJZP/xMmLDAgjydy7ZqM7BrYnAGDMN+AfgA7woIo+ftP+XwE1ADZAL3CgiXWvtP6WAxG7BdT+/dtNYrv/PWu57dzP/WbmX7YeLEIHo0ACqrcI5f/sfI3tHkhwdQnL3ELqHBvDqmv0UlFVx//TBXDysJwCfbD1MaIAvvzg3hcc+3sHkPy/jyjG9uPu8AU0OcDtwoozeUcGN7lOdl9sSgTHGB5gPnAfkAOuMMYtFZHu9w74DxohImTHmNuDPwJXuikmpjsDPx8K/rhnN7OfXsO1QEYPiw5id3psrxvTiiS92sXDtAfx9LHy54xiF5dUADE+MICzQlzvf+I6ySis/HpXA0m1HOXtQLDc55j568Zu9PPVVJn4+Fn52TkqD59x9tJiQAF+yT5Qxrl93b7xs5UXuLBGkA5kikgVgjFkIzATqEoGILKt3/BpgjhvjUarDiAjy461bz+Tz7UeZlhZf19h777RB/HbaIHwcK4D9YckODhdW8PcrhmO1CTctyGDe4q1sO1TIidIqLhoaD8DZg+KYOjCWuxZu5KmvdnPh0B70jw1lS04hv3prI7uOlhAXHsDRokotEXRB7kwECUB2vcc5wNhmjv8p8Ikb41GqQwkJ8OVHIxMabDt5Ccj/u2hw3c9+PvDXWcOZOX8FC1bvJyEyiLMG/NAOYYzh4UtS+XLHUZ74YhcVVVa+3HmMYH8fpgyMqZsKQxNB19MuGouNMXOAMcBZTeyfC8wF6N27twcjU6pjiY8I5JvfnI1NhABfyyltAd1DA7j2zD4897+sum3zrx7F1EGxfLDxIG+uy+ZMrRrqctyZCA4C9fu/JTq2NWCMORe4HzhLRBodYikizwPPA4wZM0YnPFeqGS0tDHP3uQMor7JSbRX+cGlaXbKYOSKBmSMSmj1XdU7uTATrgBRjTDL2BHAVcHX9A4wxI4HngGkicsyNsSilHAL9fHhUp5hW9bhtigkRqQHuBJYCO4BFIrLNGPOoMWaG47C/AKHAW8aYjcaYxe6KRymlVOPc2kYgIkuAJSdtm1fv53Pd+fxKKaVappPOKaVUF6eJQCmlujhNBEop1cVpIlBKqS5OE4FSSnVxmgiUUqqLMyIda6CuMSYXcNVU1RFAoYfOd+bYlo5pan9j253ZFg3ktRCTq3jyXjt7fHPHdOR73djzu/N8T97rxrbrvXbumD4iEtPoGSLSZb+A5z11vjPHtnRMU/sb2+7MNiCjM95rV9zvjnyvPX2/PXmvm7i3eq9PM56uXjX0oQfPd+bYlo5pan9j253d5imevNfOHt/cMR35Xrvi+T35t92ae93Ydr3Xbb8e0AGrhpTrGGMyRGSMt+PoCvRee47e69br6iWCru55bwfQhei99hy9162kJQKllOritE
SglFJdnCYCpZTq4jQRKKVUF6eJQDXKGDPFGPONMeZZY8wUb8fT2RljQowxGcaYi70dS2dmjBns+Jt+2xhzZ9ckAwAABORJREFUm7fjaS80EXRCxpiXjDHHjDFbT9o+zRjzvTEm0xhzbwuXEaAECARy3BVrR+eiew3wW2CRe6LsHFxxr0Vkh4jcClwBTHBnvB2J9hrqhIwxk7G/ib8iImmObT7ALuA87G/s64DZgA/wx5MucSOQJyI2Y0wc8HcRucZT8XckLrrXw4Hu2JNunoh85JnoOxZX3GsROeZYKvc24FURecNT8bdnbl2qUnmHiHxtjEk6aXM6kCkiWQDGmIXATBH5I9BcdUQ+EOCOODsDV9xrR9VbCJAKlBtjloiIzZ1xd0Su+rsWkcXAYmPMx4AmAjQRdCUJQHa9xznA2KYONsb8GLgAiAT+6d7QOp1W3WsRuR/AGHM9jpKYW6PrXFr7dz0F+DH2DzdLmjquq9FEoBolIu8C73o7jq5ERF72dgydnYgsB5Z7OYx2RxuLu46DQK96jxMd25Tr6b32HL3XLqCJoOtYB6QYY5KNMf7AVcBiL8fUWem99hy91y6giaATMsb8F1gNDDTG5BhjfioiNcCdwFJgB7BIRLZ5M87OQO+15+i9dh/tPqqUUl2clgiUUqqL00SglFJdnCYCpZTq4jQRKKVUF6eJQCmlujhNBEop1cVpIlCdhjGmxMPPt8rDzxdpjLndk8+pugZNBEo1wRjT7FxcIjLew88ZCWgiUC6niUB1asaYfsaYT40x6x0rrg1ybL/EGPOtMeY7Y8wXjnUXMMY8bIx51RizEnjV8fglY8xyY0yWMeauetcucXyf4tj/tjFmpzHmdWOMcey7yLFtvTHmKWPMKWsNGGOuN8YsNsZ8BXxpjAk1xnxpjNlgjNlijJnpOPRxoJ8xZqMx5i+Oc+8xxqwzxmw2xjziznupOjER0S/96hRfQEkj274EUhw/jwW+cvzcjR9G1t8E/M3x88PAeiCo3uNV2KctjgaOA371nw+YAhRin/DMgn0ahInYF5rJBpIdx/0X+KiRGK/HPn1ylOOxLxDu+DkayAQM8P/t3b1rFEEcxvHvE9BCTlJpEAVfwMJKwc5CsMl/IBYKgqD/gZ2FlXZaiEjsRSVdVFQEC0EEEVSMoAjGwurAF/RCOMQ8Fjsha7zIiTmit88HBm5vZnbmOO5+O7PLzDZgulZvHLhc8kaAm8D+1f4ekv6/lGWoY2hJagH7gMlygQ6Lm+xsAa5L2gSsBWZqVadsz9WOb9nuAl1JbWCMX7fvfGz7fWn3GdWfdgd4a3vh3FeBE8t0957tjwtdB86UHbnmqdbcH+tRZ7ykp+W4BewEHizTRkRPCQQxzEaAz7b39Mi7QLUF51TZrOR0LW92Sdlu7fV3ev9u+inzO/U2DwMbgL22v0l6RzW6WErAWdsTf9hWxE9yjyCGlu0vwIykgwCq7C7ZoyyuW390QF14Deyoba94qM96o0C7BIEDwNby/ldgfa3cXeBYGfkgabOkjX/d62icjAhimKyTVJ+yOUd1dX1J0ilgDXANeE41ApiU9Am4D2xf6c7YniuPe96RNEu1dn4/rgA3JL0AngCvyvk+SHooaRq4bfukpF3AozL11QGOAO2V/iwx3LIMdcQASWrZ7pSniC4Cb2yfX+1+RdRlaihisI6Xm8cvqaZ8Mp8f/5yMCCIiGi4jgoiIhksgiIhouASCiIiGSyCIiGi4BIKIiIZLIIiIaLgfKFx/TS6PY/4AAAAASUVORK5CYII=",
285 |       "text/plain": [
286 |        "<Figure size 432x288 with 1 Axes>"
287 |       ]
288 |      },
289 |      "metadata": {
290 |       "needs_background": "light",
291 |       "tags": []
292 |      },
293 |      "output_type": "display_data"
294 |     },
295 |     {
296 |      "name": "stdout",
297 |      "output_type": "stream",
298 |      "text": [
299 |       "Suggested LR 1.0148376909312998e-05\n"
300 |      ]
301 |     }
302 |    ],
303 |    "source": [
304 |     "lr_finder.plot()\n",
305 |     "\n",
306 |     "print(\"Suggested LR\", lr_finder.lr_suggestion())"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "markdown",
311 |    "metadata": {
312 |     "id": "NcT19wqkmvd6"
313 |    },
314 |    "source": [
315 |     "Now we will apply the suggested learning rate to the optimizer, and train the model again with the optimal learning rate."
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "code",
320 |    "execution_count": 29,
321 |    "metadata": {
322 |     "colab": {
323 |      "base_uri": "https://localhost:8080/"
324 |     },
325 |     "id": "GtEYvL92RDXU",
326 |     "outputId": "fdc40ae4-8c0e-473f-8ae5-1d44d4cb8f9f"
327 |    },
328 |    "outputs": [
329 |     {
330 |      "name": "stdout",
331 |      "output_type": "stream",
332 |      "text": [
333 |       "1.0148376909312998e-05\n"
334 |      ]
335 |     }
336 |    ],
337 |    "source": [
338 |     "lr_finder.apply_suggested_lr(optimizer)\n",
339 |     "print(optimizer.param_groups[0][\"lr\"])"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": 30,
345 |    "metadata": {
346 |     "colab": {
347 |      "base_uri": "https://localhost:8080/"
348 |     },
349 |     "id": "DJqgyaFnmvd7",
350 |     "outputId": "bb1f62d5-c72e-45a7-b547-5a44b05f9efe"
351 |    },
352 |    "outputs": [
353 |     {
354 |      "data": {
355 |       "text/plain": [
356 |        "State:\n",
357 |        "\titeration: 1407\n",
358 |        "\tepoch: 3\n",
359 |        "\tepoch_length: 469\n",
360 |        "\tmax_epochs: 3\n",
361 |        "\toutput: 0.09644963592290878\n",
362 |        "\tbatch: <class 'list'>\n",
363 |        "\tmetrics: <class 'dict'>\n",
364 |        "\tdataloader: <class 'torch.utils.data.dataloader.DataLoader'>\n",
365 |        "\tseed: <class 'NoneType'>\n",
366 |        "\ttimes: <class 'dict'>"
367 |       ]
368 |      },
369 |      "execution_count": 30,
370 |      "metadata": {
371 |       "tags": []
372 |      },
373 |      "output_type": "execute_result"
374 |     }
375 |    ],
376 |    "source": [
377 |     "trainer.run(train_loader, max_epochs=3)"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 31,
383 |    "metadata": {
384 |     "colab": {
385 |      "base_uri": "https://localhost:8080/"
386 |     },
387 |     "id": "MU8E7PpleaNm",
388 |     "outputId": "e308fcaa-92af-462b-9148-e64ec1532a34"
389 |    },
390 |    "outputs": [
391 |     {
392 |      "name": "stdout",
393 |      "output_type": "stream",
394 |      "text": [
395 |       "{'Accuracy': 0.9715, 'Loss': 0.0908882568359375}\n"
396 |      ]
397 |     }
398 |    ],
399 |    "source": [
400 |     "# Calculate the new metrics after using the optimal lr\n",
401 |     "evaluator.run(test_loader)\n",
402 |     "print(evaluator.state.metrics)"
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "markdown",
407 |    "metadata": {
408 |     "id": "ejVpTNh3MJc2"
409 |    },
410 |    "source": [
411 |     "As we saw, the accuracy increased and the loss decreased on the test dataset when we trained our model for the same number of epochs with an optimal learning rate."
412 |    ]
413 |   }
414 |  ],
415 |  "metadata": {
416 |   "accelerator": "GPU",
417 |   "colab": {
418 |    "name": "fastai-lr-finder.ipynb",
419 |    "provenance": []
420 |   },
421 |   "kernelspec": {
422 |    "display_name": "Python 3 (ipykernel)",
423 |    "language": "python",
424 |    "name": "python3"
425 |   },
426 |   "language_info": {
427 |    "codemirror_mode": {
428 |     "name": "ipython",
429 |     "version": 3
430 |    },
431 |    "file_extension": ".py",
432 |    "mimetype": "text/x-python",
433 |    "name": "python",
434 |    "nbconvert_exporter": "python",
435 |    "pygments_lexer": "ipython3",
436 |    "version": "3.8.10"
437 |   },
438 |   "pycharm": {
439 |    "stem_cell": {
440 |     "cell_type": "raw",
441 |     "metadata": {
442 |      "collapsed": false
443 |     },
444 |     "source": []
445 |    }
446 |   }
447 |  },
448 |  "nbformat": 4,
449 |  "nbformat_minor": 4
450 | }
451 | 


--------------------------------------------------------------------------------
/how-to-guides/05-gradient-accumulation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "8a652d9a",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<!-- ---\n",
  9 |     "title: How to effectively increase batch size on limited compute\n",
 10 |     "date: 2021-08-04\n",
 11 |     "weight: 5\n",
 12 |     "downloads: true\n",
 13 |     "sidebar: true\n",
 14 |     "tags:\n",
 15 |     "  - gradient accumulation\n",
 16 |     "--- -->\n",
 17 |     "# How to effectively increase batch size on limited compute\n",
 18 |     "\n",
 19 |     "To effectively increase the batch size on limited GPU resources, follow\n",
 20 |     "this simple best practice.\n",
 21 |     "\n",
 22 |     "<!--more-->"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 1,
 28 |    "id": "dd80e226",
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "from ignite.engine import Engine\n",
 33 |     "\n",
 34 |     "accumulation_steps = 4\n",
 35 |     "\n",
 36 |     "def update_fn(engine, batch):\n",
 37 |     "    model.train()\n",
 38 |     "\n",
 39 |     "    x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)\n",
 40 |     "    y_pred = model(x)\n",
 41 |     "    loss = criterion(y_pred, y) / accumulation_steps\n",
 42 |     "    loss.backward()\n",
 43 |     "\n",
 44 |     "    if engine.state.iteration % accumulation_steps == 0:\n",
 45 |     "        optimizer.step()\n",
 46 |     "        optimizer.zero_grad()\n",
 47 |     "\n",
 48 |     "    return loss.item()\n",
 49 |     "\n",
 50 |     "trainer = Engine(update_fn)"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "id": "c74f90a0",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "If you prefer to use the PyTorch-Ignite helper functions for supervised training mentioned [here](https://pytorch.org/ignite/engine.html#helper-methods-to-define-supervised-trainer-and-evaluator), they also support Gradient Accumulation through the ``gradient_accumulation_steps`` parameter. \n",
 59 |     "For example,\n",
 60 |     "\n",
 61 |     "```python\n",
 62 |     "update_fn = supervised_training_step(model, optimizer, criterion, gradient_accumulation_steps=4)\n",
 63 |     "trainer = Engine(update_fn)\n",
 64 |     "```\n",
 65 |     "would result in the same Engine as above."
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "id": "a7c4ea05",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "## Resources\n",
 74 |     "\n",
 75 |     "1.  [Training Neural Nets on Larger Batches: Practical Tips for 1-GPU,\n",
 76 |     "    Multi-GPU & Distributed\n",
 77 |     "    setups](https://medium.com/huggingface/training-larger-batches-practical-tips-on-1-gpu-multi-gpu-distributed-setups-ec88c3e51255)\n",
 78 |     "2.  [Code](https://gist.github.com/thomwolf/ac7a7da6b1888c2eeac8ac8b9b05d3d3#file-gradient_accumulation-py)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "id": "4fcc2d56",
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": []
 88 |   }
 89 |  ],
 90 |  "metadata": {
 91 |   "kernelspec": {
 92 |    "display_name": "Python 3 (ipykernel)",
 93 |    "language": "python",
 94 |    "name": "python3"
 95 |   },
 96 |   "language_info": {
 97 |    "codemirror_mode": {
 98 |     "name": "ipython",
 99 |     "version": 3
100 |    },
101 |    "file_extension": ".py",
102 |    "mimetype": "text/x-python",
103 |    "name": "python",
104 |    "nbconvert_exporter": "python",
105 |    "pygments_lexer": "ipython3",
106 |    "version": "3.10.4"
107 |   }
108 |  },
109 |  "nbformat": 4,
110 |  "nbformat_minor": 5
111 | }
112 | 


--------------------------------------------------------------------------------
/how-to-guides/06-data-iterator.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "55c7fada",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "<!-- ---\n",
  9 |     "title: How to work with data iterators\n",
 10 |     "date: 2021-08-04\n",
 11 |     "weight: 6\n",
 12 |     "downloads: true\n",
 13 |     "sidebar: true\n",
 14 |     "tags:\n",
 15 |     "  - data iterators\n",
 16 |     "  - infinite iterator\n",
 17 |     "  - finite iterator\n",
 18 |     "--- -->\n",
 19 |     "# How to work with data iterators\n",
 20 |     "\n",
 21 |     "When the data provider for training or validation is an iterator\n",
 22 |     "(infinite or finite with known or unknown size), here are some basic\n",
 23 |     "examples of how to setup trainer or evaluator.\n",
 24 |     "\n",
 25 |     "<!--more-->"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "id": "e97045a5",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "## Infinite iterator for training\n",
 34 |     "\n",
 35 |     "Let's use an infinite data iterator as the training dataflow."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 1,
 41 |    "id": "5b5f175a",
 42 |    "metadata": {},
 43 |    "outputs": [
 44 |     {
 45 |      "name": "stdout",
 46 |      "output_type": "stream",
 47 |      "text": [
 48 |       "1/3 : 1 - 63.862\n",
 49 |       "1/3 : 2 - 64.042\n",
 50 |       "1/3 : 3 - 63.936\n",
 51 |       "1/3 : 4 - 64.141\n",
 52 |       "1/3 : 5 - 64.767\n",
 53 |       "2/3 : 6 - 63.791\n",
 54 |       "2/3 : 7 - 64.565\n",
 55 |       "2/3 : 8 - 63.602\n",
 56 |       "2/3 : 9 - 63.995\n",
 57 |       "2/3 : 10 - 63.943\n",
 58 |       "3/3 : 11 - 63.831\n",
 59 |       "3/3 : 12 - 64.276\n",
 60 |       "3/3 : 13 - 64.148\n",
 61 |       "3/3 : 14 - 63.920\n",
 62 |       "3/3 : 15 - 64.226\n"
 63 |      ]
 64 |     },
 65 |     {
 66 |      "data": {
 67 |       "text/plain": [
 68 |        "State:\n",
 69 |        "\titeration: 15\n",
 70 |        "\tepoch: 3\n",
 71 |        "\tepoch_length: 5\n",
 72 |        "\tmax_epochs: 3\n",
 73 |        "\toutput: <class 'NoneType'>\n",
 74 |        "\tbatch: <class 'torch.Tensor'>\n",
 75 |        "\tmetrics: <class 'dict'>\n",
 76 |        "\tdataloader: <class 'generator'>\n",
 77 |        "\tseed: <class 'NoneType'>\n",
 78 |        "\ttimes: <class 'dict'>"
 79 |       ]
 80 |      },
 81 |      "execution_count": 1,
 82 |      "metadata": {},
 83 |      "output_type": "execute_result"
 84 |     }
 85 |    ],
 86 |    "source": [
 87 |     "import torch\n",
 88 |     "from ignite.engine import Engine, Events\n",
 89 |     "\n",
 90 |     "torch.manual_seed(12)\n",
 91 |     "\n",
 92 |     "def infinite_iterator(batch_size):\n",
 93 |     "    while True:\n",
 94 |     "        batch = torch.rand(batch_size, 3, 32, 32)\n",
 95 |     "        yield batch\n",
 96 |     "\n",
 97 |     "def train_step(trainer, batch):\n",
 98 |     "    # ...\n",
 99 |     "    s = trainer.state\n",
100 |     "    print(\n",
101 |     "        f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch.norm():.3f}\"\n",
102 |     "    )\n",
103 |     "\n",
104 |     "trainer = Engine(train_step)\n",
105 |     "\n",
106 |     "# We need to specify epoch_length to define the epoch\n",
107 |     "trainer.run(infinite_iterator(4), epoch_length=5, max_epochs=3)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "id": "a755b048",
113 |    "metadata": {},
114 |    "source": [
115 |     "If we do not specify **epoch_length**, we can stop the training explicitly by calling [`terminate()`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine). In this case, there will be only a single epoch defined."
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 2,
121 |    "id": "d48531dd",
122 |    "metadata": {},
123 |    "outputs": [
124 |     {
125 |      "name": "stdout",
126 |      "output_type": "stream",
127 |      "text": [
128 |       "1/1 : 1 - 63.862\n",
129 |       "1/1 : 2 - 64.042\n",
130 |       "1/1 : 3 - 63.936\n",
131 |       "1/1 : 4 - 64.141\n",
132 |       "1/1 : 5 - 64.767\n",
133 |       "1/1 : 6 - 63.791\n",
134 |       "1/1 : 7 - 64.565\n",
135 |       "1/1 : 8 - 63.602\n",
136 |       "1/1 : 9 - 63.995\n",
137 |       "1/1 : 10 - 63.943\n",
138 |       "1/1 : 11 - 63.831\n",
139 |       "1/1 : 12 - 64.276\n",
140 |       "1/1 : 13 - 64.148\n",
141 |       "1/1 : 14 - 63.920\n",
142 |       "1/1 : 15 - 64.226\n"
143 |      ]
144 |     },
145 |     {
146 |      "data": {
147 |       "text/plain": [
148 |        "State:\n",
149 |        "\titeration: 15\n",
150 |        "\tepoch: 1\n",
151 |        "\tepoch_length: <class 'NoneType'>\n",
152 |        "\tmax_epochs: 1\n",
153 |        "\toutput: <class 'NoneType'>\n",
154 |        "\tbatch: <class 'torch.Tensor'>\n",
155 |        "\tmetrics: <class 'dict'>\n",
156 |        "\tdataloader: <class 'generator'>\n",
157 |        "\tseed: <class 'NoneType'>\n",
158 |        "\ttimes: <class 'dict'>"
159 |       ]
160 |      },
161 |      "execution_count": 2,
162 |      "metadata": {},
163 |      "output_type": "execute_result"
164 |     }
165 |    ],
166 |    "source": [
167 |     "import torch\n",
168 |     "from ignite.engine import Engine, Events\n",
169 |     "\n",
170 |     "torch.manual_seed(12)\n",
171 |     "\n",
172 |     "def infinite_iterator(batch_size):\n",
173 |     "    while True:\n",
174 |     "        batch = torch.rand(batch_size, 3, 32, 32)\n",
175 |     "        yield batch\n",
176 |     "\n",
177 |     "def train_step(trainer, batch):\n",
178 |     "    # ...\n",
179 |     "    s = trainer.state\n",
180 |     "    print(\n",
181 |     "        f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch.norm():.3f}\"\n",
182 |     "    )\n",
183 |     "\n",
184 |     "trainer = Engine(train_step)\n",
185 |     "\n",
186 |     "@trainer.on(Events.ITERATION_COMPLETED(once=15))\n",
187 |     "def stop_training():\n",
188 |     "    trainer.terminate()\n",
189 |     "\n",
190 |     "trainer.run(infinite_iterator(4))"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "id": "30d63d14",
196 |    "metadata": {},
197 |    "source": [
198 |     "The same code can be used for validating models."
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "markdown",
203 |    "id": "37190708",
204 |    "metadata": {},
205 |    "source": [
206 |     "## Finite iterator with unknown length\n",
207 |     "\n",
208 |     "Let's use a finite data iterator whose length is unknown to the user. In\n",
209 |     "the case of training, we would like to perform several passes over the\n",
210 |     "dataflow, and thus we need to restart the data iterator when it is\n",
211 |     "exhausted. In the code, we do not specify `epoch_length`; it will be determined\n",
212 |     "automatically."
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": 3,
218 |    "id": "199087b1",
219 |    "metadata": {},
220 |    "outputs": [
221 |     {
222 |      "name": "stdout",
223 |      "output_type": "stream",
224 |      "text": [
225 |       "1/5 : 1 - 0.000\n",
226 |       "1/5 : 2 - 1.000\n",
227 |       "1/5 : 3 - 2.000\n",
228 |       "1/5 : 4 - 3.000\n",
229 |       "1/5 : 5 - 4.000\n",
230 |       "1/5 : 6 - 5.000\n",
231 |       "1/5 : 7 - 6.000\n",
232 |       "1/5 : 8 - 7.000\n",
233 |       "1/5 : 9 - 8.000\n",
234 |       "1/5 : 10 - 9.000\n",
235 |       "1/5 : 11 - 10.000\n",
236 |       "2/5 : 12 - 0.000\n",
237 |       "2/5 : 13 - 1.000\n",
238 |       "2/5 : 14 - 2.000\n",
239 |       "2/5 : 15 - 3.000\n",
240 |       "2/5 : 16 - 4.000\n",
241 |       "2/5 : 17 - 5.000\n",
242 |       "2/5 : 18 - 6.000\n",
243 |       "2/5 : 19 - 7.000\n",
244 |       "2/5 : 20 - 8.000\n",
245 |       "2/5 : 21 - 9.000\n",
246 |       "2/5 : 22 - 10.000\n",
247 |       "3/5 : 23 - 0.000\n",
248 |       "3/5 : 24 - 1.000\n",
249 |       "3/5 : 25 - 2.000\n",
250 |       "3/5 : 26 - 3.000\n",
251 |       "3/5 : 27 - 4.000\n",
252 |       "3/5 : 28 - 5.000\n",
253 |       "3/5 : 29 - 6.000\n",
254 |       "3/5 : 30 - 7.000\n",
255 |       "3/5 : 31 - 8.000\n",
256 |       "3/5 : 32 - 9.000\n",
257 |       "3/5 : 33 - 10.000\n",
258 |       "4/5 : 34 - 0.000\n",
259 |       "4/5 : 35 - 1.000\n",
260 |       "4/5 : 36 - 2.000\n",
261 |       "4/5 : 37 - 3.000\n",
262 |       "4/5 : 38 - 4.000\n",
263 |       "4/5 : 39 - 5.000\n",
264 |       "4/5 : 40 - 6.000\n",
265 |       "4/5 : 41 - 7.000\n",
266 |       "4/5 : 42 - 8.000\n",
267 |       "4/5 : 43 - 9.000\n",
268 |       "4/5 : 44 - 10.000\n",
269 |       "5/5 : 45 - 0.000\n",
270 |       "5/5 : 46 - 1.000\n",
271 |       "5/5 : 47 - 2.000\n",
272 |       "5/5 : 48 - 3.000\n",
273 |       "5/5 : 49 - 4.000\n",
274 |       "5/5 : 50 - 5.000\n",
275 |       "5/5 : 51 - 6.000\n",
276 |       "5/5 : 52 - 7.000\n",
277 |       "5/5 : 53 - 8.000\n",
278 |       "5/5 : 54 - 9.000\n",
279 |       "5/5 : 55 - 10.000\n"
280 |      ]
281 |     },
282 |     {
283 |      "data": {
284 |       "text/plain": [
285 |        "State:\n",
286 |        "\titeration: 55\n",
287 |        "\tepoch: 5\n",
288 |        "\tepoch_length: 11\n",
289 |        "\tmax_epochs: 5\n",
290 |        "\toutput: <class 'NoneType'>\n",
291 |        "\tbatch: 10\n",
292 |        "\tmetrics: <class 'dict'>\n",
293 |        "\tdataloader: <class 'generator'>\n",
294 |        "\tseed: <class 'NoneType'>\n",
295 |        "\ttimes: <class 'dict'>"
296 |       ]
297 |      },
298 |      "execution_count": 3,
299 |      "metadata": {},
300 |      "output_type": "execute_result"
301 |     }
302 |    ],
303 |    "source": [
304 |     "import torch\n",
305 |     "from ignite.engine import Engine, Events\n",
306 |     "\n",
307 |     "torch.manual_seed(12)\n",
308 |     "\n",
309 |     "def finite_unk_size_data_iter():\n",
310 |     "    for i in range(11):\n",
311 |     "        yield i\n",
312 |     "\n",
313 |     "def train_step(trainer, batch):\n",
314 |     "    # ...\n",
315 |     "    s = trainer.state\n",
316 |     "    print(\n",
317 |     "        f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n",
318 |     "    )\n",
319 |     "\n",
320 |     "trainer = Engine(train_step)\n",
321 |     "\n",
322 |     "@trainer.on(Events.DATALOADER_STOP_ITERATION)\n",
323 |     "def restart_iter():\n",
324 |     "    trainer.state.dataloader = finite_unk_size_data_iter()\n",
325 |     "\n",
326 |     "data_iter = finite_unk_size_data_iter()\n",
327 |     "trainer.run(data_iter, max_epochs=5)"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "markdown",
332 |    "id": "ee068ac8",
333 |    "metadata": {},
334 |    "source": [
335 |     "In the case of validation, the code is simply:"
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "code",
340 |    "execution_count": 4,
341 |    "id": "beae6490",
342 |    "metadata": {},
343 |    "outputs": [
344 |     {
345 |      "name": "stdout",
346 |      "output_type": "stream",
347 |      "text": [
348 |       "1/1 : 1 - 0.000\n",
349 |       "1/1 : 2 - 1.000\n",
350 |       "1/1 : 3 - 2.000\n",
351 |       "1/1 : 4 - 3.000\n",
352 |       "1/1 : 5 - 4.000\n",
353 |       "1/1 : 6 - 5.000\n",
354 |       "1/1 : 7 - 6.000\n",
355 |       "1/1 : 8 - 7.000\n",
356 |       "1/1 : 9 - 8.000\n",
357 |       "1/1 : 10 - 9.000\n",
358 |       "1/1 : 11 - 10.000\n"
359 |      ]
360 |     },
361 |     {
362 |      "data": {
363 |       "text/plain": [
364 |        "State:\n",
365 |        "\titeration: 11\n",
366 |        "\tepoch: 1\n",
367 |        "\tepoch_length: 11\n",
368 |        "\tmax_epochs: 1\n",
369 |        "\toutput: <class 'NoneType'>\n",
370 |        "\tbatch: <class 'NoneType'>\n",
371 |        "\tmetrics: <class 'dict'>\n",
372 |        "\tdataloader: <class 'generator'>\n",
373 |        "\tseed: <class 'NoneType'>\n",
374 |        "\ttimes: <class 'dict'>"
375 |       ]
376 |      },
377 |      "execution_count": 4,
378 |      "metadata": {},
379 |      "output_type": "execute_result"
380 |     }
381 |    ],
382 |    "source": [
383 |     "import torch\n",
384 |     "from ignite.engine import Engine, Events\n",
385 |     "\n",
386 |     "torch.manual_seed(12)\n",
387 |     "\n",
388 |     "def finite_unk_size_data_iter():\n",
389 |     "    for i in range(11):\n",
390 |     "        yield i\n",
391 |     "\n",
392 |     "def val_step(evaluator, batch):\n",
393 |     "    # ...\n",
394 |     "    s = evaluator.state\n",
395 |     "    print(\n",
396 |     "        f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n",
397 |     "    )\n",
398 |     "\n",
399 |     "evaluator = Engine(val_step)\n",
400 |     "\n",
401 |     "data_iter = finite_unk_size_data_iter()\n",
402 |     "evaluator.run(data_iter)"
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "markdown",
407 |    "id": "5d1abaa7",
408 |    "metadata": {},
409 |    "source": [
410 |     "## Finite iterator with known length\n",
411 |     "\n",
412 |     "Let's use a finite data iterator with known size for training or validation. If we need to restart the data iterator, we can do this as in the case of unknown size, by attaching the restart handler on `@trainer.on(Events.DATALOADER_STOP_ITERATION)`, but here we will do this explicitly on iteration:"
413 |    ]
414 |   },
415 |   {
416 |    "cell_type": "code",
417 |    "execution_count": 5,
418 |    "id": "a7f519ac",
419 |    "metadata": {},
420 |    "outputs": [
421 |     {
422 |      "name": "stdout",
423 |      "output_type": "stream",
424 |      "text": [
425 |       "1/5 : 1 - 0.000\n",
426 |       "1/5 : 2 - 1.000\n",
427 |       "1/5 : 3 - 2.000\n",
428 |       "1/5 : 4 - 3.000\n",
429 |       "1/5 : 5 - 4.000\n",
430 |       "1/5 : 6 - 5.000\n",
431 |       "1/5 : 7 - 6.000\n",
432 |       "1/5 : 8 - 7.000\n",
433 |       "1/5 : 9 - 8.000\n",
434 |       "1/5 : 10 - 9.000\n",
435 |       "1/5 : 11 - 10.000\n",
436 |       "2/5 : 12 - 0.000\n",
437 |       "2/5 : 13 - 1.000\n",
438 |       "2/5 : 14 - 2.000\n",
439 |       "2/5 : 15 - 3.000\n",
440 |       "2/5 : 16 - 4.000\n",
441 |       "2/5 : 17 - 5.000\n",
442 |       "2/5 : 18 - 6.000\n",
443 |       "2/5 : 19 - 7.000\n",
444 |       "2/5 : 20 - 8.000\n",
445 |       "2/5 : 21 - 9.000\n",
446 |       "2/5 : 22 - 10.000\n",
447 |       "3/5 : 23 - 0.000\n",
448 |       "3/5 : 24 - 1.000\n",
449 |       "3/5 : 25 - 2.000\n",
450 |       "3/5 : 26 - 3.000\n",
451 |       "3/5 : 27 - 4.000\n",
452 |       "3/5 : 28 - 5.000\n",
453 |       "3/5 : 29 - 6.000\n",
454 |       "3/5 : 30 - 7.000\n",
455 |       "3/5 : 31 - 8.000\n",
456 |       "3/5 : 32 - 9.000\n",
457 |       "3/5 : 33 - 10.000\n",
458 |       "4/5 : 34 - 0.000\n",
459 |       "4/5 : 35 - 1.000\n",
460 |       "4/5 : 36 - 2.000\n",
461 |       "4/5 : 37 - 3.000\n",
462 |       "4/5 : 38 - 4.000\n",
463 |       "4/5 : 39 - 5.000\n",
464 |       "4/5 : 40 - 6.000\n",
465 |       "4/5 : 41 - 7.000\n",
466 |       "4/5 : 42 - 8.000\n",
467 |       "4/5 : 43 - 9.000\n",
468 |       "4/5 : 44 - 10.000\n",
469 |       "5/5 : 45 - 0.000\n",
470 |       "5/5 : 46 - 1.000\n",
471 |       "5/5 : 47 - 2.000\n",
472 |       "5/5 : 48 - 3.000\n",
473 |       "5/5 : 49 - 4.000\n",
474 |       "5/5 : 50 - 5.000\n",
475 |       "5/5 : 51 - 6.000\n",
476 |       "5/5 : 52 - 7.000\n",
477 |       "5/5 : 53 - 8.000\n",
478 |       "5/5 : 54 - 9.000\n",
479 |       "5/5 : 55 - 10.000\n"
480 |      ]
481 |     },
482 |     {
483 |      "data": {
484 |       "text/plain": [
485 |        "State:\n",
486 |        "\titeration: 55\n",
487 |        "\tepoch: 5\n",
488 |        "\tepoch_length: 11\n",
489 |        "\tmax_epochs: 5\n",
490 |        "\toutput: <class 'NoneType'>\n",
491 |        "\tbatch: 10\n",
492 |        "\tmetrics: <class 'dict'>\n",
493 |        "\tdataloader: <class 'generator'>\n",
494 |        "\tseed: <class 'NoneType'>\n",
495 |        "\ttimes: <class 'dict'>"
496 |       ]
497 |      },
498 |      "execution_count": 5,
499 |      "metadata": {},
500 |      "output_type": "execute_result"
501 |     }
502 |    ],
503 |    "source": [
504 |     "import torch\n",
505 |     "from ignite.engine import Engine, Events\n",
506 |     "\n",
507 |     "torch.manual_seed(12)\n",
508 |     "\n",
509 |     "size = 11\n",
510 |     "\n",
511 |     "def finite_size_data_iter(size):\n",
512 |     "    for i in range(size):\n",
513 |     "        yield i\n",
514 |     "\n",
515 |     "def train_step(trainer, batch):\n",
516 |     "    # ...\n",
517 |     "    s = trainer.state\n",
518 |     "    print(\n",
519 |     "        f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n",
520 |     "    )\n",
521 |     "\n",
522 |     "trainer = Engine(train_step)\n",
523 |     "\n",
524 |     "@trainer.on(Events.ITERATION_COMPLETED(every=size))\n",
525 |     "def restart_iter():\n",
526 |     "    trainer.state.dataloader = finite_size_data_iter(size)\n",
527 |     "\n",
528 |     "data_iter = finite_size_data_iter(size)\n",
529 |     "trainer.run(data_iter, max_epochs=5)"
530 |    ]
531 |   },
532 |   {
533 |    "cell_type": "markdown",
534 |    "id": "a518b014",
535 |    "metadata": {},
536 |    "source": [
537 |     "In the case of validation, the code is simply:"
538 |    ]
539 |   },
540 |   {
541 |    "cell_type": "code",
542 |    "execution_count": 6,
543 |    "id": "d1402c18",
544 |    "metadata": {},
545 |    "outputs": [
546 |     {
547 |      "name": "stdout",
548 |      "output_type": "stream",
549 |      "text": [
550 |       "1/1 : 1 - 0.000\n",
551 |       "1/1 : 2 - 1.000\n",
552 |       "1/1 : 3 - 2.000\n",
553 |       "1/1 : 4 - 3.000\n",
554 |       "1/1 : 5 - 4.000\n",
555 |       "1/1 : 6 - 5.000\n",
556 |       "1/1 : 7 - 6.000\n",
557 |       "1/1 : 8 - 7.000\n",
558 |       "1/1 : 9 - 8.000\n",
559 |       "1/1 : 10 - 9.000\n",
560 |       "1/1 : 11 - 10.000\n"
561 |      ]
562 |     },
563 |     {
564 |      "data": {
565 |       "text/plain": [
566 |        "State:\n",
567 |        "\titeration: 11\n",
568 |        "\tepoch: 1\n",
569 |        "\tepoch_length: 11\n",
570 |        "\tmax_epochs: 1\n",
571 |        "\toutput: <class 'NoneType'>\n",
572 |        "\tbatch: <class 'NoneType'>\n",
573 |        "\tmetrics: <class 'dict'>\n",
574 |        "\tdataloader: <class 'generator'>\n",
575 |        "\tseed: <class 'NoneType'>\n",
576 |        "\ttimes: <class 'dict'>"
577 |       ]
578 |      },
579 |      "execution_count": 6,
580 |      "metadata": {},
581 |      "output_type": "execute_result"
582 |     }
583 |    ],
584 |    "source": [
585 |     "import torch\n",
586 |     "from ignite.engine import Engine, Events\n",
587 |     "\n",
588 |     "torch.manual_seed(12)\n",
589 |     "\n",
590 |     "size = 11\n",
591 |     "\n",
592 |     "def finite_size_data_iter(size):\n",
593 |     "    for i in range(size):\n",
594 |     "        yield i\n",
595 |     "\n",
596 |     "def val_step(evaluator, batch):\n",
597 |     "    # ...\n",
598 |     "    s = evaluator.state\n",
599 |     "    print(\n",
600 |     "        f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n",
601 |     "    )\n",
602 |     "\n",
603 |     "evaluator = Engine(val_step)\n",
604 |     "\n",
605 |     "data_iter = finite_size_data_iter(size)\n",
606 |     "evaluator.run(data_iter)"
607 |    ]
608 |   }
609 |  ],
610 |  "metadata": {
611 |   "interpreter": {
612 |    "hash": "668c1b3fdfcad7da09e9c177fb24f18a657bbc5f55005750960a78843b3807f7"
613 |   },
614 |   "kernelspec": {
615 |    "display_name": "Python 3 (ipykernel)",
616 |    "language": "python",
617 |    "name": "python3"
618 |   },
619 |   "language_info": {
620 |    "codemirror_mode": {
621 |     "name": "ipython",
622 |     "version": 3
623 |    },
624 |    "file_extension": ".py",
625 |    "mimetype": "text/x-python",
626 |    "name": "python",
627 |    "nbconvert_exporter": "python",
628 |    "pygments_lexer": "ipython3",
629 |    "version": "3.8.10"
630 |   }
631 |  },
632 |  "nbformat": 4,
633 |  "nbformat_minor": 5
634 | }
635 | 


--------------------------------------------------------------------------------
/how-to-guides/08-custom-events.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "YQCt0TA0uaNc",
  6 |    "metadata": {
  7 |     "id": "YQCt0TA0uaNc"
  8 |    },
  9 |    "source": [
 10 |     "<!-- ---\n",
 11 |     "title: How to create Custom Events based on Forward or Backward Pass\n",
 12 |     "weight: 8\n",
 13 |     "downloads: true\n",
 14 |     "sidebar: true\n",
 15 |     "summary: Learn how to create custom events that depend on the loss calculated, backward pass, optimization step, etc.\n",
 16 |     "tags:\n",
 17 |     "  - custom events\n",
 18 |     "--- -->\n",
 19 |     "# How to create Custom Events based on Forward or Backward Pass"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "id": "aMOY2iPOuaNk",
 25 |    "metadata": {
 26 |     "id": "aMOY2iPOuaNk"
 27 |    },
 28 |    "source": [
 29 |     "This guide demonstrates how you can create [custom events](https://pytorch-ignite.ai/concepts/02-events-and-handlers#custom-events) that depend on the loss calculated and backward pass.\n",
 30 |     "\n",
 31 |     "In this example, we will be using a ResNet18 model on the MNIST dataset. The base code is the same as used in the [Getting Started Guide](https://pytorch-ignite.ai/tutorials/getting-started/)."
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "id": "THcUNAgpWMDF",
 37 |    "metadata": {
 38 |     "id": "THcUNAgpWMDF"
 39 |    },
 40 |    "source": [
 41 |     "## Basic Setup"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 7,
 47 |    "id": "Y0sJP9iFa1TB",
 48 |    "metadata": {
 49 |     "id": "Y0sJP9iFa1TB",
 50 |     "pycharm": {
 51 |      "is_executing": false
 52 |     }
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "import pandas as pd\n",
 57 |     "\n",
 58 |     "import torch\n",
 59 |     "from torch import nn\n",
 60 |     "from torch.utils.data import DataLoader\n",
 61 |     "from torchvision.datasets import MNIST\n",
 62 |     "from torchvision.models import resnet18\n",
 63 |     "from torchvision.transforms import Compose, Normalize, ToTensor\n",
 64 |     "\n",
 65 |     "from ignite.engine import Engine, EventEnum, Events, create_supervised_evaluator\n",
 66 |     "from ignite.metrics import Accuracy, Loss\n",
 67 |     "from ignite.handlers import Timer\n",
 68 |     "from ignite.contrib.handlers import BasicTimeProfiler, HandlersTimeProfiler"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "id": "iK_9cOP6a1TI",
 75 |    "metadata": {
 76 |     "id": "iK_9cOP6a1TI"
 77 |    },
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
 81 |     "\n",
 82 |     "\n",
 83 |     "class Net(nn.Module):\n",
 84 |     "    def __init__(self):\n",
 85 |     "        super(Net, self).__init__()\n",
 86 |     "\n",
 87 |     "        self.model = resnet18(num_classes=10)\n",
 88 |     "        self.model.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False)\n",
 89 |     "\n",
 90 |     "    def forward(self, x):\n",
 91 |     "        return self.model(x)\n",
 92 |     "\n",
 93 |     "\n",
 94 |     "model = Net().to(device)\n",
 95 |     "\n",
 96 |     "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n",
 97 |     "\n",
 98 |     "train_loader = DataLoader(\n",
 99 |     "    MNIST(download=True, root=\".\", transform=data_transform, train=True),\n",
100 |     "    batch_size=128,\n",
101 |     "    shuffle=True,\n",
102 |     ")\n",
103 |     "\n",
104 |     "optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005)\n",
105 |     "criterion = nn.CrossEntropyLoss()"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "id": "Q_u0IS8q9IY-",
111 |    "metadata": {
112 |     "id": "Q_u0IS8q9IY-"
113 |    },
114 |    "source": [
115 |     "## Create Custom Events\n",
116 |     "\n",
117 |     "First let's create a few custom events based on backpropagation. All user-defined custom events should inherit from the base class [`EventEnum`](https://pytorch.org/ignite/generated/ignite.engine.events.EventEnum.html#ignite.engine.events.EventEnum)."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 27,
123 |    "id": "TbEoK_H8yIAj",
124 |    "metadata": {
125 |     "id": "TbEoK_H8yIAj"
126 |    },
127 |    "outputs": [],
128 |    "source": [
129 |     "class BackpropEvents(EventEnum):\n",
130 |     "    BACKWARD_STARTED = 'backward_started'\n",
131 |     "    BACKWARD_COMPLETED = 'backward_completed'\n",
132 |     "    OPTIM_STEP_COMPLETED = 'optim_step_completed'"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "markdown",
137 |    "id": "9lwr621Y9Lnx",
138 |    "metadata": {
139 |     "id": "9lwr621Y9Lnx"
140 |    },
141 |    "source": [
142 |     "## Create `trainer`\n",
143 |     "\n",
144 |     "Then we define the `train_step` function to be applied on all batches. Within this, we use [`fire_event`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.fire_event) to execute all handlers related to a specific event at that point."
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 28,
150 |    "id": "8aqUFTEdxxvz",
151 |    "metadata": {
152 |     "id": "8aqUFTEdxxvz"
153 |    },
154 |    "outputs": [],
155 |    "source": [
156 |     "def train_step(engine, batch):\n",
157 |     "    model.train()\n",
158 |     "    optimizer.zero_grad()\n",
159 |     "    x, y = batch[0].to(device), batch[1].to(device)\n",
160 |     "    y_pred = model(x)\n",
161 |     "    loss = criterion(y_pred, y)\n",
162 |     "    \n",
163 |     "    engine.fire_event(BackpropEvents.BACKWARD_STARTED)\n",
164 |     "    loss.backward()\n",
165 |     "    engine.fire_event(BackpropEvents.BACKWARD_COMPLETED)\n",
166 |     "\n",
167 |     "    optimizer.step()\n",
168 |     "    engine.fire_event(BackpropEvents.OPTIM_STEP_COMPLETED)\n",
169 |     "\n",
170 |     "    return loss.item()\n",
171 |     "\n",
172 |     "\n",
173 |     "trainer = Engine(train_step)"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "id": "eiLRGHAK9Q12",
179 |    "metadata": {
180 |     "id": "eiLRGHAK9Q12"
181 |    },
182 |    "source": [
183 |     "## Register Custom Events in `trainer`\n",
184 |     "\n",
185 |     "Finally, to make sure our events can be fired, we register them in `trainer` using [`register_events`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.register_events)."
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": null,
191 |    "id": "4byi6J6N9d4K",
192 |    "metadata": {
193 |     "id": "4byi6J6N9d4K"
194 |    },
195 |    "outputs": [],
196 |    "source": [
197 |     "trainer.register_events(*BackpropEvents)"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "markdown",
202 |    "id": "WZbJwRUD9e-d",
203 |    "metadata": {
204 |     "id": "WZbJwRUD9e-d"
205 |    },
206 |    "source": [
207 |     "## Attach handlers to Custom Events\n",
208 |     "\n",
209 |     "And now we can easily attach handlers to be executed when a particular event like `BACKWARD_COMPLETED` is fired."
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 29,
215 |    "id": "9Dp6QBfQysOq",
216 |    "metadata": {
217 |     "id": "9Dp6QBfQysOq"
218 |    },
219 |    "outputs": [],
220 |    "source": [
221 |     "@trainer.on(BackpropEvents.BACKWARD_COMPLETED)\n",
222 |     "def function_before_backprop(engine):\n",
223 |     "    print(f\"Iter[{engine.state.iteration}] Function fired after backward pass\")"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "markdown",
228 |    "id": "XMKXagQk-VLl",
229 |    "metadata": {
230 |     "id": "XMKXagQk-VLl"
231 |    },
232 |    "source": [
233 |     "And finally you can run the `trainer` for some epochs. "
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": null,
239 |    "id": "3G9DV6h767fj",
240 |    "metadata": {
241 |     "id": "3G9DV6h767fj"
242 |    },
243 |    "outputs": [],
244 |    "source": [
245 |     "trainer.run(train_loader, max_epochs=3)"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "markdown",
250 |    "id": "x031SkP2-Lg9",
251 |    "metadata": {
252 |     "id": "x031SkP2-Lg9"
253 |    },
254 |    "source": [
255 |     "## Additional Links\n",
256 |     "\n",
257 |     "You can also check out the source code of [TBPTT Trainer](https://pytorch.org/ignite/_modules/ignite/contrib/engines/tbptt.html#create_supervised_tbptt_trainer) for a detailed explanation."
258 |    ]
259 |   }
260 |  ],
261 |  "metadata": {
262 |   "accelerator": "GPU",
263 |   "colab": {
264 |    "name": "08-custom-events.ipynb",
265 |    "provenance": []
266 |   },
267 |   "kernelspec": {
268 |    "display_name": "Python 3 (ipykernel)",
269 |    "language": "python",
270 |    "name": "python3"
271 |   },
272 |   "language_info": {
273 |    "codemirror_mode": {
274 |     "name": "ipython",
275 |     "version": 3
276 |    },
277 |    "file_extension": ".py",
278 |    "mimetype": "text/x-python",
279 |    "name": "python",
280 |    "nbconvert_exporter": "python",
281 |    "pygments_lexer": "ipython3",
282 |    "version": "3.10.4"
283 |   }
284 |  },
285 |  "nbformat": 4,
286 |  "nbformat_minor": 5
287 | }
288 | 


--------------------------------------------------------------------------------
/how-to-guides/09-switch-data-training.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "OuJ-YDzWCgEI",
  6 |    "metadata": {
  7 |     "id": "OuJ-YDzWCgEI"
  8 |    },
  9 |    "source": [
 10 |     "<!-- ---\n",
 11 |     "title: How to switch data provider during training\n",
 12 |     "weight: 9\n",
 13 |     "downloads: true\n",
 14 |     "sidebar: true\n",
 15 |     "summary: Example on how to switch data during training after some number of iterations\n",
 16 |     "tags:\n",
 17 |     "  - custom events\n",
 18 |     "--- -->\n",
 19 |     "\n",
 20 |     "# How to switch data provider during training"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "id": "zi2JOUi1CgEO",
 26 |    "metadata": {
 27 |     "id": "zi2JOUi1CgEO"
 28 |    },
 29 |    "source": [
 30 |     "In this example, we will see how one can easily switch the data provider during the training using\n",
 31 |     "[`set_data()`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.set_data). "
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "id": "wJKPRmQZIV_S",
 37 |    "metadata": {
 38 |     "id": "wJKPRmQZIV_S"
 39 |    },
 40 |    "source": [
 41 |     "## Basic Setup"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "id": "QwpM9M-XI23h",
 47 |    "metadata": {
 48 |     "id": "QwpM9M-XI23h"
 49 |    },
 50 |    "source": [
 51 |     "### Required Dependencies"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "id": "H_UgUurNIb53",
 58 |    "metadata": {
 59 |     "id": "H_UgUurNIb53"
 60 |    },
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "!pip install pytorch-ignite"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "id": "Z2Yo1WSWI6vr",
 69 |    "metadata": {
 70 |     "id": "Z2Yo1WSWI6vr"
 71 |    },
 72 |    "source": [
 73 |     "### Import"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 2,
 79 |    "id": "-2Ai1Ht_HWiB",
 80 |    "metadata": {
 81 |     "id": "-2Ai1Ht_HWiB"
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "from ignite.engine import Engine, Events"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "id": "Q9tTpXjmI9R_",
 91 |    "metadata": {
 92 |     "id": "Q9tTpXjmI9R_"
 93 |    },
 94 |    "source": [
 95 |     "### Data Providers"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 3,
101 |    "id": "g7ctwsy3Han_",
102 |    "metadata": {
103 |     "id": "g7ctwsy3Han_"
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "data1 = [1, 2, 3]\n",
108 |     "data2 = [11, 12, 13]"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "id": "S-aQnuihJbZz",
114 |    "metadata": {
115 |     "id": "S-aQnuihJbZz"
116 |    },
117 |    "source": [
118 |     "## Create dummy `trainer`\n",
119 |     "\n",
120 |     "Let's create a dummy `train_step` which will print the current iteration and batch of data. "
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 17,
126 |    "id": "2Skq9nmSHnce",
127 |    "metadata": {
128 |     "id": "2Skq9nmSHnce"
129 |    },
130 |    "outputs": [],
131 |    "source": [
132 |     "def train_step(engine, batch):\n",
133 |     "    print(f\"Iter[{engine.state.iteration}] Current datapoint = \", batch)\n",
134 |     "\n",
135 |     "trainer = Engine(train_step)"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "id": "YIBlmaO6JW9c",
141 |    "metadata": {
142 |     "id": "YIBlmaO6JW9c"
143 |    },
144 |    "source": [
145 |     "## Attach handler to switch data\n",
146 |     "\n",
147 |     "Now we have to decide when to switch the data provider. It can be after an epoch, an iteration or something custom. Below, we are going to switch data after a specific iteration. We attach a handler to `trainer` that is executed once, when iteration `switch_iteration` completes, and calls `set_data()` so that when:\n",
148 |     "\n",
149 |     "* iteration <= `switch_iteration`, batch is from `data1`\n",
150 |     "* iteration > `switch_iteration`, batch is from `data2`"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 18,
156 |    "id": "RaMkWUwnCgEQ",
157 |    "metadata": {
158 |     "id": "RaMkWUwnCgEQ"
159 |    },
160 |    "outputs": [],
161 |    "source": [
162 |     "switch_iteration = 5\n",
163 |     "\n",
164 |     "\n",
165 |     "@trainer.on(Events.ITERATION_COMPLETED(once=switch_iteration))\n",
166 |     "def switch_dataloader():\n",
167 |     "    print(\"<------- Switch Data ------->\")\n",
168 |     "    trainer.set_data(data2)"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "id": "BvJ2qms6M44n",
174 |    "metadata": {
175 |     "id": "BvJ2qms6M44n"
176 |    },
177 |    "source": [
178 |     "And finally we run the `trainer` for some epochs."
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 19,
184 |    "id": "8W-WFdZ8HzJU",
185 |    "metadata": {
186 |     "colab": {
187 |      "base_uri": "https://localhost:8080/"
188 |     },
189 |     "id": "8W-WFdZ8HzJU",
190 |     "outputId": "7c2c5a36-f657-4d75-8086-ec3fd1fdf10e"
191 |    },
192 |    "outputs": [
193 |     {
194 |      "name": "stdout",
195 |      "output_type": "stream",
196 |      "text": [
197 |       "Iter[1] Current datapoint =  1\n",
198 |       "Iter[2] Current datapoint =  2\n",
199 |       "Iter[3] Current datapoint =  3\n",
200 |       "Iter[4] Current datapoint =  1\n",
201 |       "Iter[5] Current datapoint =  2\n",
202 |       "<------- Switch Data ------->\n",
203 |       "Iter[6] Current datapoint =  11\n",
204 |       "Iter[7] Current datapoint =  12\n",
205 |       "Iter[8] Current datapoint =  13\n",
206 |       "Iter[9] Current datapoint =  11\n",
207 |       "Iter[10] Current datapoint =  12\n",
208 |       "Iter[11] Current datapoint =  13\n",
209 |       "Iter[12] Current datapoint =  11\n",
210 |       "Iter[13] Current datapoint =  12\n",
211 |       "Iter[14] Current datapoint =  13\n",
212 |       "Iter[15] Current datapoint =  11\n"
213 |      ]
214 |     },
215 |     {
216 |      "data": {
217 |       "text/plain": [
218 |        "State:\n",
219 |        "\titeration: 15\n",
220 |        "\tepoch: 5\n",
221 |        "\tepoch_length: 3\n",
222 |        "\tmax_epochs: 5\n",
223 |        "\toutput: <class 'NoneType'>\n",
224 |        "\tbatch: 11\n",
225 |        "\tmetrics: <class 'dict'>\n",
226 |        "\tdataloader: <class 'list'>\n",
227 |        "\tseed: <class 'NoneType'>\n",
228 |        "\ttimes: <class 'dict'>"
229 |       ]
230 |      },
231 |      "execution_count": 19,
232 |      "metadata": {},
233 |      "output_type": "execute_result"
234 |     }
235 |    ],
236 |    "source": [
237 |     "trainer.run(data1, max_epochs=5)"
238 |    ]
239 |   }
240 |  ],
241 |  "metadata": {
242 |   "colab": {
243 |    "name": "switch-data-training.ipynb",
244 |    "provenance": []
245 |   },
246 |   "kernelspec": {
247 |    "display_name": "Python 3 (ipykernel)",
248 |    "language": "python",
249 |    "name": "python3"
250 |   },
251 |   "language_info": {
252 |    "codemirror_mode": {
253 |     "name": "ipython",
254 |     "version": 3
255 |    },
256 |    "file_extension": ".py",
257 |    "mimetype": "text/x-python",
258 |    "name": "python",
259 |    "nbconvert_exporter": "python",
260 |    "pygments_lexer": "ipython3",
261 |    "version": "3.8.8"
262 |   }
263 |  },
264 |  "nbformat": 4,
265 |  "nbformat_minor": 5
266 | }
267 | 


--------------------------------------------------------------------------------
/how-to-guides/10-loggers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<!-- ---\n",
  8 |     "title: How to use Loggers\n",
  9 |     "date: 2021-10-25\n",
 10 |     "downloads: true\n",
 11 |     "weight: 10\n",
 12 |     "summary: \n",
 13 |     "tags:\n",
 14 |     "  - loggers\n",
 15 |     "  - ClearML\n",
 16 |     "--- -->\n",
 17 |     "\n",
 18 |     "# How to use Loggers"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "markdown",
 23 |    "metadata": {},
 24 |    "source": [
 25 |     "This how-to guide demonstrates the usage of loggers with Ignite. As part of this guide, we will be using the [ClearML](https://clear.ml/docs/latest/docs/fundamentals/logger/) logger and also highlight how this code can be easily modified to make use of other loggers. You can see all the other loggers supported [here](https://pytorch.org/ignite/contrib/handlers.html#loggers).\n",
 26 |     "\n",
 27 |     "<!--more-->\n",
 28 |     "\n",
 29 |     "In this example, we will be using a simple convolutional network on the [MNIST](https://pytorch.org/vision/stable/datasets.html#torchvision.datasets.MNIST) dataset to show how logging works in Ignite.\n",
 30 |     "\n",
 31 |     "## Prerequisites\n",
 32 |     "- Refer to the [installation-guide](https://pytorch-ignite.ai/how-to-guides/01-installation/) to install Ignite (and PyTorch).\n",
 33 |     "- To get started with ClearML, create your account [here](https://app.community.clear.ml/profile). Then create a credential: Profile > Create new credentials > Copy to clipboard."
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "## Install dependencies"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 6,
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "%%capture\n",
 50 |     "! pip install torchvision"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 7,
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "%%capture\n",
 60 |     "! pip install clearml"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 1,
 66 |    "metadata": {},
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "%%capture\n",
 70 |     "! clearml-init # You may want to run this command on your terminal separately and paste what you copied in the step above."
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "## Imports "
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 1,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "\n",
 87 |     "import torch\n",
 88 |     "import torch.nn.functional as F\n",
 89 |     "from torch import nn\n",
 90 |     "from torch.optim import SGD\n",
 91 |     "from torch.utils.data import DataLoader\n",
 92 |     "from torchvision.datasets import MNIST\n",
 93 |     "from torchvision.transforms import Compose, Normalize, ToTensor\n",
 94 |     "\n",
 95 |     "from ignite.contrib.handlers.clearml_logger import (\n",
 96 |     "    ClearMLLogger,\n",
 97 |     "    ClearMLSaver,\n",
 98 |     "    GradsHistHandler,\n",
 99 |     "    GradsScalarHandler,\n",
100 |     "    WeightsHistHandler,\n",
101 |     "    WeightsScalarHandler,\n",
102 |     "    global_step_from_engine,\n",
103 |     ")\n",
104 |     "\n",
105 |     "from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer\n",
106 |     "from ignite.handlers import Checkpoint\n",
107 |     "from ignite.metrics import Accuracy, Loss\n",
108 |     "from ignite.utils import setup_logger"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "## Model"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 2,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "class Net(nn.Module):\n",
125 |     "    def __init__(self):\n",
126 |     "        super(Net, self).__init__()\n",
127 |     "        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)\n",
128 |     "        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)\n",
129 |     "        self.conv2_drop = nn.Dropout2d()\n",
130 |     "        self.fc1 = nn.Linear(320, 50)\n",
131 |     "        self.fc2 = nn.Linear(50, 10)\n",
132 |     "\n",
133 |     "    def forward(self, x):\n",
134 |     "        x = F.relu(F.max_pool2d(self.conv1(x), 2))\n",
135 |     "        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n",
136 |     "        x = x.view(-1, 320)\n",
137 |     "        x = F.relu(self.fc1(x))\n",
138 |     "        x = F.dropout(x, training=self.training)\n",
139 |     "        x = self.fc2(x)\n",
140 |     "        return F.log_softmax(x, dim=-1)\n"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "metadata": {},
146 |    "source": [
147 |     "## Dataloader"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 8,
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "def get_data_loaders(train_batch_size, val_batch_size):\n",
157 |     "    data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n",
158 |     "\n",
159 |     "    train_loader = DataLoader(\n",
160 |     "        MNIST(download=True, root=\".\", transform=data_transform, train=True), batch_size=train_batch_size, shuffle=True\n",
161 |     "    )\n",
162 |     "\n",
163 |     "    val_loader = DataLoader(\n",
164 |     "        MNIST(download=False, root=\".\", transform=data_transform, train=False), batch_size=val_batch_size, shuffle=False\n",
165 |     "    )\n",
166 |     "    return train_loader, val_loader"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "## Training\n",
174 |     "\n",
175 |     "Ignite makes use of handlers to configure what we want to log. Each handler takes in some common attributes like:\n",
176 |     "\n",
177 |     "- Engine Object, which could for example be the trainer if we are interested in training logs\n",
178 |     "- Event Name, which tells the logger when the information should be logged. For example, `event_name=Events.ITERATION_COMPLETED(every=100)` means that we want the information to be logged every 100 iterations.\n",
179 |     "- args (or kwargs), using which you pass some metadata and provide information of what is to be logged, for example to log the 'loss' we could pass `output_transform=lambda loss: {\"batchloss\": loss}`\n",
180 |     "- Ignite also provides the flexibility to execute custom event handlers; these can be passed via the `log_handler` argument of the logger's `attach` method. For example, `log_handler=WeightsScalarHandler(model)` would log the norm of the model's weights."
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 4,
186 |    "metadata": {},
187 |    "outputs": [],
188 |    "source": [
189 |     "def run(train_batch_size, val_batch_size, epochs, lr, momentum):\n",
190 |     "    train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)\n",
191 |     "    model = Net()\n",
192 |     "    device = \"cpu\"\n",
193 |     "\n",
194 |     "    if torch.cuda.is_available():\n",
195 |     "        device = \"cuda\"\n",
196 |     "\n",
197 |     "    model.to(device) \n",
198 |     "    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)\n",
199 |     "    criterion = nn.CrossEntropyLoss()\n",
200 |     "    trainer = create_supervised_trainer(model, optimizer, criterion, device=device)\n",
201 |     "    trainer.logger = setup_logger(\"Trainer\")\n",
202 |     "\n",
203 |     "    metrics = {\"accuracy\": Accuracy(), \"loss\": Loss(criterion)}\n",
204 |     "\n",
205 |     "    train_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)\n",
206 |     "    train_evaluator.logger = setup_logger(\"Train Evaluator\")\n",
207 |     "    validation_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)\n",
208 |     "    validation_evaluator.logger = setup_logger(\"Val Evaluator\")\n",
209 |     "\n",
210 |     "    @trainer.on(Events.EPOCH_COMPLETED)\n",
211 |     "    def compute_metrics(engine):\n",
212 |     "        train_evaluator.run(train_loader)\n",
213 |     "        validation_evaluator.run(val_loader)\n",
214 |     "        \n",
215 |     "    # To utilize other loggers we need to change the object here\n",
216 |     "    clearml_logger = ClearMLLogger(project_name=\"examples\", task_name=\"ignite\") \n",
217 |     "\n",
218 |     "    # Attach the logger to the trainer to log training loss \n",
219 |     "    clearml_logger.attach_output_handler(\n",
220 |     "        trainer,\n",
221 |     "        event_name=Events.ITERATION_COMPLETED(every=100),\n",
222 |     "        tag=\"training\",\n",
223 |     "        output_transform=lambda loss: {\"batchloss\": loss},\n",
224 |     "    )\n",
225 |     "  \n",
226 |     "    # Attach the logger to log loss and accuracy for both training and validation\n",
227 |     "    for tag, evaluator in [(\"training metrics\", train_evaluator), (\"validation metrics\", validation_evaluator)]:\n",
228 |     "        clearml_logger.attach_output_handler(\n",
229 |     "            evaluator,\n",
230 |     "            event_name=Events.EPOCH_COMPLETED,\n",
231 |     "            tag=tag,\n",
232 |     "            metric_names=[\"loss\", \"accuracy\"],\n",
233 |     "            global_step_transform=global_step_from_engine(trainer),\n",
234 |     "        )\n",
235 |     "\n",
236 |     "    # Attach the logger to the trainer to log optimizer's parameters, e.g. learning rate \n",
237 |     "    clearml_logger.attach_opt_params_handler(\n",
238 |     "        trainer, event_name=Events.ITERATION_COMPLETED(every=100), optimizer=optimizer\n",
239 |     "    )\n",
240 |     "\n",
241 |     "    # Attach the logger to the trainer to log model's weights norm\n",
242 |     "    clearml_logger.attach(\n",
243 |     "        trainer, log_handler=WeightsScalarHandler(model), event_name=Events.ITERATION_COMPLETED(every=100)\n",
244 |     "    )\n",
245 |     "\n",
246 |     "    # Attach the logger to the trainer to log model's weights as a histogram \n",
247 |     "    clearml_logger.attach(trainer, log_handler=WeightsHistHandler(model), event_name=Events.EPOCH_COMPLETED(every=100))\n",
248 |     "\n",
249 |     "    # Attach the logger to the trainer to log model’s gradients as scalars\n",
250 |     "    clearml_logger.attach(\n",
251 |     "        trainer, log_handler=GradsScalarHandler(model), event_name=Events.ITERATION_COMPLETED(every=100)\n",
252 |     "    )\n",
253 |     "\n",
254 |     "    #Attach the logger to the trainer to log model's gradients as a histogram    \n",
255 |     "    clearml_logger.attach(trainer, log_handler=GradsHistHandler(model), event_name=Events.EPOCH_COMPLETED(every=100))\n",
256 |     "\n",
257 |     "    handler = Checkpoint(\n",
258 |     "        {\"model\": model},\n",
259 |     "        ClearMLSaver(),\n",
260 |     "        n_saved=1,\n",
261 |     "        score_function=lambda e: e.state.metrics[\"accuracy\"],\n",
262 |     "        score_name=\"val_acc\",\n",
263 |     "        filename_prefix=\"best\",\n",
264 |     "        global_step_transform=global_step_from_engine(trainer),\n",
265 |     "    )\n",
266 |     "    validation_evaluator.add_event_handler(Events.EPOCH_COMPLETED, handler)\n",
267 |     "\n",
268 |     "    trainer.run(train_loader, max_epochs=epochs)\n",
269 |     "\n",
270 |     "    clearml_logger.close()\n"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 5,
276 |    "metadata": {},
277 |    "outputs": [],
278 |    "source": [
279 |     "batch_size=64\n",
280 |     "val_batch_size=1000\n",
281 |     "epochs=5 \n",
282 |     "lr=0.01\n",
283 |     "momentum=0.5"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": 6,
289 |    "metadata": {},
290 |    "outputs": [
291 |     {
292 |      "name": "stdout",
293 |      "output_type": "stream",
294 |      "text": [
295 |       "ClearML Task: created new task id=575b4d9b5c8a47589ac7edb7e5e0bb59\n",
296 |       "ClearML results page: https://app.community.clear.ml/projects/4d6b8ac509bc46da91607e83011248fb/experiments/575b4d9b5c8a47589ac7edb7e5e0bb59/output/log\n"
297 |      ]
298 |     },
299 |     {
300 |      "name": "stderr",
301 |      "output_type": "stream",
302 |      "text": [
303 |       "/home/anirudh/miniconda3/envs/ignite/lib/python3.9/site-packages/ignite/contrib/handlers/clearml_logger.py:659: UserWarning: ClearMLSaver created a temporary checkpoints directory: /tmp/ignite_checkpoints_2021_10_25_20_21_50_gkx2f03c\n",
304 |       "  warnings.warn(f\"ClearMLSaver created a temporary checkpoints directory: {dirname}\")\n",
305 |       "2021-10-25 20:21:50,778 Trainer INFO: Engine run starting with max_epochs=5.\n",
306 |       "2021-10-25 20:22:08,993 Train Evaluator INFO: Engine run starting with max_epochs=1.\n",
307 |       "2021-10-25 20:22:18,656 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:10\n",
308 |       "2021-10-25 20:22:18,657 Train Evaluator INFO: Engine run complete. Time taken: 00:00:10\n",
309 |       "2021-10-25 20:22:18,658 Val Evaluator INFO: Engine run starting with max_epochs=1.\n",
310 |       "2021-10-25 20:22:29,442 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n",
311 |       "2021-10-25 20:22:29,443 Val Evaluator INFO: Engine run complete. Time taken: 00:00:11\n",
312 |       "2021-10-25 20:22:29,444 Trainer INFO: Epoch[1] Complete. Time taken: 00:00:39\n",
313 |       "2021-10-25 20:22:46,879 Train Evaluator INFO: Engine run starting with max_epochs=1.\n",
314 |       "2021-10-25 20:22:57,516 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n",
315 |       "2021-10-25 20:22:57,518 Train Evaluator INFO: Engine run complete. Time taken: 00:00:11\n",
316 |       "2021-10-25 20:22:57,519 Val Evaluator INFO: Engine run starting with max_epochs=1.\n",
317 |       "2021-10-25 20:23:12,853 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:15\n",
318 |       "2021-10-25 20:23:12,854 Val Evaluator INFO: Engine run complete. Time taken: 00:00:15\n",
319 |       "2021-10-25 20:23:12,855 Trainer INFO: Epoch[2] Complete. Time taken: 00:00:43\n",
320 |       "2021-10-25 20:23:29,609 Train Evaluator INFO: Engine run starting with max_epochs=1.\n",
321 |       "2021-10-25 20:23:40,388 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n",
322 |       "2021-10-25 20:23:40,390 Train Evaluator INFO: Engine run complete. Time taken: 00:00:11\n",
323 |       "2021-10-25 20:23:40,390 Val Evaluator INFO: Engine run starting with max_epochs=1.\n",
324 |       "2021-10-25 20:23:55,842 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:15\n",
325 |       "2021-10-25 20:23:55,845 Val Evaluator INFO: Engine run complete. Time taken: 00:00:15\n",
326 |       "2021-10-25 20:23:55,845 Trainer INFO: Epoch[3] Complete. Time taken: 00:00:43\n",
327 |       "2021-10-25 20:24:13,223 Train Evaluator INFO: Engine run starting with max_epochs=1.\n",
328 |       "2021-10-25 20:24:23,924 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n",
329 |       "2021-10-25 20:24:23,925 Train Evaluator INFO: Engine run complete. Time taken: 00:00:11\n",
330 |       "2021-10-25 20:24:23,925 Val Evaluator INFO: Engine run starting with max_epochs=1.\n",
331 |       "2021-10-25 20:24:39,658 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:16\n",
332 |       "2021-10-25 20:24:39,661 Val Evaluator INFO: Engine run complete. Time taken: 00:00:16\n",
333 |       "2021-10-25 20:24:39,662 Trainer INFO: Epoch[4] Complete. Time taken: 00:00:44\n",
334 |       "2021-10-25 20:24:57,385 Train Evaluator INFO: Engine run starting with max_epochs=1.\n",
335 |       "2021-10-25 20:25:07,264 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:10\n",
336 |       "2021-10-25 20:25:07,265 Train Evaluator INFO: Engine run complete. Time taken: 00:00:10\n",
337 |       "2021-10-25 20:25:07,267 Val Evaluator INFO: Engine run starting with max_epochs=1.\n",
338 |       "2021-10-25 20:25:22,536 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:15\n",
339 |       "2021-10-25 20:25:22,537 Val Evaluator INFO: Engine run complete. Time taken: 00:00:15\n",
340 |       "2021-10-25 20:25:22,538 Trainer INFO: Epoch[5] Complete. Time taken: 00:00:43\n",
341 |       "2021-10-25 20:25:22,539 Trainer INFO: Engine run complete. Time taken: 00:03:32\n"
342 |      ]
343 |     }
344 |    ],
345 |    "source": [
346 |     "run(batch_size, val_batch_size, epochs, lr, momentum)"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "markdown",
351 |    "metadata": {},
352 |    "source": [
353 |     "If you followed along, congratulations! You can take a look at some of the visualisations from the results page mentioned in your logs above (`ClearML results page`). Here's an example!"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "markdown",
358 |    "metadata": {},
359 |    "source": [
360 |     "![Clear ML Dashboard](assets/clearml-dashboard.png)"
361 |    ]
362 |   }
363 |  ],
364 |  "metadata": {
365 |   "accelerator": "GPU",
366 |   "kernelspec": {
367 |    "display_name": "Python 3 (ipykernel)",
368 |    "language": "python",
369 |    "name": "python3"
370 |   },
371 |   "language_info": {
372 |    "codemirror_mode": {
373 |     "name": "ipython",
374 |     "version": 3
375 |    },
376 |    "file_extension": ".py",
377 |    "mimetype": "text/x-python",
378 |    "name": "python",
379 |    "nbconvert_exporter": "python",
380 |    "pygments_lexer": "ipython3",
381 |    "version": "3.10.4"
382 |   }
383 |  },
384 |  "nbformat": 4,
385 |  "nbformat_minor": 4
386 | }
387 | 


--------------------------------------------------------------------------------
/how-to-guides/assets/clearml-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/how-to-guides/assets/clearml-dashboard.png


--------------------------------------------------------------------------------
/how-to-guides/assets/convert-pytorch2ignite.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/how-to-guides/assets/convert-pytorch2ignite.gif


--------------------------------------------------------------------------------
/tests/test_generate.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import shutil
 4 | import subprocess
 5 | from datetime import datetime
 6 | 
 7 | import pytest
 8 | 
 9 | today = datetime.now().strftime("%Y-%m-%d")  # local date as YYYY-MM-DD; interpolated into the "date:" entry below
10 | # Notebook dict with no cells (nbformat 4.4); rebound on the following line to its JSON text.
11 | new_notebook_empty = {"cells": [], "metadata": {}, "nbformat": 4, "nbformat_minor": 4}
12 | new_notebook_empty = json.dumps(new_notebook_empty, indent=4)
13 | # Expected front-matter markdown cell; a test in this file compares it with a notebook's first cell.
14 | cell_front_matter = {
15 |     "cell_type": "markdown",
16 |     "metadata": {},
17 |     "source": [
18 |         "<!-- ---\n",
19 |         "title: <required-title>\n",
20 |         f"date: {today}\n",
21 |         "downloads: true\n",
22 |         "weight: <required-weight> See: https://github.com/pytorch-ignite/examples/issues/30\n",
23 |         "summary: <use either this or the `<!--more-->` tag below to provide summary for this notebook, "
24 |         "and delete the other>\n"  # NOTE(review): no trailing comma, so this joins with "tags:\n" into one element - confirm it matches generate.py
25 |         "tags:\n",
26 |         "  - <required-tag>\n",
27 |         "--- -->\n",
28 |         "\n",
29 |         "# title-placeholder\n",
30 |         "\n",
31 |         "<If you are not using the `summary` variable above, use this space to "
32 |         "provide a summary for this notebook.>\n",
33 |         "<Otherwise, delete the `<!--more-->` below.>",
34 |         "\n",
35 |         "<!--more-->",
36 |     ],
37 | }
38 | 
39 | 
40 | @pytest.mark.parametrize("name", ["dummy_notebook", "dummy_notebook.ipynb"])
41 | def test_new_notebook_creation(name, tmp_path):
42 |     notebook_path = os.path.join(tmp_path, name)
43 | 
44 |     output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8")
45 | 
46 |     if not notebook_path.endswith(".ipynb"):
47 |         notebook_path = notebook_path + ".ipynb"
48 | 
49 |     assert output == f"Generated {notebook_path}\n"
50 | 
51 | 
52 | def test_existing_blank_notebook(tmp_path):
53 |     notebook_path = os.path.join(tmp_path, "dummy_notebook_empty.ipynb")
54 |     with open(notebook_path, "w") as f:
55 |         f.write(new_notebook_empty)
56 | 
57 |     output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8")
58 | 
59 |     assert output == f"Added frontmatter to {notebook_path}\n"
60 | 
61 | 
62 | def test_existing_non_empty_notebook(tmp_path):
63 |     notebook_name = "01-getting-started.ipynb"
64 |     notebook_path = os.path.join(tmp_path, notebook_name)
65 |     shutil.copyfile(os.path.join("./tutorials/beginner", notebook_name), notebook_path)
66 | 
67 |     output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8")
68 | 
69 |     assert output == f"Added frontmatter to {notebook_path}\n"
70 | 
71 |     # Check to make sure its added as the first cell
72 |     with open(notebook_path) as fp:
73 |         content = json.load(fp)
74 |     assert content["cells"][0] == cell_front_matter
75 | 
76 | 
77 | @pytest.mark.parametrize("name", ["dummy_notebook_empty", "dummy_notebook_empty.ipynb"])
78 | def test_front_matter_multiple_times(name, tmp_path):
79 |     notebook_path = os.path.join(tmp_path, name)
80 | 
81 |     # This will create a notebook with frontmatter
82 |     _ = subprocess.check_output(["python", "generate.py", notebook_path])
83 | 
84 |     # Second call should not add frontmatter again
85 |     output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8")
86 | 
87 |     if not notebook_path.endswith(".ipynb"):
88 |         notebook_path = notebook_path + ".ipynb"
89 | 
90 |     assert output == f"Frontmatter cell already exists in {notebook_path}. Exiting\n"
91 |     # Check to make sure only added once.
92 |     with open(notebook_path) as fp:
93 |         content = json.load(fp)
94 | 
95 |     if len(content["cells"]) > 1:
96 |         assert content["cells"][0] != content["cells"][1]
97 | 


--------------------------------------------------------------------------------
/tutorials/advanced/01-collective-communication.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "id": "5w-QlZE9mvdY"
  7 |    },
  8 |    "source": [
  9 |     "<!-- ---\n",
 10 |     "title: Collective Communication with Ignite\n",
 11 |     "weight: 1\n",
 12 |     "date: 2021-10-5\n",
 13 |     "downloads: true\n",
 14 |     "sidebar: true\n",
 15 |     "tags:\n",
 16 |     "  - idist\n",
 17 |     "  - all_gather\n",
 18 |     "  - all_reduce\n",
 19 |     "  - broadcast\n",
 20 |     "  - barrier\n",
 21 |     "--- -->\n",
 22 |     "\n",
 23 |     "# Collective Communication with Ignite"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {
 29 |     "id": "vJgTaKWU8Doq"
 30 |    },
 31 |    "source": [
 32 |     "In this tutorial, we will see how to use advanced distributed functions like `all_reduce()`, `all_gather()`, `broadcast()` and `barrier()`. We will discuss unique use cases for all of them and represent them visually.\n",
 33 |     "\n",
 34 |     "<!--more-->"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {
 40 |     "id": "Qhiy_ylcn2GD"
 41 |    },
 42 |    "source": [
 43 |     "## Required Dependencies"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {
 50 |     "id": "1zevsoVQ4nx7"
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "!pip install pytorch-ignite"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {
 60 |     "id": "TrvIsRKQn42e"
 61 |    },
 62 |    "source": [
 63 |     "## Imports"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 2,
 69 |    "metadata": {
 70 |     "id": "lMphyBmmmvdw",
 71 |     "pycharm": {
 72 |      "is_executing": false
 73 |     }
 74 |    },
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "import torch\n",
 78 |     "\n",
 79 |     "import ignite.distributed as idist"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {
 85 |     "id": "Y2KPTliNC2r9"
 86 |    },
 87 |    "source": [
 88 |     "## All Reduce\n",
 89 |     "\n",
 90 |     "![All Reduce Diagram](https://github.com/pytorch-ignite/examples/blob/main/tutorials/assets/all-reduce.png?raw=1)\n",
 91 |     "\n",
 92 |     "The [`all_reduce()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.all_reduce) method collects the specified tensors from every process, applies a specified operation (sum, product, min, max, etc.) to them, and makes the result available on every process. Let's spawn 3 processes with ranks 0, 1 and 2 and define a `tensor` on each of them. If we perform `all_reduce` with the operation SUM on `tensor`, the values of `tensor` on all ranks are gathered, added together and stored back in `tensor` on every rank, as shown below:"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 3,
 98 |    "metadata": {
 99 |     "id": "GHT6EftPOwUq"
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "def all_reduce_example(local_rank):\n",
104 |     "    tensor = torch.arange(2, dtype=torch.int64) + 1 + 2 * local_rank\n",
105 |     "    print(f\"Rank {local_rank}, Initial value: {tensor}\")\n",
106 |     "\n",
107 |     "    idist.all_reduce(tensor, op=\"SUM\")\n",
108 |     "    print(f\"Rank {local_rank}, After performing all_reduce: {tensor}\")"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {
114 |     "id": "uz5mYRS76gjm"
115 |    },
116 |    "source": [
117 |     "We can use [idist.spawn](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.spawn) to spawn 3 processes (`nproc_per_node`) and execute the above function."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 12,
123 |    "metadata": {
124 |     "colab": {
125 |      "base_uri": "https://localhost:8080/"
126 |     },
127 |     "id": "yCc1OxDg3X38",
128 |     "outputId": "97640f5c-aae7-45d8-b4de-21b5ff49ce3d"
129 |    },
130 |    "outputs": [
131 |     {
132 |      "name": "stdout",
133 |      "output_type": "stream",
134 |      "text": [
135 |       "Rank 0, Initial value: tensor([1, 2])\n",
136 |       "Rank 2, Initial value: tensor([5, 6])\n",
137 |       "Rank 1, Initial value: tensor([3, 4])\n",
138 |       "Rank 0, After performing all_reduce: tensor([ 9, 12])\n",
139 |       "Rank 1, After performing all_reduce: tensor([ 9, 12])\n",
140 |       "Rank 2, After performing all_reduce: tensor([ 9, 12])\n"
141 |      ]
142 |     }
143 |    ],
144 |    "source": [
145 |     "idist.spawn(backend=\"gloo\", fn=all_reduce_example, args=(), nproc_per_node=3)"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {
151 |     "id": "FLntezo0NhQg"
152 |    },
153 |    "source": [
154 |     "Now let's consider a more realistic scenario: you need to find the average of all the gradients available on different processes. \n",
155 |     "\n",
156 |     "> First, we get the number of GPUs available, with the get_world_size method. Then, for every model parameter, we do the following:\n",
157 |     ">\n",
158 |     ">    1. Gather the gradients on each process\n",
159 |     ">    2. Apply the sum operation on the gradients\n",
160 |     ">    3. Divide by the world size to average them\n",
161 |     ">\n",
162 |     "> Finally, we can go on to update the model parameters using the averaged gradients!\n",
163 |     ">\n",
164 |     "> -- <cite>[Distributed Deep Learning 101: Introduction](https://towardsdatascience.com/distributed-deep-learning-101-introduction-ebfc1bcd59d9)</cite>\n",
165 |     "\n",
166 |     "You can get the number of GPUs (processes) available using another helper method [`idist.get_world_size()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.get_world_size) and then use `all_reduce()` to collect the gradients and apply the SUM operation."
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 7,
172 |    "metadata": {
173 |     "id": "0j_ErUWhHpTl"
174 |    },
175 |    "outputs": [],
176 |    "source": [
177 |     "def average_gradients(model):\n",
178 |     "    num_processes = idist.get_world_size()\n",
179 |     "    for param in model.parameters():\n",
180 |     "        idist.all_reduce(param.grad.data, op=\"SUM\")\n",
181 |     "        param.grad.data = param.grad.data / num_processes"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "metadata": {
187 |     "id": "7w9oIcIiC6_4"
188 |    },
189 |    "source": [
190 |     "## All Gather\n",
191 |     "\n",
192 |     "![All Gather Diagram](https://github.com/pytorch-ignite/examples/blob/main/tutorials/assets/all-gather.png?raw=1)\n",
193 |     "\n",
194 |     "The [`all_gather()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.all_gather) method is used when you just want to collect a tensor, number or string across all participating processes. As a basic example, suppose you have to collect all the different values stored in `num` on all ranks. You can achieve this by using `all_gather` as below:"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 8,
200 |    "metadata": {
201 |     "id": "Q1ZD4jPK5CVm"
202 |    },
203 |    "outputs": [],
204 |    "source": [
205 |     "def all_gather_example(local_rank):\n",
206 |     "    num = 2.0 * idist.get_rank()\n",
207 |     "    print(f\"Rank {local_rank}, Initial value: {num}\")\n",
208 |     "\n",
209 |     "    all_nums = idist.all_gather(num)\n",
210 |     "    print(f\"Rank {local_rank}, After performing all_gather: {all_nums}\")"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 11,
216 |    "metadata": {
217 |     "colab": {
218 |      "base_uri": "https://localhost:8080/"
219 |     },
220 |     "id": "DyGu-S7I5Uzw",
221 |     "outputId": "50c9e712-819e-4e6e-ef53-305e27787804"
222 |    },
223 |    "outputs": [
224 |     {
225 |      "name": "stdout",
226 |      "output_type": "stream",
227 |      "text": [
228 |       "Rank 0, Initial value: 0.0\n",
229 |       "Rank 2, Initial value: 4.0\n",
230 |       "Rank 1, Initial value: 2.0\n",
231 |       "Rank 2, After performing all_gather: [0.0, 2.0, 4.0]\n",
232 |       "Rank 0, After performing all_gather: [0.0, 2.0, 4.0]\n",
233 |       "Rank 1, After performing all_gather: [0.0, 2.0, 4.0]\n"
234 |      ]
235 |     }
236 |    ],
237 |    "source": [
238 |     "idist.spawn(backend=\"gloo\", fn=all_gather_example, args=(), nproc_per_node=3)"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "markdown",
243 |    "metadata": {
244 |     "id": "FdTcvz4pStfD"
245 |    },
246 |    "source": [
247 |     "Now let's assume you need to gather the predicted values which are distributed across all the processes on the main process so you could store them to a file. Here is how you can do it: "
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": 13,
253 |    "metadata": {
254 |     "id": "NRvgasbLC8Ne"
255 |    },
256 |    "outputs": [],
257 |    "source": [
258 |     "def write_preds_to_file(predictions, filename):\n",
259 |     "    prediction_tensor = torch.tensor(predictions)\n",
260 |     "    prediction_tensor = idist.all_gather(prediction_tensor)\n",
261 |     "\n",
262 |     "    if idist.get_rank() == 0:\n",
263 |     "        torch.save(prediction_tensor, filename)"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "markdown",
268 |    "metadata": {
269 |     "id": "Ib9PfbieNMw_"
270 |    },
271 |    "source": [
272 |     "**Note:** In the above example, only the main process required the gathered values and not all the processes. This can also be done via the `gather()` method."
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {
278 |     "id": "0fsu-NybC8t1"
279 |    },
280 |    "source": [
281 |     "## Broadcast\n",
282 |     "\n",
283 |     "![Broadcast Diagram](https://github.com/pytorch-ignite/examples/blob/main/tutorials/assets/broadcast.png?raw=1)\n",
284 |     "\n",
285 |     "The [`broadcast()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.broadcast) method copies a tensor, float or string from a source process to all the other processes. For example, you need to send a message from rank 0 to all other ranks. You can do this by creating the actual message on rank 0 and a placeholder on all other ranks, then broadcast the message mentioning a source rank. You can also use `safe_mode=True` in case the placeholder is not defined on all ranks. "
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": 14,
291 |    "metadata": {
292 |     "id": "TWowyRRw55XM"
293 |    },
294 |    "outputs": [],
295 |    "source": [
296 |     "def broadcast_example(local_rank):\n",
297 |     "    message = f\"hello from rank {idist.get_rank()}\"\n",
298 |     "    print(f\"Rank {local_rank}, Initial value: {message}\")\n",
299 |     "\n",
300 |     "    message = idist.broadcast(message, src=0)\n",
301 |     "    print(f\"Rank {local_rank}, After performing broadcast: {message}\")"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "code",
306 |    "execution_count": 15,
307 |    "metadata": {
308 |     "colab": {
309 |      "base_uri": "https://localhost:8080/"
310 |     },
311 |     "id": "SYXfJFBfUYiV",
312 |     "outputId": "3d81fbed-4531-4fb3-c671-775d4649653d"
313 |    },
314 |    "outputs": [
315 |     {
316 |      "name": "stdout",
317 |      "output_type": "stream",
318 |      "text": [
319 |       "Rank 1, Initial value: hello from rank 1\n",
320 |       "Rank 2, Initial value: hello from rank 2\n",
321 |       "Rank 0, Initial value: hello from rank 0\n",
322 |       "Rank 2, After performing broadcast: hello from rank 0\n",
323 |       "Rank 0, After performing broadcast: hello from rank 0\n",
324 |       "Rank 1, After performing broadcast: hello from rank 0\n"
325 |      ]
326 |     }
327 |    ],
328 |    "source": [
329 |     "idist.spawn(backend=\"gloo\", fn=broadcast_example, args=(), nproc_per_node=3)"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "markdown",
334 |    "metadata": {
335 |     "id": "cVXVi2rcTz9X"
336 |    },
337 |    "source": [
338 |     "For a real world use case, let's assume you need to gather the predicted and actual values from all the processes on rank 0 for computing a metric and avoiding a memory error. You can do this by first using `all_gather()`, then computing the metric and finally using `broadcast()` to share the result with all processes. `src` below refers to the rank of the source process."
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": null,
344 |    "metadata": {
345 |     "id": "7aXwKnrTC96T"
346 |    },
347 |    "outputs": [],
348 |    "source": [
349 |     "def compute_metric(prediction_tensor, target_tensor):\n",
350 |     "\n",
351 |     "    prediction_tensor = idist.all_gather(prediction_tensor)\n",
352 |     "    target_tensor = idist.all_gather(target_tensor)\n",
353 |     "\n",
354 |     "    result = 0.0\n",
355 |     "    if idist.get_rank() == 0:\n",
356 |     "        result = compute_fn(prediction_tensor, target_tensor)\n",
357 |     "\n",
358 |     "    result = idist.broadcast(result, src=0)\n",
359 |     "\n",
360 |     "    return result"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "markdown",
365 |    "metadata": {
366 |     "id": "p5ma7l5cDIuC"
367 |    },
368 |    "source": [
369 |     "## Barrier\n",
370 |     "\n",
371 |     "The [`barrier()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.barrier) method helps synchronize all processes. For example, while downloading data during training, we have to make sure that only the main process (`rank = 0`) downloads the datasets, to prevent the subprocesses (`rank > 0`) from downloading the same file to the same path at the same time. This way, all subprocesses use the copy already downloaded by the main process. This is where we can utilize `barrier()` to make the subprocesses wait until the main process has downloaded the datasets. Once that is done, all the subprocesses instantiate the datasets while the main process waits. Finally, all the processes are synced up."
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": null,
377 |    "metadata": {
378 |     "id": "XInr0zlhDJl6"
379 |    },
380 |    "outputs": [],
381 |    "source": [
382 |     "def get_datasets(config):\n",
383 |     "    if idist.get_local_rank() > 0:\n",
384 |     "        idist.barrier()\n",
385 |     "\n",
386 |     "    train_dataset, test_dataset = get_train_test_datasets(config[\"data_path\"])\n",
387 |     "\n",
388 |     "    if idist.get_local_rank() == 0:\n",
389 |     "        idist.barrier()\n",
390 |     "\n",
391 |     "    return train_dataset, test_dataset"
392 |    ]
393 |   }
394 |  ],
395 |  "metadata": {
396 |   "colab": {
397 |    "collapsed_sections": [],
398 |    "name": "idist-collective-communication.ipynb",
399 |    "provenance": []
400 |   },
401 |   "kernelspec": {
402 |    "display_name": "Python 3 (ipykernel)",
403 |    "language": "python",
404 |    "name": "python3"
405 |   },
406 |   "language_info": {
407 |    "codemirror_mode": {
408 |     "name": "ipython",
409 |     "version": 3
410 |    },
411 |    "file_extension": ".py",
412 |    "mimetype": "text/x-python",
413 |    "name": "python",
414 |    "nbconvert_exporter": "python",
415 |    "pygments_lexer": "ipython3",
416 |    "version": "3.10.4"
417 |   },
418 |   "pycharm": {
419 |    "stem_cell": {
420 |     "cell_type": "raw",
421 |     "metadata": {
422 |      "collapsed": false
423 |     },
424 |     "source": []
425 |    }
426 |   }
427 |  },
428 |  "nbformat": 4,
429 |  "nbformat_minor": 4
430 | }
431 | 


--------------------------------------------------------------------------------
/tutorials/assets/all-gather.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/tutorials/assets/all-gather.png


--------------------------------------------------------------------------------
/tutorials/assets/all-reduce.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/tutorials/assets/all-reduce.png


--------------------------------------------------------------------------------
/tutorials/assets/broadcast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/tutorials/assets/broadcast.png


--------------------------------------------------------------------------------
/tutorials/beginner/01-getting-started.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "HgljXEAJEcFq",
  6 |    "metadata": {
  7 |     "id": "HgljXEAJEcFq"
  8 |    },
  9 |    "source": [
 10 |     "<!-- ---\n",
 11 |     "title: Getting Started\n",
 12 |     "weight: 1\n",
 13 |     "date: 2021-07-27\n",
 14 |     "downloads: true\n",
 15 |     "tags:\n",
 16 |     "  - PyTorch-Ignite\n",
 17 |     "--- -->\n",
 18 |     "\n",
 19 |     "# Getting Started"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "id": "P9VjKOirEcFu",
 25 |    "metadata": {
 26 |     "id": "P9VjKOirEcFu"
 27 |    },
 28 |    "source": [
 29 |     "Welcome to **PyTorch-Ignite**’s quick start guide that covers the\n",
 30 |     "essentials of getting a project up and running while walking through\n",
 31 |     "basic concepts of Ignite. In just a few lines of code, you can get your\n",
 32 |     "model trained and validated. The complete code can be found at the end\n",
 33 |     "of this guide.\n",
 34 |     "\n",
 35 |     "<!--more-->"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "id": "1QNvbg3SEcFw",
 41 |    "metadata": {
 42 |     "id": "1QNvbg3SEcFw"
 43 |    },
 44 |    "source": [
 45 |     "## Prerequisites\n",
 46 |     "\n",
 47 |     "This tutorial assumes you are familiar with the:\n",
 48 |     "\n",
 49 |     "1.  Basics of Python and deep learning\n",
 50 |     "2.  Structure of PyTorch code"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "id": "XTHzzYyoEcFy",
 56 |    "metadata": {
 57 |     "id": "XTHzzYyoEcFy"
 58 |    },
 59 |    "source": [
 60 |     "## Installation\n",
 61 |     "\n",
 62 |     "From `pip`\n",
 63 |     "\n",
 64 |     "``` shell\n",
 65 |     "pip install pytorch-ignite\n",
 66 |     "```\n",
 67 |     "\n",
 68 |     "From `conda`\n",
 69 |     "\n",
 70 |     "``` shell\n",
 71 |     "conda install ignite -c pytorch\n",
 72 |     "```\n",
 73 |     "\n",
 74 |     "See [here](https://pytorch-ignite.ai/how-to-guides/installation/) for other installation\n",
 75 |     "options."
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "id": "DcnSr5sGEcFz",
 81 |    "metadata": {
 82 |     "id": "DcnSr5sGEcFz"
 83 |    },
 84 |    "source": [
 85 |     "## Code\n",
 86 |     "\n",
 87 |     "Import the following:"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "id": "Saizk3heEcFz",
 94 |    "metadata": {
 95 |     "id": "Saizk3heEcFz"
 96 |    },
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "import torch\n",
100 |     "from torch import nn\n",
101 |     "from torch.utils.data import DataLoader\n",
102 |     "from torchvision.datasets import MNIST\n",
103 |     "from torchvision.models import resnet18\n",
104 |     "from torchvision.transforms import Compose, Normalize, ToTensor\n",
105 |     "\n",
106 |     "from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator\n",
107 |     "from ignite.metrics import Accuracy, Loss\n",
108 |     "from ignite.handlers import ModelCheckpoint\n",
109 |     "from ignite.contrib.handlers import TensorboardLogger, global_step_from_engine"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "id": "ecMYtJF7OvgT",
115 |    "metadata": {
116 |     "id": "ecMYtJF7OvgT"
117 |    },
118 |    "source": [
119 |     "Speed things up by setting [device](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) to `cuda` if available else `cpu`."
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": null,
125 |    "id": "sdjDKcFhOuQn",
126 |    "metadata": {
127 |     "id": "sdjDKcFhOuQn"
128 |    },
129 |    "outputs": [],
130 |    "source": [
131 |     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "id": "4r_PUH1yEcF1",
137 |    "metadata": {
138 |     "id": "4r_PUH1yEcF1"
139 |    },
140 |    "source": [
141 |     "Define a class of your model or use the predefined ResNet18 model (modified for MNIST) below, instantiate it and move it to device:"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": null,
147 |    "id": "dVSVAT0OEcF1",
148 |    "metadata": {
149 |     "id": "dVSVAT0OEcF1"
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "class Net(nn.Module):\n",
154 |     "\n",
155 |     "    def __init__(self):\n",
156 |     "        super(Net, self).__init__()\n",
157 |     "        \n",
158 |     "        # Changed the output layer to output 10 classes instead of 1000 classes\n",
159 |     "        self.model = resnet18(num_classes=10)\n",
160 |     "\n",
161 |     "        # Changed the input layer to take grayscale images for MNIST instead of RGB images\n",
162 |     "        self.model.conv1 = nn.Conv2d(\n",
163 |     "            1, 64, kernel_size=3, padding=1, bias=False\n",
164 |     "        )\n",
165 |     "\n",
166 |     "    def forward(self, x):\n",
167 |     "        return self.model(x)\n",
168 |     "\n",
169 |     "\n",
170 |     "model = Net().to(device)"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "id": "DDIW2zedEcF3",
176 |    "metadata": {
177 |     "id": "DDIW2zedEcF3"
178 |    },
179 |    "source": [
180 |     "Now let us define the training and validation datasets (as\n",
181 |     "[torch.utils.data.DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader))\n",
182 |     "and store them in `train_loader` and `val_loader` respectively. We have\n",
183 |     "used the [MNIST](https://pytorch.org/vision/stable/datasets.html#mnist)\n",
184 |     "dataset for ease of understanding.\n"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "id": "PFNgx_-TEcF4",
191 |    "metadata": {
192 |     "id": "PFNgx_-TEcF4"
193 |    },
194 |    "outputs": [],
195 |    "source": [
196 |     "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n",
197 |     "\n",
198 |     "train_loader = DataLoader(\n",
199 |     "    MNIST(download=True, root=\".\", transform=data_transform, train=True), batch_size=128, shuffle=True\n",
200 |     ")\n",
201 |     "\n",
202 |     "val_loader = DataLoader(\n",
203 |     "    MNIST(download=True, root=\".\", transform=data_transform, train=False), batch_size=256, shuffle=False\n",
204 |     ")"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "id": "VC9BUtWXEcF6",
210 |    "metadata": {
211 |     "id": "VC9BUtWXEcF6"
212 |    },
213 |    "source": [
214 |     "Finally, we will specify the optimizer and the loss function:"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "id": "6VkGmtVZEcF7",
221 |    "metadata": {
222 |     "id": "6VkGmtVZEcF7"
223 |    },
224 |    "outputs": [],
225 |    "source": [
226 |     "optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005)\n",
227 |     "criterion = nn.CrossEntropyLoss()"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "markdown",
232 |    "id": "cb-ak9gEEcF7",
233 |    "metadata": {
234 |     "id": "cb-ak9gEEcF7"
235 |    },
236 |    "source": [
237 |     "And we’re done with setting up the important parts of the project.\n",
238 |     "PyTorch-Ignite will handle all other boilerplate code as we will see\n",
239 |     "below. Next we have to define a trainer engine by passing our model,\n",
240 |     "optimizer and loss function to\n",
241 |     "[`create_supervised_trainer`](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html),\n",
242 |     "and two evaluator engines by passing Ignite’s out-of-the-box\n",
243 |     "[metrics](https://pytorch.org/ignite/metrics.html#complete-list-of-metrics)\n",
244 |     "and the model to\n",
245 |     "[`create_supervised_evaluator`](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_evaluator.html#create-supervised-evaluator). We have defined separate evaluator engines for training and validation because they will serve different functions as we will see later in this tutorial:"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": null,
251 |    "id": "NufcPqJaEcF8",
252 |    "metadata": {
253 |     "id": "NufcPqJaEcF8"
254 |    },
255 |    "outputs": [],
256 |    "source": [
257 |     "trainer = create_supervised_trainer(model, optimizer, criterion, device)\n",
258 |     "\n",
259 |     "val_metrics = {\n",
260 |     "    \"accuracy\": Accuracy(),\n",
261 |     "    \"loss\": Loss(criterion)\n",
262 |     "}\n",
263 |     "\n",
264 |     "train_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)\n",
265 |     "val_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "markdown",
270 |    "id": "S7YThetiEcF8",
271 |    "metadata": {
272 |     "id": "S7YThetiEcF8"
273 |    },
274 |    "source": [
275 |     "The objects `trainer`, `train_evaluator` and `val_evaluator` are all instances of\n",
276 |     "[`Engine`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine) - the main component of Ignite, which is essentially an abstraction over\n",
277 |     "the training or validation loop.\n",
278 |     "\n",
279 |     "If you need more control over your training and validation loops, you\n",
280 |     "can create custom `trainer`, `train_evaluator` and `val_evaluator` objects by wrapping the step\n",
281 |     "logic in `Engine`:\n",
282 |     "\n",
283 |     "```python\n",
284 |     "def train_step(engine, batch):\n",
285 |     "    model.train()\n",
286 |     "    optimizer.zero_grad()\n",
287 |     "    x, y = batch[0].to(device), batch[1].to(device)\n",
288 |     "    y_pred = model(x)\n",
289 |     "    loss = criterion(y_pred, y)\n",
290 |     "    loss.backward()\n",
291 |     "    optimizer.step()\n",
292 |     "    return loss.item()\n",
293 |     "\n",
294 |     "trainer = Engine(train_step)\n",
295 |     "\n",
296 |     "def validation_step(engine, batch):\n",
297 |     "    model.eval()\n",
298 |     "    with torch.no_grad():\n",
299 |     "        x, y = batch[0].to(device), batch[1].to(device)\n",
300 |     "        y_pred = model(x)\n",
301 |     "        return y_pred, y\n",
302 |     "\n",
303 |     "train_evaluator = Engine(validation_step)\n",
304 |     "val_evaluator = Engine(validation_step)\n",
305 |     "\n",
306 |     "# Attach metrics to the evaluators\n",
307 |     "for name, metric in val_metrics.items():\n",
308 |     "    metric.attach(train_evaluator, name)\n",
309 |     "\n",
310 |     "for name, metric in val_metrics.items():\n",
311 |     "    metric.attach(val_evaluator, name)\n",
312 |     "```"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "id": "Sw90sOK9EcF9",
318 |    "metadata": {
319 |     "id": "Sw90sOK9EcF9"
320 |    },
321 |    "source": [
322 |     "We can customize the code further by adding all kinds of event handlers.\n",
323 |     "`Engine` allows adding handlers on various events that are triggered\n",
324 |     "during the run. When an event is triggered, attached handlers\n",
325 |     "(functions) are executed. Thus, for logging purposes we add a function\n",
326 |     "to be executed at the end of every `log_interval`-th iteration:"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "code",
331 |    "execution_count": null,
332 |    "id": "YGm_-loUEcF9",
333 |    "metadata": {
334 |     "id": "YGm_-loUEcF9"
335 |    },
336 |    "outputs": [],
337 |    "source": [
338 |     "# How many batches to wait before logging training status\n",
339 |     "log_interval = 100"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": null,
345 |    "id": "V3xpFBI6EcF9",
346 |    "metadata": {
347 |     "id": "V3xpFBI6EcF9"
348 |    },
349 |    "outputs": [],
350 |    "source": [
351 |     "@trainer.on(Events.ITERATION_COMPLETED(every=log_interval))\n",
352 |     "def log_training_loss(engine):\n",
353 |     "    print(f\"Epoch[{engine.state.epoch}], Iter[{engine.state.iteration}] Loss: {engine.state.output:.2f}\")"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "markdown",
358 |    "id": "O6uwwXO8EcF-",
359 |    "metadata": {
360 |     "id": "O6uwwXO8EcF-"
361 |    },
362 |    "source": [
363 |     "or equivalently without the decorator but attaching the handler function\n",
364 |     "to the `trainer` via\n",
365 |     "[`add_event_handler`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.add_event_handler)\n",
366 |     "\n",
367 |     "``` python\n",
368 |     "def log_training_loss(engine):\n",
369 |     "    print(f\"Epoch[{engine.state.epoch}], Iter[{engine.state.iteration}] Loss: {engine.state.output:.2f}\")\n",
370 |     "\n",
371 |     "trainer.add_event_handler(Events.ITERATION_COMPLETED(every=log_interval), log_training_loss)\n",
372 |     "```"
373 |    ]
374 |   },
375 |   {
376 |    "cell_type": "markdown",
377 |    "id": "quQzbAv6EcF-",
378 |    "metadata": {
379 |     "id": "quQzbAv6EcF-"
380 |    },
381 |    "source": [
382 |     "After an epoch ends during training, we can compute the training and\n",
383 |     "validation metrics by running `train_evaluator` on `train_loader` and `val_evaluator` on\n",
384 |     "`val_loader` respectively. Hence we will attach two additional handlers to `trainer`\n",
385 |     "that run each time an epoch completes:"
386 |    ]
387 |   },
388 |   {
389 |    "cell_type": "code",
390 |    "execution_count": null,
391 |    "id": "eCE552PFEcF_",
392 |    "metadata": {
393 |     "id": "eCE552PFEcF_"
394 |    },
395 |    "outputs": [],
396 |    "source": [
397 |     "@trainer.on(Events.EPOCH_COMPLETED)\n",
398 |     "def log_training_results(trainer):\n",
399 |     "    train_evaluator.run(train_loader)\n",
400 |     "    metrics = train_evaluator.state.metrics\n",
401 |     "    print(f\"Training Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")\n",
402 |     "\n",
403 |     "\n",
404 |     "@trainer.on(Events.EPOCH_COMPLETED)\n",
405 |     "def log_validation_results(trainer):\n",
406 |     "    val_evaluator.run(val_loader)\n",
407 |     "    metrics = val_evaluator.state.metrics\n",
408 |     "    print(f\"Validation Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "markdown",
413 |    "id": "7JRLbhiw903w",
414 |    "metadata": {
415 |     "id": "7JRLbhiw903w"
416 |    },
417 |    "source": [
418 |     "We can use [`ModelCheckpoint()`](https://pytorch.org/ignite/generated/ignite.handlers.checkpoint.ModelCheckpoint.html#modelcheckpoint) as shown below to save the `n_saved` best models determined by a metric (here accuracy) after each epoch is completed. We attach `model_checkpoint` to `val_evaluator` because we want the two models with the highest accuracies on the validation dataset rather than the training dataset. This is why we defined two separate evaluators (`val_evaluator` and `train_evaluator`) before."
419 |    ]
420 |   },
421 |   {
422 |    "cell_type": "code",
423 |    "execution_count": null,
424 |    "id": "W6Zd7vKn1LLO",
425 |    "metadata": {
426 |     "id": "W6Zd7vKn1LLO"
427 |    },
428 |    "outputs": [],
429 |    "source": [
430 |     "# Score function to return current value of any metric we defined above in val_metrics\n",
431 |     "def score_function(engine):\n",
432 |     "    return engine.state.metrics[\"accuracy\"]\n",
433 |     "\n",
434 |     "# Checkpoint to store n_saved best models wrt score function\n",
435 |     "model_checkpoint = ModelCheckpoint(\n",
436 |     "    \"checkpoint\",\n",
437 |     "    n_saved=2,\n",
438 |     "    filename_prefix=\"best\",\n",
439 |     "    score_function=score_function,\n",
440 |     "    score_name=\"accuracy\",\n",
441 |     "    global_step_transform=global_step_from_engine(trainer), # helps fetch the trainer's state\n",
442 |     ")\n",
443 |     "  \n",
444 |     "# Save the model after every epoch of val_evaluator is completed\n",
445 |     "val_evaluator.add_event_handler(Events.COMPLETED, model_checkpoint, {\"model\": model})"
446 |    ]
447 |   },
448 |   {
449 |    "cell_type": "markdown",
450 |    "id": "KAB0QtrgiTLK",
451 |    "metadata": {
452 |     "id": "KAB0QtrgiTLK"
453 |    },
454 |    "source": [
455 |     "We will use [`TensorboardLogger()`](https://pytorch.org/ignite/generated/ignite.contrib.handlers.tensorboard_logger.html#ignite.contrib.handlers.tensorboard_logger.TensorboardLogger) to log trainer's loss, and training and validation metrics separately."
456 |    ]
457 |   },
458 |   {
459 |    "cell_type": "code",
460 |    "execution_count": null,
461 |    "id": "Rdt6AE6oeh6k",
462 |    "metadata": {
463 |     "id": "Rdt6AE6oeh6k"
464 |    },
465 |    "outputs": [],
466 |    "source": [
467 |     "# Define a Tensorboard logger\n",
468 |     "tb_logger = TensorboardLogger(log_dir=\"tb-logger\")\n",
469 |     "\n",
470 |     "# Attach handler to plot trainer's loss every 100 iterations\n",
471 |     "tb_logger.attach_output_handler(\n",
472 |     "    trainer,\n",
473 |     "    event_name=Events.ITERATION_COMPLETED(every=log_interval),\n",
474 |     "    tag=\"training\",\n",
475 |     "    output_transform=lambda loss: {\"batch_loss\": loss},\n",
476 |     ")\n",
477 |     "\n",
478 |     "# Attach handler for plotting both evaluators' metrics after every epoch completes\n",
479 |     "for tag, evaluator in [(\"training\", train_evaluator), (\"validation\", val_evaluator)]:\n",
480 |     "    tb_logger.attach_output_handler(\n",
481 |     "        evaluator,\n",
482 |     "        event_name=Events.EPOCH_COMPLETED,\n",
483 |     "        tag=tag,\n",
484 |     "        metric_names=\"all\",\n",
485 |     "        global_step_transform=global_step_from_engine(trainer),\n",
486 |     "    )"
487 |    ]
488 |   },
489 |   {
490 |    "cell_type": "markdown",
491 |    "id": "Aq0qwiZrEcF_",
492 |    "metadata": {
493 |     "id": "Aq0qwiZrEcF_"
494 |    },
495 |    "source": [
496 |     "Finally, we start the engine on the training dataset and run it for 5\n",
497 |     "epochs:"
498 |    ]
499 |   },
500 |   {
501 |    "cell_type": "code",
502 |    "execution_count": null,
503 |    "id": "qnmTh4FeEcGA",
504 |    "metadata": {
505 |     "colab": {
506 |      "base_uri": "https://localhost:8080/"
507 |     },
508 |     "id": "qnmTh4FeEcGA",
509 |     "outputId": "f444d98f-8f45-44ea-bd82-9cecb6971bbe"
510 |    },
511 |    "outputs": [
512 |     {
513 |      "name": "stdout",
514 |      "output_type": "stream",
515 |      "text": [
516 |       "Epoch[1], Iter[100] Loss: 0.19\n",
517 |       "Epoch[1], Iter[200] Loss: 0.13\n",
518 |       "Epoch[1], Iter[300] Loss: 0.08\n",
519 |       "Epoch[1], Iter[400] Loss: 0.11\n",
520 |       "Training Results - Epoch[1] Avg accuracy: 0.97 Avg loss: 0.09\n",
521 |       "Validation Results - Epoch[1] Avg accuracy: 0.97 Avg loss: 0.08\n",
522 |       "Epoch[2], Iter[500] Loss: 0.07\n",
523 |       "Epoch[2], Iter[600] Loss: 0.04\n",
524 |       "Epoch[2], Iter[700] Loss: 0.09\n",
525 |       "Epoch[2], Iter[800] Loss: 0.07\n",
526 |       "Epoch[2], Iter[900] Loss: 0.16\n",
527 |       "Training Results - Epoch[2] Avg accuracy: 0.93 Avg loss: 0.20\n",
528 |       "Validation Results - Epoch[2] Avg accuracy: 0.93 Avg loss: 0.20\n",
529 |       "Epoch[3], Iter[1000] Loss: 0.02\n",
530 |       "Epoch[3], Iter[1100] Loss: 0.02\n",
531 |       "Epoch[3], Iter[1200] Loss: 0.05\n",
532 |       "Epoch[3], Iter[1300] Loss: 0.06\n",
533 |       "Epoch[3], Iter[1400] Loss: 0.06\n",
534 |       "Training Results - Epoch[3] Avg accuracy: 0.94 Avg loss: 0.20\n",
535 |       "Validation Results - Epoch[3] Avg accuracy: 0.94 Avg loss: 0.23\n",
536 |       "Epoch[4], Iter[1500] Loss: 0.08\n",
537 |       "Epoch[4], Iter[1600] Loss: 0.02\n",
538 |       "Epoch[4], Iter[1700] Loss: 0.08\n",
539 |       "Epoch[4], Iter[1800] Loss: 0.07\n",
540 |       "Training Results - Epoch[4] Avg accuracy: 0.98 Avg loss: 0.06\n",
541 |       "Validation Results - Epoch[4] Avg accuracy: 0.98 Avg loss: 0.07\n",
542 |       "Epoch[5], Iter[1900] Loss: 0.02\n",
543 |       "Epoch[5], Iter[2000] Loss: 0.11\n",
544 |       "Epoch[5], Iter[2100] Loss: 0.05\n",
545 |       "Epoch[5], Iter[2200] Loss: 0.02\n",
546 |       "Epoch[5], Iter[2300] Loss: 0.01\n",
547 |       "Training Results - Epoch[5] Avg accuracy: 0.99 Avg loss: 0.02\n",
548 |       "Validation Results - Epoch[5] Avg accuracy: 0.99 Avg loss: 0.03\n"
549 |      ]
550 |     },
551 |     {
552 |      "data": {
553 |       "text/plain": [
554 |        "State:\n",
555 |        "\titeration: 2345\n",
556 |        "\tepoch: 5\n",
557 |        "\tepoch_length: 469\n",
558 |        "\tmax_epochs: 5\n",
559 |        "\toutput: 0.005351857747882605\n",
560 |        "\tbatch: <class 'list'>\n",
561 |        "\tmetrics: <class 'dict'>\n",
562 |        "\tdataloader: <class 'torch.utils.data.dataloader.DataLoader'>\n",
563 |        "\tseed: <class 'NoneType'>\n",
564 |        "\ttimes: <class 'dict'>"
565 |       ]
566 |      },
567 |      "execution_count": 85,
568 |      "metadata": {
569 |       "tags": []
570 |      },
571 |      "output_type": "execute_result"
572 |     }
573 |    ],
574 |    "source": [
575 |     "trainer.run(train_loader, max_epochs=5)"
576 |    ]
577 |   },
578 |   {
579 |    "cell_type": "code",
580 |    "execution_count": null,
581 |    "id": "ZXhL1-vDgBeT",
582 |    "metadata": {
583 |     "id": "ZXhL1-vDgBeT"
584 |    },
585 |    "outputs": [],
586 |    "source": [
587 |     "# Let's close the logger and inspect our results\n",
588 |     "tb_logger.close()\n",
589 |     "\n",
590 |     "%load_ext tensorboard\n",
591 |     "\n",
592 |     "%tensorboard --logdir=."
593 |    ]
594 |   },
595 |   {
596 |    "cell_type": "code",
597 |    "execution_count": null,
598 |    "id": "_xj4NMjdArYh",
599 |    "metadata": {
600 |     "colab": {
601 |      "base_uri": "https://localhost:8080/"
602 |     },
603 |     "id": "_xj4NMjdArYh",
604 |     "outputId": "3291ca3b-809a-4ed4-d657-0b83eeb45bc5"
605 |    },
606 |    "outputs": [
607 |     {
608 |      "name": "stdout",
609 |      "output_type": "stream",
610 |      "text": [
611 |       "'best_model_4_accuracy=0.9856.pt'  'best_model_5_accuracy=0.9857.pt'\n"
612 |      ]
613 |     }
614 |    ],
615 |    "source": [
616 |     "# At last we can view our best models\n",
617 |     "!ls checkpoint"
618 |    ]
619 |   },
620 |   {
621 |    "cell_type": "markdown",
622 |    "id": "wJ9k2coEEcGD",
623 |    "metadata": {
624 |     "id": "wJ9k2coEEcGD"
625 |    },
626 |    "source": [
627 |     "## Next Steps\n",
628 |     "\n",
629 |     "1.  Check out [tutorials](https://pytorch-ignite.ai/tutorials) if you want to continue\n",
630 |     "    learning more about PyTorch-Ignite.\n",
631 |     "2.  Head over to [how-to guides](https://pytorch-ignite.ai/how-to-guides) if you’re looking\n",
632 |     "    for a specific solution.\n",
633 |     "3.  If you want to set-up a PyTorch-Ignite project, visit [Code\n",
634 |     "    Generator](https://code-generator.pytorch-ignite.ai/) to get a variety of\n",
635 |     "    easily customizable templates and out-of-the-box features."
636 |    ]
637 |   },
638 |   {
639 |    "cell_type": "markdown",
640 |    "id": "vya75pqVEcGE",
641 |    "metadata": {
642 |     "id": "vya75pqVEcGE",
643 |     "tags": []
644 |    },
645 |    "source": [
646 |     "## Complete Code\n",
647 |     "\n",
648 |     "``` python\n",
649 |     "import torch\n",
650 |     "from torch import nn\n",
651 |     "from torch.utils.data import DataLoader\n",
652 |     "from torchvision.datasets import MNIST\n",
653 |     "from torchvision.models import resnet18\n",
654 |     "from torchvision.transforms import Compose, Normalize, ToTensor\n",
655 |     "\n",
656 |     "from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator\n",
657 |     "from ignite.metrics import Accuracy, Loss\n",
658 |     "from ignite.handlers import ModelCheckpoint\n",
659 |     "from ignite.contrib.handlers import TensorboardLogger, global_step_from_engine\n",
660 |     "\n",
661 |     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
662 |     "\n",
663 |     "\n",
664 |     "class Net(nn.Module):\n",
665 |     "\n",
666 |     "    def __init__(self):\n",
667 |     "        super(Net, self).__init__()\n",
668 |     "    \n",
669 |     "        self.model = resnet18(num_classes=10)\n",
670 |     "\n",
671 |     "        self.model.conv1 = nn.Conv2d(\n",
672 |     "            1, 64, kernel_size=3, padding=1, bias=False\n",
673 |     "        )\n",
674 |     "\n",
675 |     "    def forward(self, x):\n",
676 |     "        return self.model(x)\n",
677 |     "\n",
678 |     "\n",
679 |     "model = Net().to(device)\n",
680 |     "\n",
681 |     "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n",
682 |     "\n",
683 |     "train_loader = DataLoader(\n",
684 |     "    MNIST(download=True, root=\".\", transform=data_transform, train=True), batch_size=128, shuffle=True\n",
685 |     ")\n",
686 |     "\n",
687 |     "val_loader = DataLoader(\n",
688 |     "    MNIST(download=True, root=\".\", transform=data_transform, train=False), batch_size=256, shuffle=False\n",
689 |     ")\n",
690 |     "\n",
691 |     "optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005)\n",
692 |     "criterion = nn.CrossEntropyLoss()\n",
693 |     "\n",
694 |     "trainer = create_supervised_trainer(model, optimizer, criterion, device)\n",
695 |     "\n",
696 |     "val_metrics = {\n",
697 |     "    \"accuracy\": Accuracy(),\n",
698 |     "    \"loss\": Loss(criterion)\n",
699 |     "}\n",
700 |     "\n",
701 |     "train_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)\n",
702 |     "val_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)\n",
703 |     "\n",
704 |     "log_interval = 100\n",
705 |     "\n",
706 |     "@trainer.on(Events.ITERATION_COMPLETED(every=log_interval))\n",
707 |     "def log_training_loss(engine):\n",
708 |     "    print(f\"Epoch[{engine.state.epoch}], Iter[{engine.state.iteration}] Loss: {engine.state.output:.2f}\")\n",
709 |     "\n",
710 |     "@trainer.on(Events.EPOCH_COMPLETED)\n",
711 |     "def log_training_results(trainer):\n",
712 |     "    train_evaluator.run(train_loader)\n",
713 |     "    metrics = train_evaluator.state.metrics\n",
714 |     "    print(f\"Training Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")\n",
715 |     "\n",
716 |     "\n",
717 |     "@trainer.on(Events.EPOCH_COMPLETED)\n",
718 |     "def log_validation_results(trainer):\n",
719 |     "    val_evaluator.run(val_loader)\n",
720 |     "    metrics = val_evaluator.state.metrics\n",
721 |     "    print(f\"Validation Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")\n",
722 |     "\n",
723 |     "\n",
724 |     "def score_function(engine):\n",
725 |     "    return engine.state.metrics[\"accuracy\"]\n",
726 |     "\n",
727 |     "\n",
728 |     "model_checkpoint = ModelCheckpoint(\n",
729 |     "    \"checkpoint\",\n",
730 |     "    n_saved=2,\n",
731 |     "    filename_prefix=\"best\",\n",
732 |     "    score_function=score_function,\n",
733 |     "    score_name=\"accuracy\",\n",
734 |     "    global_step_transform=global_step_from_engine(trainer),\n",
735 |     ")\n",
736 |     "  \n",
737 |     "val_evaluator.add_event_handler(Events.COMPLETED, model_checkpoint, {\"model\": model})\n",
738 |     "\n",
739 |     "tb_logger = TensorboardLogger(log_dir=\"tb-logger\")\n",
740 |     "\n",
741 |     "tb_logger.attach_output_handler(\n",
742 |     "    trainer,\n",
743 |     "    event_name=Events.ITERATION_COMPLETED(every=log_interval),\n",
744 |     "    tag=\"training\",\n",
745 |     "    output_transform=lambda loss: {\"batch_loss\": loss},\n",
746 |     ")\n",
747 |     "\n",
748 |     "for tag, evaluator in [(\"training\", train_evaluator), (\"validation\", val_evaluator)]:\n",
749 |     "    tb_logger.attach_output_handler(\n",
750 |     "        evaluator,\n",
751 |     "        event_name=Events.EPOCH_COMPLETED,\n",
752 |     "        tag=tag,\n",
753 |     "        metric_names=\"all\",\n",
754 |     "        global_step_transform=global_step_from_engine(trainer),\n",
755 |     "    )\n",
756 |     "\n",
757 |     "trainer.run(train_loader, max_epochs=5)\n",
758 |     "\n",
759 |     "tb_logger.close()\n",
760 |     "```"
761 |    ]
762 |   }
763 |  ],
764 |  "metadata": {
765 |   "accelerator": "GPU",
766 |   "colab": {
767 |    "collapsed_sections": [],
768 |    "name": "getting-started.ipynb",
769 |    "provenance": []
770 |   },
771 |   "kernelspec": {
772 |    "display_name": "Python 3 (ipykernel)",
773 |    "language": "python",
774 |    "name": "python3"
775 |   },
776 |   "language_info": {
777 |    "codemirror_mode": {
778 |     "name": "ipython",
779 |     "version": 3
780 |    },
781 |    "file_extension": ".py",
782 |    "mimetype": "text/x-python",
783 |    "name": "python",
784 |    "nbconvert_exporter": "python",
785 |    "pygments_lexer": "ipython3",
786 |    "version": "3.10.4"
787 |   }
788 |  },
789 |  "nbformat": 4,
790 |  "nbformat_minor": 5
791 | }
792 | 


--------------------------------------------------------------------------------
/tutorials/beginner/02-transformers-text-classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "id": "HCS-d1T3znj2"
  7 |    },
  8 |    "source": [
  9 |     "<!-- ---\n",
 10 |     "title: Transformers for Text Classification with IMDb Reviews\n",
 11 |     "date: 2021-09-18\n",
 12 |     "downloads: true\n",
 13 |     "weight: 2\n",
 14 |     "tags:\n",
 15 |     "  - NLP\n",
 16 |     "  - BERT\n",
 17 |     "  - Transformers\n",
 18 |     "  - Text\n",
 19 |     "  - Beginner\n",
 20 |     "--- -->\n",
 21 |     "# Transformers for Text Classification with IMDb Reviews"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {
 27 |     "id": "rjZMYxFoznj9"
 28 |    },
 29 |    "source": [
 30 |     "In this tutorial we will fine-tune a model from the Transformers library for text classification using PyTorch-Ignite. We will be following the [Fine-tuning a pretrained model](https://huggingface.co/transformers/training.html) tutorial for preprocessing text and defining the model, optimizer and dataloaders. <!--more--> Then we are going to use Ignite for:\n",
 31 |     "* Training and evaluating the model\n",
 32 |     "* Computing metrics\n",
 33 |     "* Setting up experiments and monitoring the model\n",
 34 |     "\n",
 35 |     "According to the tutorial, we will use the [IMDb Movie Reviews Dataset](https://ai.stanford.edu/~amaas/data/sentiment/) to classify a review as either positive or negative."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {
 41 |     "id": "sovYyC0Zznj-"
 42 |    },
 43 |    "source": [
 44 |     "## Required Dependencies "
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "metadata": {
 51 |     "id": "7XHAD9x7znj_"
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "!pip install pytorch-ignite transformers datasets"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {
 61 |     "id": "I80XSUXpvk1i"
 62 |    },
 63 |    "source": [
 64 |     "Before we dive in, we will seed everything using [`manual_seed`](https://pytorch.org/ignite/utils.html#ignite.utils.manual_seed)."
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 2,
 70 |    "metadata": {
 71 |     "id": "enczLgLTznkH"
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "from ignite.utils import manual_seed\n",
 76 |     "\n",
 77 |     "manual_seed(42)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {
 83 |     "id": "WZYyXYB5znkH"
 84 |    },
 85 |    "source": [
 86 |     "## Basic Setup\n",
 87 |     "\n",
 88 |     "Next we will follow the tutorial and load up our dataset and tokenizer to preprocess the data."
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {
 94 |     "id": "gCVrdHOVgjtH"
 95 |    },
 96 |    "source": [
 97 |     "### Data Preprocessing"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {
104 |     "id": "PQ80tXFPwAnR"
105 |    },
106 |    "outputs": [],
107 |    "source": [
108 |     "from datasets import load_dataset\n",
109 |     "\n",
110 |     "raw_datasets = load_dataset(\"imdb\")"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {
117 |     "id": "inP62g7LwV5n"
118 |    },
119 |    "outputs": [],
120 |    "source": [
121 |     "from transformers import AutoTokenizer\n",
122 |     "\n",
123 |     "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-cased\")"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": null,
129 |    "metadata": {
130 |     "id": "aQgWjFq1yESe"
131 |    },
132 |    "outputs": [],
133 |    "source": [
134 |     "def tokenize_function(examples):\n",
135 |     "    return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n",
136 |     "\n",
137 |     "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {
143 |     "id": "xnLbsx2Jhurm"
144 |    },
145 |    "source": [
146 |     "We move towards the end of the tutorial for PyTorch specific instructions. Here we are extracting a larger subset of our original datasets. We also don't need to provide a seed since we seeded everything at the beginning."
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 6,
152 |    "metadata": {
153 |     "id": "iPQy_FMcxBy3"
154 |    },
155 |    "outputs": [],
156 |    "source": [
157 |     "tokenized_datasets = tokenized_datasets.remove_columns([\"text\"])\n",
158 |     "tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n",
159 |     "tokenized_datasets.set_format(\"torch\")\n",
160 |     "\n",
161 |     "small_train_dataset = tokenized_datasets[\"train\"].shuffle().select(range(5000))\n",
162 |     "small_eval_dataset = tokenized_datasets[\"test\"].shuffle().select(range(5000))"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "markdown",
167 |    "metadata": {
168 |     "id": "0JvnKjqRiEFP"
169 |    },
170 |    "source": [
171 |     "### Dataloaders"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 7,
177 |    "metadata": {
178 |     "id": "APNr5lgsygtw"
179 |    },
180 |    "outputs": [],
181 |    "source": [
182 |     "from torch.utils.data import DataLoader\n",
183 |     "\n",
184 |     "train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=8)\n",
185 |     "eval_dataloader = DataLoader(small_eval_dataset, batch_size=8)"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {
191 |     "id": "ax8ToOAviGGS"
192 |    },
193 |    "source": [
194 |     "### Model"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": null,
200 |    "metadata": {
201 |     "id": "XVS15FELytIj"
202 |    },
203 |    "outputs": [],
204 |    "source": [
205 |     "from transformers import AutoModelForSequenceClassification\n",
206 |     "\n",
207 |     "model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-cased\", num_labels=2)"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {
213 |     "id": "QOI8dLDDiI7c"
214 |    },
215 |    "source": [
216 |     "### Optimizer"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": 9,
222 |    "metadata": {
223 |     "id": "RIihFsPryvI_"
224 |    },
225 |    "outputs": [],
226 |    "source": [
227 |     "from transformers import AdamW\n",
228 |     "\n",
229 |     "optimizer = AdamW(model.parameters(), lr=5e-5)"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "markdown",
234 |    "metadata": {
235 |     "id": "0-Yd47-UiLFs"
236 |    },
237 |    "source": [
238 |     "### LR Scheduler\n",
239 |     "\n",
240 |     "We will use Ignite's built-in alternative to the `linear` scheduler, [`PiecewiseLinear`](https://pytorch.org/ignite/generated/ignite.handlers.param_scheduler.PiecewiseLinear.html#piecewiselinear). We will also increase the number of epochs."
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 10,
246 |    "metadata": {
247 |     "id": "U5gBJL-uS9WG"
248 |    },
249 |    "outputs": [],
250 |    "source": [
251 |     "from ignite.contrib.handlers import PiecewiseLinear\n",
252 |     "\n",
253 |     "num_epochs = 10\n",
254 |     "num_training_steps = num_epochs * len(train_dataloader)\n",
255 |     "\n",
256 |     "milestones_values = [\n",
257 |     "        (0, 5e-5),\n",
258 |     "        (num_training_steps, 0.0),\n",
259 |     "    ]\n",
260 |     "lr_scheduler = PiecewiseLinear(\n",
261 |     "        optimizer, param_name=\"lr\", milestones_values=milestones_values\n",
262 |     "    )"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "markdown",
267 |    "metadata": {
268 |     "id": "vEPNZEO8jRSf"
269 |    },
270 |    "source": [
271 |     "### Set Device"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": null,
277 |    "metadata": {
278 |     "id": "A0WXnFD7bg2C"
279 |    },
280 |    "outputs": [],
281 |    "source": [
282 |     "import torch\n",
283 |     "\n",
284 |     "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
285 |     "model.to(device)"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "markdown",
290 |    "metadata": {
291 |     "id": "elcEG2gojTsI"
292 |    },
293 |    "source": [
294 |     "## Create Trainer\n",
295 |     "\n",
296 |     "Ignite's [`Engine`](https://pytorch-ignite.ai/concepts/01-engine/) allows users to define a `process_function` to process a given batch of data. This function is applied to all the batches of the dataset. This is a general class that can be applied to train and validate models. A `process_function` has two parameters `engine` and `batch`."
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "markdown",
301 |    "metadata": {
302 |     "id": "iVoVnJTlWau5"
303 |    },
304 |    "source": [
305 |     "The code for processing a batch of training data in the tutorial is as follows:\n",
306 |     "\n",
307 |     "```python\n",
308 |     "for batch in train_dataloader:\n",
309 |     "    batch = {k: v.to(device) for k, v in batch.items()}\n",
310 |     "    outputs = model(**batch)\n",
311 |     "    loss = outputs.loss\n",
312 |     "    loss.backward()\n",
313 |     "\n",
314 |     "    optimizer.step()\n",
315 |     "    lr_scheduler.step()\n",
316 |     "    optimizer.zero_grad()\n",
317 |     "    progress_bar.update(1)\n",
318 |     "```\n",
319 |     "\n",
320 |     "Therefore we will define a `process_function` (called `train_step` below) to do the above tasks:\n",
321 |     "\n",
322 |     "* Set `model` in train mode. \n",
323 |     "* Move items of the `batch` to `device`.\n",
324 |     "* Perform forward pass and generate `output`.\n",
325 |     "* Extract loss.\n",
326 |     "* Perform backward pass using loss to calculate gradients for the model parameters.\n",
327 |     "* Optimize model parameters using gradients and optimizer.\n",
328 |     "\n",
329 |     "Finally, we choose to return the `loss` so we can utilize it for further processing.\n",
330 |     "\n",
331 |     "You will also notice that we do not update the `lr_scheduler` and `progress_bar` in `train_step`. This is because Ignite automatically takes care of it as we will see later in this tutorial."
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "code",
336 |    "execution_count": 37,
337 |    "metadata": {
338 |     "id": "Q4ncIcYcznkQ"
339 |    },
340 |    "outputs": [],
341 |    "source": [
342 |     "def train_step(engine, batch):  \n",
343 |     "    model.train()\n",
344 |     "    \n",
345 |     "    batch = {k: v.to(device) for k, v in batch.items()}\n",
346 |     "    outputs = model(**batch)\n",
347 |     "    loss = outputs.loss\n",
348 |     "    loss.backward()\n",
349 |     "\n",
350 |     "    optimizer.step()\n",
351 |     "    optimizer.zero_grad()\n",
352 |     "\n",
353 |     "    return loss"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "markdown",
358 |    "metadata": {
359 |     "id": "n1rXDRO4Y_dp"
360 |    },
361 |    "source": [
362 |     "And then we create a model `trainer` by attaching the `train_step` to the training engine. Later, we will use `trainer` for looping over the training dataset for `num_epochs`."
363 |    ]
364 |   },
365 |   {
366 |    "cell_type": "code",
367 |    "execution_count": 38,
368 |    "metadata": {
369 |     "id": "LFWgXnX4cWV1"
370 |    },
371 |    "outputs": [],
372 |    "source": [
373 |     "from ignite.engine import Engine\n",
374 |     "\n",
375 |     "trainer = Engine(train_step)"
376 |    ]
377 |   },
378 |   {
379 |    "cell_type": "markdown",
380 |    "metadata": {
381 |     "id": "U4Wx_3rrns0a"
382 |    },
383 |    "source": [
384 |     "The `lr_scheduler` we defined previously was a handler. \n",
385 |     "\n",
386 |     "[Handlers](https://pytorch-ignite.ai/concepts/02-events-and-handlers/#handlers) can be any type of function (lambda functions, class methods, etc.). On top of that, Ignite provides several built-in handlers to reduce redundant code. We attach these handlers to engine which is triggered at a specific [event](https://pytorch-ignite.ai/concepts/02-events-and-handlers/). These events can be anything like the start of an iteration or the end of an epoch. [Here](https://pytorch.org/ignite/generated/ignite.engine.events.Events.html#events) is a complete list of built-in events.\n",
387 |     "\n",
388 |     "Therefore, we will attach the `lr_scheduler` (handler) to the `trainer` (`engine`) via [`add_event_handler()`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.add_event_handler) so it can be triggered at `Events.ITERATION_STARTED` (start of an iteration) automatically."
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "code",
393 |    "execution_count": null,
394 |    "metadata": {
395 |     "id": "LauRCX2Vi8Su"
396 |    },
397 |    "outputs": [],
398 |    "source": [
399 |     "from ignite.engine import Events\n",
400 |     "\n",
401 |     "trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler)"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {
407 |     "id": "vPXFO_3dc2ll"
408 |    },
409 |    "source": [
410 |     "This is the reason we did not include `lr_scheduler.step()` in `train_step()`."
411 |    ]
412 |   },
413 |   {
414 |    "cell_type": "markdown",
415 |    "metadata": {
416 |     "id": "bNSOB05LjfVB"
417 |    },
418 |    "source": [
419 |     "## Progress Bar\n",
420 |     "\n",
421 |     "Next we create an instance of Ignite's [`ProgressBar()`](https://pytorch.org/ignite/generated/ignite.contrib.handlers.tqdm_logger.html#ignite.contrib.handlers.tqdm_logger.ProgressBar) and attach it to the trainer to replace `progress_bar.update(1)`."
422 |    ]
423 |   },
424 |   {
425 |    "cell_type": "code",
426 |    "execution_count": 40,
427 |    "metadata": {
428 |     "id": "PxSVmnLica2c"
429 |    },
430 |    "outputs": [],
431 |    "source": [
432 |     "from ignite.contrib.handlers import ProgressBar\n",
433 |     "\n",
434 |     "pbar = ProgressBar()"
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "markdown",
439 |    "metadata": {
440 |     "id": "X9KSg4s5eLJn"
441 |    },
442 |    "source": [
443 |     "We can either simply track the progress:"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "code",
448 |    "execution_count": 29,
449 |    "metadata": {
450 |     "id": "C7t_K2MOd6qp"
451 |    },
452 |    "outputs": [],
453 |    "source": [
454 |     "pbar.attach(trainer)"
455 |    ]
456 |   },
457 |   {
458 |    "cell_type": "markdown",
459 |    "metadata": {
460 |     "id": "sxwfbhkCeRf0"
461 |    },
462 |    "source": [
463 |     "Or also track the output of `trainer` (or `train_step`):"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "code",
468 |    "execution_count": 41,
469 |    "metadata": {
470 |     "id": "Ib1uF2GHeQlt"
471 |    },
472 |    "outputs": [],
473 |    "source": [
474 |     "pbar.attach(trainer, output_transform=lambda x: {'loss': x})"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "markdown",
479 |    "metadata": {
480 |     "id": "Bf7_CjXvjj0K"
481 |    },
482 |    "source": [
483 |     "## Create Evaluator\n",
484 |     "\n",
485 |     "Similar to the training `process_function`, we set up a function to evaluate a single batch of train/validation/test data.\n",
486 |     "\n",
487 |     "```python\n",
488 |     "model.eval()\n",
489 |     "for batch in eval_dataloader:\n",
490 |     "    batch = {k: v.to(device) for k, v in batch.items()}\n",
491 |     "    with torch.no_grad():\n",
492 |     "        outputs = model(**batch)\n",
493 |     "\n",
494 |     "    logits = outputs.logits\n",
495 |     "    predictions = torch.argmax(logits, dim=-1)\n",
496 |     "    metric.add_batch(predictions=predictions, references=batch[\"labels\"])\n",
497 |     "```\n",
498 |     "\n",
499 |     " Here is what `evaluate_step()` below does:\n",
500 |     "\n",
501 |     "* Sets model in eval mode.\n",
502 |     "* Move items of the `batch` to `device`.\n",
503 |     "* With `torch.no_grad()`, no gradients are calculated for any succeeding steps.\n",
504 |     "* Perform a forward pass on the model to calculate `outputs` from `batch`\n",
505 |     "* Get the real `predictions` from `logits` (probability of positive and negative classes).\n",
506 |     "\n",
507 |     "Finally, we return the predictions and the actual labels so that we can compute the metrics.\n",
508 |     "\n",
509 |     "You will notice that we did not compute the metrics in `evaluate_step()`. This is because Ignite provides built-in [metrics](https://pytorch-ignite.ai/concepts/04-metrics/) which we can later attach to the engine."
510 |    ]
511 |   },
512 |   {
513 |    "cell_type": "markdown",
514 |    "metadata": {
515 |     "id": "98H3y4xagmao"
516 |    },
517 |    "source": [
518 |     "**Note:** Ignite suggests attaching metrics to evaluators and not trainers because during the training the model parameters are constantly changing and it is best to evaluate model on a stationary model. This information is important as there is a difference in the functions for training and evaluating. Training returns a single scalar loss. Evaluating returns `y_pred` and `y` as that output is used to calculate metrics per batch for the entire dataset.\n",
519 |     "\n",
520 |     "All metrics in Ignite require `y_pred` and `y` as outputs of the function attached to the Engine. "
521 |    ]
522 |   },
523 |   {
524 |    "cell_type": "code",
525 |    "execution_count": 42,
526 |    "metadata": {
527 |     "id": "H3F69ZOwcUdQ"
528 |    },
529 |    "outputs": [],
530 |    "source": [
531 |     "def evaluate_step(engine, batch):\n",
532 |     "    model.eval()\n",
533 |     "\n",
534 |     "    batch = {k: v.to(device) for k, v in batch.items()}\n",
535 |     "    with torch.no_grad():\n",
536 |     "        outputs = model(**batch)\n",
537 |     "\n",
538 |     "    logits = outputs.logits\n",
539 |     "    predictions = torch.argmax(logits, dim=-1)\n",
540 |     "\n",
541 |     "    return {'y_pred': predictions, 'y': batch[\"labels\"]}"
542 |    ]
543 |   },
544 |   {
545 |    "cell_type": "markdown",
546 |    "metadata": {
547 |     "id": "VFrSJYx-mutP"
548 |    },
549 |    "source": [
550 |     "Below we create two engines, a training evaluator and a validation evaluator. `train_evaluator` and `validation_evaluator` use the same function but they serve different purposes as we will see later in this tutorial."
551 |    ]
552 |   },
553 |   {
554 |    "cell_type": "code",
555 |    "execution_count": 43,
556 |    "metadata": {
557 |     "id": "xfxf_TGadIjc"
558 |    },
559 |    "outputs": [],
560 |    "source": [
561 |     "train_evaluator = Engine(evaluate_step)\n",
562 |     "validation_evaluator = Engine(evaluate_step)"
563 |    ]
564 |   },
565 |   {
566 |    "cell_type": "markdown",
567 |    "metadata": {
568 |     "id": "ESp1oe8jjtI6"
569 |    },
570 |    "source": [
571 |     "## Attach Metrics\n",
572 |     "\n",
573 |     "The 🤗 tutorial defines one metric, accuracy, to be used for evaluation:\n",
574 |     "\n",
575 |     "```python\n",
576 |     "metric= load_metric(\"accuracy\")\n",
577 |     "```\n",
578 |     "\n",
579 |     "We can easily attach Ignite's built-in [`Accuracy()`](https://pytorch.org/ignite/generated/ignite.metrics.Accuracy.html#accuracy) metric to `train_evaluator` and `validation_evaluator`. We also need to specify the metric name (`accuracy` below). Internally, it will use `y_pred` and `y` to compute the accuracy. "
580 |    ]
581 |   },
582 |   {
583 |    "cell_type": "code",
584 |    "execution_count": 44,
585 |    "metadata": {
586 |     "id": "RsT3Yq1sd5Os"
587 |    },
588 |    "outputs": [],
589 |    "source": [
590 |     "from ignite.metrics import Accuracy\n",
591 |     "\n",
592 |     "Accuracy().attach(train_evaluator, 'accuracy')\n",
593 |     "Accuracy().attach(validation_evaluator, 'accuracy')"
594 |    ]
595 |   },
596 |   {
597 |    "cell_type": "markdown",
598 |    "metadata": {
599 |     "id": "so6yub_fjydU"
600 |    },
601 |    "source": [
602 |     "## Log Metrics\n",
603 |     "\n",
604 |     "Now we will define custom handlers (functions) and attach them to various `Events` of the training process.\n",
605 |     "\n",
606 |     "The functions below both achieve similar tasks. They print the results of the `evaluator` run on a dataset. `log_training_results()` does this on the training evaluator and train dataset, while `log_validation_results()` on the validation evaluator and validation dataset. Another difference is how these functions are attached in the trainer engine.\n",
607 |     "\n",
608 |     "The first method involves using a decorator, the syntax is simple - `@` `trainer.on(Events.EPOCH_COMPLETED)`, means that the decorated function will be attached to the trainer and called at the end of each epoch. \n",
609 |     "\n",
610 |     "The second method involves using the add_event_handler method of trainer - `trainer.add_event_handler(Events.EPOCH_COMPLETED, custom_function)`. This achieves the same result as the above. "
611 |    ]
612 |   },
613 |   {
614 |    "cell_type": "code",
615 |    "execution_count": null,
616 |    "metadata": {
617 |     "id": "mrl-60C9aEkW"
618 |    },
619 |    "outputs": [],
620 |    "source": [
621 |     "@trainer.on(Events.EPOCH_COMPLETED)\n",
622 |     "def log_training_results(engine):\n",
623 |     "    train_evaluator.run(train_dataloader)\n",
624 |     "    metrics = train_evaluator.state.metrics\n",
625 |     "    avg_accuracy = metrics['accuracy']\n",
626 |     "    print(f\"Training Results - Epoch: {engine.state.epoch}  Avg accuracy: {avg_accuracy:.3f}\")\n",
627 |     "    \n",
628 |     "def log_validation_results(engine):\n",
629 |     "    validation_evaluator.run(eval_dataloader)\n",
630 |     "    metrics = validation_evaluator.state.metrics\n",
631 |     "    avg_accuracy = metrics['accuracy']\n",
632 |     "    print(f\"Validation Results - Epoch: {engine.state.epoch}  Avg accuracy: {avg_accuracy:.3f}\")\n",
633 |     "\n",
634 |     "trainer.add_event_handler(Events.EPOCH_COMPLETED, log_validation_results)"
635 |    ]
636 |   },
637 |   {
638 |    "cell_type": "markdown",
639 |    "metadata": {
640 |     "id": "fGbSjScZj2Bt"
641 |    },
642 |    "source": [
643 |     "## Early Stopping\n",
644 |     "\n",
645 |     "Now we'll set up an [`EarlyStopping`](https://pytorch.org/ignite/generated/ignite.handlers.early_stopping.EarlyStopping.html#earlystopping) handler for the training process. `EarlyStopping` requires a `score_function` that allows the user to define whatever criteria to stop training. In this case, if the accuracy on the validation set does not improve for 2 epochs (`patience`), the training process will stop early.  "
646 |    ]
647 |   },
648 |   {
649 |    "cell_type": "code",
650 |    "execution_count": null,
651 |    "metadata": {
652 |     "id": "O0xElFVYeuL9"
653 |    },
654 |    "outputs": [],
655 |    "source": [
656 |     "from ignite.handlers import EarlyStopping\n",
657 |     "\n",
658 |     "def score_function(engine):\n",
659 |     "    val_accuracy = engine.state.metrics['accuracy']\n",
660 |     "    return val_accuracy\n",
661 |     "\n",
662 |     "handler = EarlyStopping(patience=2, score_function=score_function, trainer=trainer)\n",
663 |     "validation_evaluator.add_event_handler(Events.COMPLETED, handler)"
664 |    ]
665 |   },
666 |   {
667 |    "cell_type": "markdown",
668 |    "metadata": {
669 |     "id": "C1UtBUmmj9dq"
670 |    },
671 |    "source": [
672 |     "## Model Checkpoint\n",
673 |     "\n",
674 |     "Lastly, we want to save the model weights. So we will use Ignite's [`ModelCheckpoint`](https://pytorch.org/ignite/generated/ignite.handlers.checkpoint.ModelCheckpoint.html#modelcheckpoint) handler to checkpoint models at the end of each epoch. This will create a `models` directory and keep the 2 most recent checkpoints (`n_saved`) with the prefix `bert-base-cased`; since no `score_function` is passed, older checkpoints are removed rather than lower-scoring ones."
675 |    ]
676 |   },
677 |   {
678 |    "cell_type": "code",
679 |    "execution_count": null,
680 |    "metadata": {
681 |     "id": "7xz8qozReQuG"
682 |    },
683 |    "outputs": [],
684 |    "source": [
685 |     "from ignite.handlers import ModelCheckpoint\n",
686 |     "\n",
687 |     "checkpointer = ModelCheckpoint(dirname='models', filename_prefix='bert-base-cased', n_saved=2, create_dir=True)\n",
688 |     "trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model})"
689 |    ]
690 |   },
691 |   {
692 |    "cell_type": "markdown",
693 |    "metadata": {
694 |     "id": "jgzcQVfvkAXK"
695 |    },
696 |    "source": [
697 |     "## Begin Training!\n",
698 |     "\n",
699 |     "Next, we'll run the trainer for 10 epochs and monitor the results. Below we can see that `ProgressBar` prints the loss per iteration, and prints the results of training and validation as we specified in our custom function. "
700 |    ]
701 |   },
702 |   {
703 |    "cell_type": "code",
704 |    "execution_count": null,
705 |    "metadata": {
706 |     "id": "sS0Ut5z0dmQc"
707 |    },
708 |    "outputs": [],
709 |    "source": [
710 |     "trainer.run(train_dataloader, max_epochs=num_epochs)"
711 |    ]
712 |   },
713 |   {
714 |    "cell_type": "markdown",
715 |    "metadata": {
716 |     "id": "OpqXiZUsznkY"
717 |    },
718 |    "source": [
719 |     "That's it! We have successfully trained and evaluated a Transformer for Text Classification. "
720 |    ]
721 |   }
722 |  ],
723 |  "metadata": {
724 |   "accelerator": "GPU",
725 |   "colab": {
726 |    "collapsed_sections": [],
727 |    "name": "transformers-text-classification.ipynb",
728 |    "provenance": []
729 |   },
730 |   "kernelspec": {
731 |    "display_name": "Python 3 (ipykernel)",
732 |    "language": "python",
733 |    "name": "python3"
734 |   },
735 |   "language_info": {
736 |    "codemirror_mode": {
737 |     "name": "ipython",
738 |     "version": 3
739 |    },
740 |    "file_extension": ".py",
741 |    "mimetype": "text/x-python",
742 |    "name": "python",
743 |    "nbconvert_exporter": "python",
744 |    "pygments_lexer": "ipython3",
745 |    "version": "3.10.4"
746 |   }
747 |  },
748 |  "nbformat": 4,
749 |  "nbformat_minor": 4
750 | }
751 | 


--------------------------------------------------------------------------------
/tutorials/intermediate/cifar10-distributed.py:
--------------------------------------------------------------------------------
  1 | import fire
  2 | from datetime import datetime
  3 | from pathlib import Path
  4 | 
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.optim as optim
  8 | from torchvision import datasets, models
  9 | from torchvision.transforms import (
 10 |     Compose,
 11 |     Normalize,
 12 |     Pad,
 13 |     RandomCrop,
 14 |     RandomHorizontalFlip,
 15 |     ToTensor,
 16 | )
 17 | 
 18 | import ignite
 19 | import ignite.distributed as idist
 20 | from ignite.contrib.engines import common
 21 | from ignite.handlers import PiecewiseLinear
 22 | from ignite.engine import (
 23 |     Events,
 24 |     create_supervised_trainer,
 25 |     create_supervised_evaluator,
 26 | )
 27 | from ignite.handlers import Checkpoint, global_step_from_engine
 28 | from ignite.metrics import Accuracy, Loss
 29 | from ignite.utils import manual_seed, setup_logger
 30 | 
 31 | 
# Default run configuration shared by every function in this script.
# `run()` overwrites "backend"; `training()` adds "num_iters_per_epoch" and,
# on rank 0 only, `setup_rank_zero()` rewrites "output_path".
config = {
    "seed": 543,  # base seed; each process adds its rank before seeding
    "data_path": "cifar10",  # root directory handed to datasets.CIFAR10
    "output_path": "output-cifar10/",  # parent directory of the per-run output folder
    "model": "resnet18",  # name looked up in torchvision.models
    "batch_size": 512,  # training batch size; the test loader uses twice this
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "num_workers": 2,  # worker count passed to both dataloaders
    "num_epochs": 5,
    "learning_rate": 0.4,  # peak learning rate, reached at the end of warm-up
    "num_warmup_epochs": 1,
    "validate_every": 3,  # run both evaluators every N completed epochs
    "checkpoint_every": 200,  # training-state checkpoint period, in iterations
    "backend": None,
    "resume_from": None,  # path of a checkpoint to restore before training, if any
    "log_every_iters": 15,  # only tested for > 0, to decide whether output names are passed
    "nproc_per_node": None,  # NOTE(review): not read by any function in this file
    "with_clearml": False,
    "with_amp": False,
}
 53 | 
 54 | 
 55 | def get_train_test_datasets(path):
 56 |     train_transform = Compose(
 57 |         [
 58 |             Pad(4),
 59 |             RandomCrop(32, fill=128),
 60 |             RandomHorizontalFlip(),
 61 |             ToTensor(),
 62 |             Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
 63 |         ]
 64 |     )
 65 |     test_transform = Compose(
 66 |         [
 67 |             ToTensor(),
 68 |             Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
 69 |         ]
 70 |     )
 71 | 
 72 |     train_ds = datasets.CIFAR10(
 73 |         root=path, train=True, download=False, transform=train_transform
 74 |     )
 75 |     test_ds = datasets.CIFAR10(
 76 |         root=path, train=False, download=False, transform=test_transform
 77 |     )
 78 | 
 79 |     return train_ds, test_ds
 80 | 
 81 | 
 82 | def get_dataflow(config):
 83 |     train_dataset, test_dataset = get_train_test_datasets(config["data_path"])
 84 | 
 85 |     train_loader = idist.auto_dataloader(
 86 |         train_dataset,
 87 |         batch_size=config["batch_size"],
 88 |         num_workers=config["num_workers"],
 89 |         shuffle=True,
 90 |         drop_last=True,
 91 |     )
 92 | 
 93 |     test_loader = idist.auto_dataloader(
 94 |         test_dataset,
 95 |         batch_size=2 * config["batch_size"],
 96 |         num_workers=config["num_workers"],
 97 |         shuffle=False,
 98 |     )
 99 |     return train_loader, test_loader
100 | 
101 | 
def get_model(config):
    """Instantiate the torchvision model named by ``config["model"]``.

    The model is built with 10 output classes and wrapped with
    ``idist.auto_model``.

    Raises:
        RuntimeError: if the name is not an attribute of ``torchvision.models``.
    """
    name = config["model"]
    if name not in models.__dict__:
        raise RuntimeError(f"Unknown model name {name}")

    factory = models.__dict__[name]
    return idist.auto_model(factory(num_classes=10))
112 | 
113 | 
def get_optimizer(config, model):
    """Create a Nesterov SGD optimizer for ``model`` wrapped by ``idist.auto_optim``.

    Args:
        config: mapping providing ``"learning_rate"``, ``"momentum"`` and
            ``"weight_decay"``.
        model: module whose parameters are optimised.

    Returns:
        The optimizer returned by ``idist.auto_optim``.
    """
    sgd = optim.SGD(
        model.parameters(),
        lr=config["learning_rate"],
        momentum=config["momentum"],
        weight_decay=config["weight_decay"],
        nesterov=True,
    )
    return idist.auto_optim(sgd)
125 | 
126 | 
def get_criterion():
    """Return a cross-entropy loss module moved to ``idist.device()``."""
    criterion = nn.CrossEntropyLoss()
    return criterion.to(idist.device())
129 | 
130 | 
def get_lr_scheduler(config, optimizer):
    """Build a warm-up then decay ``PiecewiseLinear`` schedule for ``"lr"``.

    The learning rate starts at 0, rises linearly to
    ``config["learning_rate"]`` by the end of the warm-up epochs and then
    falls linearly back to 0 by the last training iteration.

    Args:
        config: mapping providing ``"num_iters_per_epoch"``,
            ``"num_warmup_epochs"``, ``"learning_rate"`` and ``"num_epochs"``.
        optimizer: optimizer whose ``lr`` parameter is scheduled.
    """
    warmup_end = config["num_iters_per_epoch"] * config["num_warmup_epochs"]
    peak_lr = config["learning_rate"]
    training_end = config["num_iters_per_epoch"] * config["num_epochs"]

    schedule = [(0, 0.0), (warmup_end, peak_lr), (training_end, 0.0)]
    return PiecewiseLinear(optimizer, param_name="lr", milestones_values=schedule)
144 | 
145 | 
def get_save_handler(config):
    """Return the checkpoint save handler selected by ``config``.

    Without ClearML this is simply the output path string; with
    ``config["with_clearml"]`` set it is a ``ClearMLSaver`` writing to that
    path.
    """
    if not config["with_clearml"]:
        return config["output_path"]

    # Imported lazily so ClearML is only needed when it is enabled.
    from ignite.contrib.handlers.clearml_logger import ClearMLSaver

    return ClearMLSaver(dirname=config["output_path"])
153 | 
154 | 
def load_checkpoint(resume_from):
    """Load a checkpoint file onto the CPU.

    Args:
        resume_from: path (string or path-like) of the checkpoint file.

    Returns:
        The object returned by ``torch.load`` with ``map_location="cpu"``.

    Raises:
        FileNotFoundError: if ``resume_from`` does not exist. An explicit
            exception is raised instead of an ``assert`` so the check is not
            stripped when Python runs with ``-O``.
    """
    checkpoint_fp = Path(resume_from)
    if not checkpoint_fp.exists():
        raise FileNotFoundError(
            f"Checkpoint '{checkpoint_fp.as_posix()}' is not found"
        )
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    return torch.load(checkpoint_fp.as_posix(), map_location="cpu")
162 | 
163 | 
def create_trainer(
    model, optimizer, criterion, lr_scheduler, train_sampler, config, logger
):
    """Create the supervised training engine and attach the common handlers.

    Args:
        model: model to train.
        optimizer: optimizer stepping the model parameters.
        criterion: loss function.
        lr_scheduler: scheduler handler passed to the common training handlers
            and included in the checkpointed objects.
        train_sampler: sampler of the training dataloader, forwarded to
            ``setup_common_training_handlers``.
        config: mapping providing ``"with_amp"``, ``"checkpoint_every"``,
            ``"log_every_iters"`` and ``"resume_from"`` (plus the keys read by
            ``get_save_handler``).
        logger: logger assigned to the trainer.

    Returns:
        The configured trainer engine. If ``config["resume_from"]`` is set,
        the trainer, model, optimizer and scheduler state are restored from
        that checkpoint before returning.
    """
    device = idist.device()

    trainer = create_supervised_trainer(
        model,
        optimizer,
        criterion,
        device=device,
        non_blocking=True,
        # Expose only the scalar loss, under the name the handlers log.
        output_transform=lambda x, y, y_pred, loss: {"batch loss": loss.item()},
        amp_mode="amp" if config["with_amp"] else None,
        scaler=config["with_amp"],
    )
    trainer.logger = logger

    # Everything needed to resume training later.
    to_save = {
        "trainer": trainer,
        "model": model,
        "optimizer": optimizer,
        "lr_scheduler": lr_scheduler,
    }
    metric_names = [
        "batch loss",
    ]

    common.setup_common_training_handlers(
        trainer=trainer,
        train_sampler=train_sampler,
        to_save=to_save,
        save_every_iters=config["checkpoint_every"],
        save_handler=get_save_handler(config),
        lr_scheduler=lr_scheduler,
        output_names=metric_names if config["log_every_iters"] > 0 else None,
        with_pbars=False,
        clear_cuda_cache=False,
    )

    if config["resume_from"] is not None:
        checkpoint = load_checkpoint(config["resume_from"])
        Checkpoint.load_objects(to_load=to_save, checkpoint=checkpoint)

    return trainer
211 | 
212 | 
def create_evaluator(model, metrics, config):
    """Create a supervised evaluator for ``model`` on ``idist.device()``.

    Args:
        model: model to evaluate.
        metrics: mapping of metric name to metric, attached to the evaluator.
        config: mapping providing ``"with_amp"``; AMP is enabled when truthy.

    Returns:
        The evaluator engine.
    """
    return create_supervised_evaluator(
        model,
        metrics=metrics,
        device=idist.device(),
        non_blocking=True,
        amp_mode="amp" if config["with_amp"] else None,
    )
222 | 
223 | 
def setup_rank_zero(logger, config):
    """Prepare the run's output folder and, optionally, the ClearML task.

    A timestamped folder named after the model, backend and world size is
    created under ``config["output_path"]``, and ``config["output_path"]`` is
    rewritten in place to point at it.

    When ``config["with_clearml"]`` is set, a ClearML task is initialised, the
    full configuration is attached to it, and the selected hyper-parameters
    are connected separately.

    Args:
        logger: logger used to report the output path.
        config: run configuration; mutated in place.
    """
    now = datetime.now().strftime("%Y%m%d-%H%M%S")
    folder_name = (
        f"{config['model']}_backend-{idist.backend()}-{idist.get_world_size()}_{now}"
    )
    output_path = Path(config["output_path"]) / folder_name
    # exist_ok avoids the check-then-create race of a separate exists() test.
    output_path.mkdir(parents=True, exist_ok=True)
    config["output_path"] = output_path.as_posix()
    logger.info(f"Output path: {config['output_path']}")

    if config["with_clearml"]:
        from clearml import Task

        task = Task.init("CIFAR10-Training", task_name=output_path.stem)
        task.connect_configuration(config)
        # Log hyper parameters
        hyper_params = [
            "model",
            "batch_size",
            "momentum",
            "weight_decay",
            "num_epochs",
            "learning_rate",
            "num_warmup_epochs",
        ]
        # Connect only the selected hyper-parameters; the full configuration
        # was already attached through connect_configuration above.
        task.connect({k: config[k] for k in hyper_params})
254 | 
255 | 
def log_basic_info(logger, config):
    """Log library versions, GPU details, the configuration and distributed setup.

    Args:
        logger: logger receiving every message at INFO level.
        config: mapping whose key/value pairs are listed one per line.
    """
    logger.info("Train on CIFAR10")
    logger.info(f"- PyTorch version: {torch.__version__}")
    logger.info(f"- Ignite version: {ignite.__version__}")

    if torch.cuda.is_available():
        # explicitly import cudnn as torch.backends.cudnn can not be pickled with hvd spawning procs
        from torch.backends import cudnn

        gpu_name = torch.cuda.get_device_name(idist.get_local_rank())
        logger.info(f"- GPU Device: {gpu_name}")
        logger.info(f"- CUDA version: {torch.version.cuda}")
        logger.info(f"- CUDNN version: {cudnn.version()}")

    logger.info("\n")
    logger.info("Configuration:")
    for name in config:
        logger.info(f"\t{name}: {config[name]}")
    logger.info("\n")

    # The distributed section is only relevant with more than one process.
    world_size = idist.get_world_size()
    if world_size > 1:
        logger.info("\nDistributed setting:")
        logger.info(f"\tbackend: {idist.backend()}")
        logger.info(f"\tworld size: {world_size}")
        logger.info("\n")
281 | 
282 | 
def log_metrics(logger, epoch, elapsed, tag, metrics):
    """Log one evaluation summary as a single INFO message.

    Args:
        logger: object exposing ``info(message)``.
        epoch: epoch number shown in the header.
        elapsed: evaluation time in seconds, formatted with two decimals.
        tag: label of the evaluated split (e.g. ``"train"`` or ``"val"``).
        metrics: mapping of metric name to value, printed one per line.
    """
    rows = (f"\t{name}: {value}" for name, value in metrics.items())
    metrics_output = "\n".join(rows)
    header = (
        f"\nEpoch {epoch} - Evaluation time (seconds): {elapsed:.2f} - {tag} metrics:\n "
    )
    logger.info(header + metrics_output)
288 | 
289 | 
def training(local_rank, config):
    """Run the full CIFAR10 training loop in the current process.

    Seeds the process, builds data, model, optimizer, loss and scheduler,
    creates the trainer and two evaluators, wires validation, TensorBoard
    logging (rank 0 only) and best-model checkpointing, then runs training.

    Args:
        local_rank: local process index supplied by the launcher; not read
            in this function.
        config: run configuration; ``"num_iters_per_epoch"`` is added here
            and, on rank 0, ``"output_path"`` is rewritten by
            ``setup_rank_zero``.

    Raises:
        Exception: any error raised by ``trainer.run`` is logged and re-raised.
    """
    rank = idist.get_rank()
    # Offset the seed by rank so each process draws different random numbers.
    manual_seed(config["seed"] + rank)

    logger = setup_logger(name="CIFAR10-Training")
    log_basic_info(logger, config)

    if rank == 0:
        setup_rank_zero(logger, config)

    train_loader, val_loader = get_dataflow(config)
    model = get_model(config)
    optimizer = get_optimizer(config, model)
    criterion = get_criterion()
    # The scheduler milestones are expressed in iterations.
    config["num_iters_per_epoch"] = len(train_loader)
    lr_scheduler = get_lr_scheduler(config, optimizer)

    trainer = create_trainer(
        model, optimizer, criterion, lr_scheduler, train_loader.sampler, config, logger
    )

    metrics = {
        "Accuracy": Accuracy(),
        "Loss": Loss(criterion),
    }

    train_evaluator = create_evaluator(model, metrics, config)
    val_evaluator = create_evaluator(model, metrics, config)

    def run_validation(engine):
        """Evaluate on the train and validation loaders and log the metrics."""
        epoch = trainer.state.epoch
        state = train_evaluator.run(train_loader)
        log_metrics(logger, epoch, state.times["COMPLETED"], "train", state.metrics)
        state = val_evaluator.run(val_loader)
        log_metrics(logger, epoch, state.times["COMPLETED"], "val", state.metrics)

    trainer.add_event_handler(
        Events.EPOCH_COMPLETED(every=config["validate_every"]) | Events.COMPLETED,
        run_validation,
    )

    # Only rank 0 owns a TensorBoard logger; None elsewhere.
    tb_logger = None
    if rank == 0:
        evaluators = {"train": train_evaluator, "val": val_evaluator}
        tb_logger = common.setup_tb_logging(
            config["output_path"], trainer, optimizer, evaluators=evaluators
        )

    # Keep the two checkpoints with the highest validation accuracy.
    best_model_handler = Checkpoint(
        {"model": model},
        get_save_handler(config),
        filename_prefix="best",
        n_saved=2,
        global_step_transform=global_step_from_engine(trainer),
        score_name="val_accuracy",
        score_function=Checkpoint.get_default_score_fn("Accuracy"),
    )
    val_evaluator.add_event_handler(
        Events.COMPLETED,
        best_model_handler,
    )

    try:
        trainer.run(train_loader, max_epochs=config["num_epochs"])
    except Exception:
        logger.exception("")
        raise
    finally:
        # Close the TensorBoard logger even when training fails.
        if tb_logger is not None:
            tb_logger.close()
360 | 
361 | 
def run(backend=None, **spawn_kwargs):
    """Launch ``training`` through ``idist.Parallel``.

    Args:
        backend: distributed backend name, stored in the module-level
            ``config`` before launching; ``None`` by default.
        **spawn_kwargs: extra keyword arguments forwarded to
            ``idist.Parallel``.
    """
    config["backend"] = backend

    launcher = idist.Parallel(backend=config["backend"], **spawn_kwargs)
    with launcher as parallel:
        parallel.run(training, config)
367 | 
368 | 
# Script entry point: hand the `run` function to python-fire under the
# command name "run".
if __name__ == "__main__":
    fire.Fire({"run": run})
371 | 


--------------------------------------------------------------------------------