├── .github └── workflows │ ├── ci.yml │ └── label.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── generate.py ├── how-to-guides ├── 01-installation.ipynb ├── 02-convert-pytorch-to-ignite.ipynb ├── 03-time-profiling.ipynb ├── 04-fastai-lr-finder.ipynb ├── 05-gradient-accumulation.ipynb ├── 06-data-iterator.ipynb ├── 07-cross-validation.ipynb ├── 08-custom-events.ipynb ├── 09-switch-data-training.ipynb ├── 10-loggers.ipynb ├── 11-load-checkpoint.ipynb └── assets │ ├── clearml-dashboard.png │ └── convert-pytorch2ignite.gif ├── tests └── test_generate.py └── tutorials ├── advanced └── 01-collective-communication.ipynb ├── assets ├── all-gather.png ├── all-reduce.png └── broadcast.png ├── beginner ├── 01-getting-started.ipynb └── 02-transformers-text-classification.ipynb └── intermediate ├── 01-cifar10-distributed.ipynb ├── 02-Machine_Translation_using_PyTorch_Ignite.ipynb ├── 03-reinforcement-learning.ipynb └── cifar10-distributed.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | paths: 5 | - "generate.py" 6 | - ".github/workflows/ci.yml" 7 | 8 | pull_request: 9 | paths: 10 | - "generate.py" 11 | - ".github/workflows/ci.yml" 12 | 13 | jobs: 14 | Run-Tests: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Install dependencies 19 | run: | 20 | pip install pytest 21 | - name: Run tests 22 | run: | 23 | pytest tests -vvv 24 | -------------------------------------------------------------------------------- /.github/workflows/label.yml: -------------------------------------------------------------------------------- 1 | # This workflow will triage pull requests and apply a label based on the 2 | # paths that are modified in the pull request. 3 | # 4 | # To use this workflow, you will need to set up a .github/labeler.yml 5 | # file with configuration. 
For more information, see: 6 | # https://github.com/actions/labeler 7 | 8 | name: Labeler 9 | on: [pull_request] 10 | 11 | jobs: 12 | label: 13 | 14 | runs-on: ubuntu-latest 15 | permissions: 16 | contents: read 17 | pull-requests: write 18 | 19 | steps: 20 | - uses: actions/labeler@v2 21 | with: 22 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # How to Guide 132 | # Ignore generated files 133 | how-to-guides/MNIST/ 134 | *.csv -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | This project is a community effort, and everyone is welcome to contribute! 4 | 5 | If you are interested in contributing to PyTorch-Ignite's examples, there are many ways to help out. Your contributions may fall into the following categories: 6 | 7 | 1. It helps us very much if you could report issues you’re facing with: 8 | 9 | - Executing the Jupyter notebooks, scripts or rendering assets. 10 | - Understanding the language written in any of the notebooks that can be simplified. 11 | 12 | 2. You would like to add more examples. These fall into two categories: 13 | 1. Tutorials: Something general which has emphasis on explanation and has self-contained end-to-end code which showcases an Ignite concept or concepts. These are meant for learning purposes when exploring the library. See cifar10 tutorial on distributed training: https://pytorch-ignite.ai/tutorials/cifar10-distributed/ for an example. 14 | 2. How-to guides: These are very specific and more code-based. They are used to answer a specific question like how to use FastaiLRFinder with Ignite or how to do cross validation.
Comparing it with the `cifar10` tutorial above, if we were to make a how to guide it could go like: how to train a model using multiple gpus with ignite. 15 | 16 | Please refer to [README.md](https://github.com/pytorch-ignite/examples#readme) on how to generate Jupyter notebooks with built-in frontmatter. 17 | 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, PyTorch-Ignite Organization 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-Ignite Examples, tutorials and how-to guides 2 | 3 | Notebooks in this repo require the essential frontmatter to be used 4 | when rendering on the [website](https://pytorch-ignite.ai). 5 | 6 | To contribute a notebook, please use the [`generate.py`](./generate.py) 7 | script to generate it. 8 | 9 | **Usage:** 10 | 11 | ```sh 12 | # python generate.py ... 13 | python generate.py yolo 14 | # > Generated /workspace/yolo.ipynb 15 | ``` 16 | 17 | Alternatively, you can run `generate.py` on your existing notebooks to add the required frontmatter cell to them. 18 | ```sh 19 | # If your completed tutorial is present in /workspace/yolo.ipynb 20 | python generate.py yolo 21 | # > Added frontmatter to /workspace/yolo.ipynb 22 | ``` 23 | This will add the necessary frontmatter cell at the top of the notebook; you then need to open it and update the values. 24 | 25 | See more in [`generate.py`](./generate.py). 26 | -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- 1 | # Generate plain notebooks with the required frontmatter defined 2 | 3 | # Usage: 4 | # $ python generate.py ...
[-h] 5 | # 6 | # Generate plain notebooks with the required frontmatter defined. 7 | # 8 | # Positional arguments: 9 | # notebook_names Notebooks to generate 10 | # 11 | # Options: 12 | # -h, --help show this help message and exit 13 | # 14 | # Example: 15 | # python generate.py {data-iterator,fastai-lr-finder,gradient-accumulation,installation} 16 | 17 | import json 18 | import os 19 | from argparse import ArgumentParser 20 | from datetime import datetime 21 | 22 | today = datetime.now().strftime('%Y-%m-%d') 23 | 24 | notebook = { 25 | 'nbformat': 4, 26 | 'nbformat_minor': 0, 27 | 'metadata': { 28 | 'kernelspec': { 29 | 'display_name': 'Python 3', 30 | 'name': 'python3', 31 | }, 32 | 'accelerator': 'GPU', 33 | }, 34 | 'cells': [ 35 | { 36 | 'cell_type': 'markdown', 37 | 'metadata': {}, 38 | 'source': [ 39 | '` tag below to provide summary for this notebook, ' 45 | 'and delete the other>\n' 46 | 'tags:\n', 47 | ' - \n', 48 | '--- -->\n', 49 | '\n', 50 | '# title-placeholder\n', 51 | '\n', 52 | '\n', 54 | '` below.>', 55 | '\n', 56 | '', 57 | ] 58 | } 59 | ] 60 | } 61 | 62 | if __name__ == '__main__': 63 | cwd = os.getcwd() 64 | parser = ArgumentParser( 65 | 'generate', 66 | '$ python generate.py ... [-h]', 67 | 'Generate plain notebooks with the required frontmatter defined.' 68 | ) 69 | parser.add_argument( 70 | 'notebook_names', 71 | help='Notebooks to generate', 72 | nargs='+', 73 | ) 74 | args = parser.parse_args() 75 | for name in args.notebook_names: 76 | if not name.endswith('.ipynb'): 77 | name = name + ".ipynb" 78 | 79 | if os.path.isfile(name): 80 | with open(name) as fp: 81 | content = json.load(fp) 82 | if len(content['cells']) > 0 and content['cells'][0] == notebook['cells'][0]: 83 | print(f'Frontmatter cell already exists in {os.path.join(cwd, name)}. 
Exiting') 84 | 85 | else: 86 | for key, value in content.items(): 87 | if key != 'cells': 88 | content[key] = notebook[key] 89 | else: 90 | content[key] = notebook[key] + content[key] 91 | 92 | with open(name, mode='w') as f: 93 | f.write(json.dumps(content, indent=2)) 94 | print(f'Added frontmatter to {os.path.join(cwd, name)}') 95 | 96 | else: 97 | with open(name, 'w') as fp: 98 | json.dump(notebook, fp, indent=2) 99 | print(f'Generated {os.path.join(cwd, name)}') 100 | -------------------------------------------------------------------------------- /how-to-guides/01-installation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "d13be020", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 22 | "# How to install PyTorch-Ignite" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "513c70bb", 28 | "metadata": {}, 29 | "source": [ 30 | "From [pip](https://pypi.org/project/pytorch-ignite/):\n", 31 | "\n", 32 | "``` shell\n", 33 | "pip install pytorch-ignite\n", 34 | "```\n", 35 | "\n", 36 | "From [conda](https://anaconda.org/pytorch/ignite):\n", 37 | "\n", 38 | "``` shell\n", 39 | "conda install ignite -c pytorch\n", 40 | "```\n", 41 | "\n", 42 | "From source:\n", 43 | "\n", 44 | "``` shell\n", 45 | "pip install git+https://github.com/pytorch/ignite\n", 46 | "```" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "7df69584", 52 | "metadata": {}, 53 | "source": [ 54 | "## Nightly releases\n", 55 | "\n", 56 | "From pip:\n", 57 | "\n", 58 | "``` shell\n", 59 | "pip install --pre pytorch-ignite\n", 60 | "```\n", 61 | "\n", 62 | "From conda (please install the [pytorch nightly\n", 63 | "release](https://anaconda.org/pytorch-nightly/pytorch) instead of the\n", 64 | "stable version as a dependency):\n", 65 | "\n", 66 | "``` shell\n", 67 | "conda install ignite -c pytorch-nightly\n", 68 | "```" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | 
"id": "a0f0118c", 74 | "metadata": {}, 75 | "source": [ 76 | "\n", 77 | "## Docker Images\n", 78 | "\n", 79 | "### Using pre-built images\n", 80 | "\n", 81 | "Pull a pre-built docker image from [our Docker\n", 82 | "Hub](https://hub.docker.com/u/pytorchignite) using :\n", 83 | "\n", 84 | "``` shell\n", 85 | "docker pull IMAGE_NAME\n", 86 | "```\n", 87 | "\n", 88 | "Available pre-built images are :\n", 89 | "\n", 90 | "| | Base | Horovod | MS DeepSpeed |\n", 91 | "|:-------------:|:----------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------:|\n", 92 | "| Base | [`pytorchignite/base:latest`](https://hub.docker.com/r/pytorchignite/base) | [`pytorchignite/hvd-base:latest`](https://hub.docker.com/r/pytorchignite/hvd-base) | X |\n", 93 | "| Vision | [`pytorchignite/vision:latest`](https://hub.docker.com/r/pytorchignite/vision) | [`pytorchignite/hvd-vision:latest`](https://hub.docker.com/r/pytorchignite/hvd-vision) | X |\n", 94 | "| NLP | [`pytorchignite/nlp:latest`](https://hub.docker.com/r/pytorchignite/nlp) | [`pytorchignite/hvd-nlp:latest`](https://hub.docker.com/r/pytorchignite/hvd-nlp) | X |\n", 95 | "| NVIDIA Apex | [`pytorchignite/apex:latest`](https://hub.docker.com/r/pytorchignite/apex) | [`pytorchignite/hvd-apex:latest`](https://hub.docker.com/r/pytorchignite/hvd-apex) | [`pytorchignite/msdp-apex:latest`](https://hub.docker.com/r/pytorchignite/msdp-apex) |\n", 96 | "| Apex + Vision | [`pytorchignite/apex-vision:latest`](https://hub.docker.com/r/pytorchignite/apex-vision) | [`pytorchignite/hvd-apex-vision:latest`](https://hub.docker.com/r/pytorchignite/hvd-apex-vision) | [`pytorchignite/msdp-apex-vision:latest`](https://hub.docker.com/r/pytorchignite/msdp-apex-vision) |\n", 97 | "| Apex + NLP | 
[`pytorchignite/apex-nlp:latest`](https://hub.docker.com/r/pytorchignite/apex-nlp) | [`pytorchignite/hvd-apex-nlp:latest`](https://hub.docker.com/r/pytorchignite/hvd-apex-nlp) | [`pytorchignite/msdp-apex-nlp:latest`](https://hub.docker.com/r/pytorchignite/msdp-apex-nlp) |\n", 98 | "\n", 99 | "and run it with Docker v19.03+ :\n", 100 | "\n", 101 | "``` shell\n", 102 | "docker run --gpus all -it -v $PWD:/workspace/project --network=host --shm-size 16G IMAGE_NAME\n", 103 | "```\n", 104 | "\n", 105 | "For more details, [check out our\n", 106 | "GitHub](https://github.com/pytorch/ignite/tree/master/docker)." 107 | ] 108 | } 109 | ], 110 | "metadata": { 111 | "kernelspec": { 112 | "display_name": "Python 3 (ipykernel)", 113 | "language": "python", 114 | "name": "python3" 115 | }, 116 | "language_info": { 117 | "codemirror_mode": { 118 | "name": "ipython", 119 | "version": 3 120 | }, 121 | "file_extension": ".py", 122 | "mimetype": "text/x-python", 123 | "name": "python", 124 | "nbconvert_exporter": "python", 125 | "pygments_lexer": "ipython3", 126 | "version": "3.10.4" 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 5 131 | } 132 | -------------------------------------------------------------------------------- /how-to-guides/02-convert-pytorch-to-ignite.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "xo0JaCAvVI64" 7 | }, 8 | "source": [ 9 | "\n", 22 | "# How to convert pure PyTorch code to Ignite " 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "id": "CXNZ4XPeV8_I" 29 | }, 30 | "source": [ 31 | "In this guide, we will show how PyTorch code components can be converted into compact and flexible PyTorch-Ignite code. 
\n", 32 | "\n", 33 | "\n", 34 | "\n", 35 | "![Convert PyTorch to Ignite](assets/convert-pytorch2ignite.gif)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Since Ignite focuses on the training and validation pipeline, the code for models, datasets, optimizers, etc will remain user-defined and in pure PyTorch." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "id": "L6zvxAsVjP-Z" 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "model = ...\n", 54 | "train_loader = ...\n", 55 | "val_loader = ...\n", 56 | "optimizer = ...\n", 57 | "criterion = ..." 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "2EmmpiTX6huF" 64 | }, 65 | "source": [ 66 | "## Training Loop to `trainer`\n", 67 | "\n", 68 | "A typical PyTorch training loop processes a single batch of data, passes it through the `model`, calculates `loss`, etc as below:\n", 69 | "\n", 70 | "```python\n", 71 | "for batch in train_loader:\n", 72 | " model.train()\n", 73 | " inputs, targets = batch\n", 74 | " optimizer.zero_grad()\n", 75 | " outputs = model(inputs)\n", 76 | " loss = criterion(outputs, targets)\n", 77 | " loss.backward()\n", 78 | " optimizer.step()\n", 79 | "```" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": { 85 | "id": "zDkeEWz58hCJ" 86 | }, 87 | "source": [ 88 | "To convert the above code into Ignite we need to move the code or steps taken to process a single batch of data while training under a function (`train_step()` below). This function will take `engine` and `batch` (current batch of data) as arguments and can return any data (usually the loss) that can be accessed via `engine.state.output`. We pass this function to `Engine` which creates a `trainer` object." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "id": "lkWiJVuvh-LC" 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "from ignite.engine import Engine\n", 100 | "\n", 101 | "\n", 102 | "def train_step(engine, batch):\n", 103 | " model.train()\n", 104 | " inputs, targets = batch\n", 105 | " optimizer.zero_grad()\n", 106 | " outputs = model(inputs)\n", 107 | " loss = criterion(outputs, targets)\n", 108 | " loss.backward()\n", 109 | " optimizer.step()\n", 110 | " return loss.item()\n", 111 | "\n", 112 | "\n", 113 | "trainer = Engine(train_step)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": { 119 | "id": "4MWJzKK8-AiC" 120 | }, 121 | "source": [ 122 | "There are other [helper methods](https://pytorch.org/ignite/engine.html#helper-methods-to-define-supervised-trainer-and-evaluator) that directly create the `trainer` object without writing a custom function for some common use cases like [supervised training](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html#ignite.engine.create_supervised_trainer) and [truncated backprop through time](https://pytorch.org/ignite/contrib/engines.html#ignite.contrib.engines.tbptt.create_supervised_tbptt_trainer)." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "id": "cocfuUFZ8okw" 129 | }, 130 | "source": [ 131 | "## Validation Loop to `evaluator`\n", 132 | "\n", 133 | "The validation loop typically makes predictions (`y_pred` below) on the `val_loader` batch by batch and uses them to calculate evaluation metrics (Accuracy, Intersection over Union, etc) as below:\n", 134 | "\n", 135 | "```python\n", 136 | "model.eval()\n", 137 | "num_correct = 0\n", 138 | "num_examples = 0\n", 139 | "\n", 140 | "for batch in val_loader:\n", 141 | " x, y = batch\n", 142 | " y_pred = model(x)\n", 143 | "\n", 144 | " correct = torch.eq(torch.round(y_pred).type(y.type()), y).view(-1)\n", 145 | " num_correct = torch.sum(correct).item()\n", 146 | " num_examples = correct.shape[0]\n", 147 | " print(f\"Epoch: {epoch}, Accuracy: {num_correct / num_examples}\")\n", 148 | "```" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": { 154 | "id": "N0ETiWo9E0D4" 155 | }, 156 | "source": [ 157 | "We will convert this to Ignite in two steps by separating the validation and metrics logic.\n", 158 | "\n", 159 | "We will move the model evaluation logic under another function (`validation_step()` below) which receives the same parameters as `train_step()` and processes a single batch of data to return some output (usually the predicted and actual value which can be used to calculate metrics) stored in `engine.state.output`. Another instance (called `evaluator` below) of `Engine` is created by passing the `validation_step()` function." 
160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "id": "zv2kceT0CS-L" 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "def validation_step(engine, batch):\n", 171 | " model.eval()\n", 172 | " with torch.no_grad():\n", 173 | " x, y = batch\n", 174 | " y_pred = model(x)\n", 175 | "\n", 176 | " return y_pred, y\n", 177 | " \n", 178 | " \n", 179 | "evaluator = Engine(validation_step)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "id": "EAIBqfFm8oqS" 186 | }, 187 | "source": [ 188 | "Similar to the training loop, there are [helper methods](https://pytorch.org/ignite/engine.html#helper-methods-to-define-supervised-trainer-and-evaluator) to avoid writing this custom evaluation function like [`create_supervised_evaluator`](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_evaluator.html#ignite.engine.create_supervised_evaluator).\n", 189 | "\n", 190 | "**Note**: You can create different evaluators for training, validation, and testing if they serve different purposes. A common practice is to have two separate evaluators for training and validation, since the results of the validation evaluator are helpful in determining the best model to save after training." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "id": "4t4PsYXn8ost" 197 | }, 198 | "source": [ 199 | "## Switch to built-in Metrics\n", 200 | "\n", 201 | "Then we can replace the code for calculating metrics like accuracy and instead use several [out-of-the-box metrics](https://pytorch.org/ignite/metrics.html#complete-list-of-metrics) that Ignite provides or write a custom one (refer [here](https://pytorch.org/ignite/metrics.html#how-to-create-a-custom-metric)). The metrics will be computed using the `evaluator`'s output. 
Finally, we attach these metrics to the `evaluator` by providing a key name (\"accuracy\" below) so they can be accessed via `engine.state.metrics[key_name]`." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "id": "iUVAOP6kFdA-" 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "from ignite.metrics import Accuracy\n", 213 | "\n", 214 | "Accuracy().attach(evaluator, \"accuracy\")" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "id": "WnGK925N5AR7" 221 | }, 222 | "source": [ 223 | "## Organizing code into Events and Handlers\n", 224 | "\n", 225 | "Next, we need to identify any code that is triggered when an event occurs. Examples of events can be the start of an iteration, completion of an epoch, or even the start of backprop. We already provide some predefined events (complete list [here](https://pytorch.org/ignite/generated/ignite.engine.events.Events.html#ignite.engine.events.Events)); however, we can also create custom ones (refer [here](https://pytorch-ignite.ai/concepts/02-events-and-handlers#custom-events)). We move the event-specific code to different handlers (named functions, lambdas, class functions) which are attached to these events and executed whenever a specific event happens.
Here are some common handlers:" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": { 231 | "id": "uZIdI39b-rB4" 232 | }, 233 | "source": [ 234 | "### Running `evaluator`\n", 235 | "\n", 236 | "We can convert the code that runs the `evaluator` on the training/validation/test dataset after `validate_every` epoch:\n", 237 | "\n", 238 | "```python\n", 239 | "if epoch % validate_every == 0:\n", 240 | " # Validation logic\n", 241 | "```\n", 242 | "\n", 243 | "by attaching a handler to a built-in event `EPOCH_COMPLETED` like:" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "id": "62Z6RmfJVn7s" 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "from ignite.engine import Events\n", 255 | "\n", 256 | "validate_every = 10\n", 257 | "\n", 258 | "\n", 259 | "@trainer.on(Events.EPOCH_COMPLETED(every=validate_every))\n", 260 | "def run_validation():\n", 261 | " evaluator.run(val_loader)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": { 267 | "id": "7bkte_sKb-vr" 268 | }, 269 | "source": [ 270 | "### Logging metrics\n", 271 | "\n", 272 | "Similarly, we can log the validation metrics in another handler or combine it with the above handler." 
273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "id": "ZExU6_CscHyf" 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "@trainer.on(Events.EPOCH_COMPLETED(every=validate_every))\n", 284 | "def log_validation():\n", 285 | " metrics = evaluator.state.metrics\n", 286 | " print(f\"Epoch: {trainer.state.epoch}, Accuracy: {metrics['accuracy']}\")" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": { 292 | "id": "sRgDrTgi5AU_" 293 | }, 294 | "source": [ 295 | "### Progress Bar\n", 296 | "\n", 297 | "We use a built-in wrapper around `tqdm` called [`ProgressBar()`](https://pytorch.org/ignite/generated/ignite.contrib.handlers.tqdm_logger.html#module-ignite.contrib.handlers.tqdm_logger)." 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": { 304 | "id": "0j79aG7ddmk6" 305 | }, 306 | "outputs": [], 307 | "source": [ 308 | "from ignite.contrib.handlers import ProgressBar\n", 309 | "\n", 310 | "ProgressBar().attach(trainer)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": { 316 | "id": "vkqMcVnA5AZ3" 317 | }, 318 | "source": [ 319 | "### Checkpointing\n", 320 | "\n", 321 | "Instead of saving all models after `checkpoint_every` epoch:\n", 322 | "```python\n", 323 | "if epoch % checkpoint_every == 0:\n", 324 | " checkpoint(model, optimizer, \"checkpoint_dir\")\n", 325 | "```\n", 326 | "\n", 327 | "we can smartly save the best `n_saved` models (depending on `evaluator.state.metrics`), and the state of `optimizer` and `trainer` via the built-in [`Checkpoint()`](https://pytorch.org/ignite/generated/ignite.handlers.checkpoint.Checkpoint.html#checkpoint).\n" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "id": "VAkDj1fpoSij" 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "from ignite.handlers import Checkpoint\n", 339 | "\n", 340 | "checkpoint_every = 5\n", 
341 | "checkpoint_dir = ...\n", 342 | "\n", 343 | "\n", 344 | "checkpointer = Checkpoint(\n", 345 | " to_save={'model': model, 'optimizer': optimizer, 'trainer': trainer},\n", 346 | " save_handler=checkpoint_dir, n_saved=2\n", 347 | ")\n", 348 | "trainer.add_event_handler(\n", 349 | " Events.EPOCH_COMPLETED(every=checkpoint_every), checkpointer\n", 350 | ")" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": { 356 | "id": "WbByMD6xYpgM" 357 | }, 358 | "source": [ 359 | "## Run for a number of epochs\n", 360 | "\n", 361 | "Finally, instead of:\n", 362 | "```python\n", 363 | "max_epochs = ...\n", 364 | "\n", 365 | "for epoch in range(max_epochs):\n", 366 | "```\n", 367 | "we begin training on `train_loader` via:\n", 368 | "```python\n", 369 | "trainer.run(train_loader, max_epochs)\n", 370 | "```" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "An end-to-end example implementing the above principles can be found [here](https://pytorch-ignite.ai/tutorials/getting-started/#complete-code)." 
378 | ] 379 | } 380 | ], 381 | "metadata": { 382 | "colab": { 383 | "collapsed_sections": [], 384 | "name": "convert-pytorch-to-ignite.ipynb", 385 | "provenance": [], 386 | "toc_visible": true 387 | }, 388 | "kernelspec": { 389 | "display_name": "Python 3 (ipykernel)", 390 | "language": "python", 391 | "name": "python3" 392 | }, 393 | "language_info": { 394 | "codemirror_mode": { 395 | "name": "ipython", 396 | "version": 3 397 | }, 398 | "file_extension": ".py", 399 | "mimetype": "text/x-python", 400 | "name": "python", 401 | "nbconvert_exporter": "python", 402 | "pygments_lexer": "ipython3", 403 | "version": "3.10.4" 404 | } 405 | }, 406 | "nbformat": 4, 407 | "nbformat_minor": 4 408 | } 409 | -------------------------------------------------------------------------------- /how-to-guides/04-fastai-lr-finder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "5w-QlZE9mvdY" 7 | }, 8 | "source": [ 9 | "\n", 18 | "\n", 19 | "# How to use FastaiLRFinder with Ignite" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "This how-to guide demonstrates how we can leverage the [`FastaiLRFinder`](https://pytorch.org/ignite/generated/ignite.handlers.lr_finder.FastaiLRFinder.html) handler to find an optimal learning rate to train our model on. We will compare the results produced with and without using the handler for better understanding.\n", 27 | "\n", 28 | "\n", 29 | "\n", 30 | "In this example, we will be using a [ResNet18](https://pytorch.org/vision/stable/models.html#torchvision.models.resnet18) model on the [MNIST](https://pytorch.org/vision/stable/datasets.html#torchvision.datasets.MNIST) dataset. The base code is the same as used in the [Getting Started Guide](https://pytorch-ignite.ai/tutorials/getting-started/)." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": { 36 | "id": "L_wmAdFgmvdx" 37 | }, 38 | "source": [ 39 | "## Basic Setup" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 1, 45 | "metadata": { 46 | "id": "lMphyBmmmvdw", 47 | "pycharm": { 48 | "is_executing": false 49 | } 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "import torch\n", 54 | "import torch.nn as nn\n", 55 | "from torch.utils.data import DataLoader\n", 56 | "from torchvision.datasets import MNIST\n", 57 | "from torchvision.models import resnet18\n", 58 | "from torchvision.transforms import Compose, Normalize, ToTensor\n", 59 | "\n", 60 | "from ignite.engine import create_supervised_trainer, create_supervised_evaluator\n", 61 | "from ignite.metrics import Accuracy, Loss\n", 62 | "from ignite.handlers import FastaiLRFinder" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 22, 68 | "metadata": { 69 | "id": "eZeKOgKymvdx" 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 74 | "\n", 75 | "\n", 76 | "class Net(nn.Module):\n", 77 | " def __init__(self):\n", 78 | " super(Net, self).__init__()\n", 79 | "\n", 80 | " self.model = resnet18(num_classes=10)\n", 81 | " self.model.conv1 = nn.Conv2d(\n", 82 | " 1, 64, kernel_size=3, padding=1, bias=False\n", 83 | " )\n", 84 | "\n", 85 | " def forward(self, x):\n", 86 | " return self.model(x)\n", 87 | "\n", 88 | "\n", 89 | "model = Net().to(device)\n", 90 | "\n", 91 | "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n", 92 | "\n", 93 | "train_loader = DataLoader(\n", 94 | " MNIST(download=True, root=\".\", transform=data_transform, train=True),\n", 95 | " batch_size=128,\n", 96 | " shuffle=True,\n", 97 | ")\n", 98 | "\n", 99 | "test_loader = DataLoader(\n", 100 | " MNIST(download=True, root=\".\", transform=data_transform, train=False),\n", 101 | " batch_size=256,\n", 102 | " shuffle=False,\n", 103 
| ")\n", 104 | "\n", 105 | "\n", 106 | "model = Net().to(device)\n", 107 | "optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-06)\n", 108 | "criterion = nn.CrossEntropyLoss()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": { 114 | "id": "b04erc67G8IK" 115 | }, 116 | "source": [ 117 | "We will first train the model with a fixed learning rate (lr) of 1e-06 and inspect our results. Let's save the initial state of the model and the optimizer to restore them later for comparison." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 23, 123 | "metadata": { 124 | "id": "HglaeKrqFlkY" 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "init_model_state = model.state_dict()\n", 129 | "init_opt_state = optimizer.state_dict()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Without LR Finder" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 24, 142 | "metadata": { 143 | "colab": { 144 | "base_uri": "https://localhost:8080/" 145 | }, 146 | "id": "dKihuuH4A-sH", 147 | "outputId": "699eebb6-7446-40cd-9b91-ebdaea07eaeb" 148 | }, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "State:\n", 154 | "\titeration: 1407\n", 155 | "\tepoch: 3\n", 156 | "\tepoch_length: 469\n", 157 | "\tmax_epochs: 3\n", 158 | "\toutput: 0.5554001927375793\n", 159 | "\tbatch: \n", 160 | "\tmetrics: \n", 161 | "\tdataloader: \n", 162 | "\tseed: \n", 163 | "\ttimes: " 164 | ] 165 | }, 166 | "execution_count": 24, 167 | "metadata": { 168 | "tags": [] 169 | }, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "trainer = create_supervised_trainer(model, optimizer, criterion, device=device)\n", 175 | "\n", 176 | "trainer.run(train_loader, max_epochs=3)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 25, 182 | "metadata": { 183 | "colab": { 184 | "base_uri": "https://localhost:8080/" 
185 | }, 186 | "id": "mophHZUkICKI", 187 | "outputId": "378c7aac-b3c8-49ff-8be1-acc547730c12" 188 | }, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "{'Accuracy': 0.8655, 'Loss': 0.602867822265625}\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "evaluator = create_supervised_evaluator(\n", 200 | " model, metrics={\"Accuracy\": Accuracy(), \"Loss\": Loss(criterion)}, device=device\n", 201 | ")\n", 202 | "evaluator.run(test_loader)\n", 203 | "\n", 204 | "print(evaluator.state.metrics)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": { 210 | "id": "U_EHmN2bmvd2" 211 | }, 212 | "source": [ 213 | "Let's see how we can achieve better results by using the [`FastaiLRFinder`](https://pytorch.org/ignite/generated/ignite.handlers.lr_finder.FastaiLRFinder.html) handler. But first, let's restore the initial state of the model and optimizer so we can re-train them from scratch. " 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 26, 219 | "metadata": { 220 | "id": "CTGJPVI6mvd2" 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "model.load_state_dict(init_model_state)\n", 225 | "optimizer.load_state_dict(init_opt_state)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "## With LR Finder" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "id": "fLaAAE05GIFh" 239 | }, 240 | "source": [ 241 | "When attached to the `trainer`, this handler follows the same procedure used by [fastai](https://docs.fast.ai/callback.schedule.html#LRFinder). The model is trained for `num_iter` iterations while the learning rate is increased from `start_lr` (defaults to initial value specified by the optimizer, here 1e-06) to the upper bound called `end_lr`. This increase can be linear (`step_mode=\"linear\"`) or exponential (`step_mode=\"exp\"`). 
The default `step_mode` is exponential, which is recommended for larger learning rate ranges, while linear provides good results for small ranges." 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 27, 247 | "metadata": { 248 | "id": "qEA0T0af3iU1" 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "lr_finder = FastaiLRFinder()\n", 253 | "\n", 254 | "# To restore the model's and optimizer's states after running the LR Finder\n", 255 | "to_save = {\"model\": model, \"optimizer\": optimizer}\n", 256 | "\n", 257 | "with lr_finder.attach(trainer, to_save, end_lr=1e-02) as trainer_with_lr_finder:\n", 258 | " trainer_with_lr_finder.run(train_loader)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": { 264 | "id": "lzhRX59cRDXO" 265 | }, 266 | "source": [ 267 | "Let's plot how the loss changes as the learning rate increases within our specified range and print the suggested learning rate." 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 28, 273 | "metadata": { 274 | "colab": { 275 | "base_uri": "https://localhost:8080/", 276 | "height": 300 277 | }, 278 | "id": "oN0VkPapmvd5", 279 | "outputId": "b2f3fc64-046e-43a4-cd7a-e604003b8c6f" 280 | }, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd3zU9f3A8dfnsncIWZAACRBGCBsDsgQnikKtoqJYRxVnbW1rqz8VR21rp1ZLnbXiKuJGRXFBlSUEZIMQwkiYCWTv3L1/f9wlJpBxCTcy3s/HI4/kvuve901y7/tsIyIopZTquizeDkAppZR3aSJQSqkuThOBUkp1cZoIlFKqi9NEoJRSXZwmAqWU6uJ8vR1Aa0VHR0tSUpK3w1BKqQ5l/fr1eSIS09i+DpcIkpKSyMjI8HYYSinVoRhj9je1T6uGlFKqi9NEoJRSXZwmAqWU6uLc2kZgjJkG/APwAV4UkcdP2t8bWABEOo65V0SWtPZ5qqurycnJoaKiwgVRK9U5BAYGkpiYiJ+fn7dDUe2c2xKBMcYHmA+cB+QA64wxi0Vke73DHgAWicgzxphUYAmQ1NrnysnJISwsjKSkJIwxLoheqY5NRDh+/Dg5OTkkJyd7OxzVzrmzaigdyBSRLBGpAhYCM086RoBwx88RwKG2PFFFRQXdu3fXJKCUgzGG7t27aylZOcWdiSAByK73OMexrb6HgTnGmBzspYGfNXYhY8xcY0yGMSYjNze30SfTJKBUQ/o/0T4cLapgb16pt8Nolrcbi2cDL4tIInAR8Kox5pSYROR5ERkjImNiYhodD9E6IrBmDbz3nv27rsmglHKxdftO8Pq3+xn7hy+5+KlvKKuq8XZITXJnIjgI9Kr3ONGxrb6fAosARGQ1EAhEuzEmWLIEeveG886D66+3f+/d2769k9i4cSNL2vB6pkyZ0uhgvaa2n2znzp2MGDGCkSNHsmfPnlY/v2qdP/7xj/Tv35+BAweydOlSb4ej6rHahFnPrub+97YCUFplZem2I16OqmnuTATrgBRjTLIxxh+4Clh80jEHgHMAjDGDsSeCxut+XGHJErj8csjJgZISKCqyf8/JsW/vJMmgrYmgtaxWa4PH77//Ppdffjnfffcd/fr1a/F8EcFms7ksnpoa733i8vRzb9++nYULF7Jt2zY+/fRTbr/99lN+H8p7snJL6n5edMuZJEQG8fHmLpgIRKQGuBNYCuzA3jtomzHmUWPMDMdhvwJuNsZsAv4LXC/uWjtTBObOhfLyxveXl8Mtt7Spmqi0tJTp06czfPhw0tLSePPNNwH7dBh5eXkAZGRkMGXKFAByc3M577zzGDJkCDfddBN9+vSpO+53v/sdAwcOZOLEicyePZu//vWvAOzZs4dp06YxevRoJk2axM6dOwF46623SEtLY/jw4UyePJmqqirmzZvHm2++yYgRI3jzzTcpLS3lxhtvJD09nZEjR/LBBx84XnI5V111FYMHD+bSSy+lvKl7U09oaCi/+tWvGD58OKtXr67bvmTJEp588kmeeeYZpk6dCsDf//530tLSSEtL48knnwRg3759DBw4kJ/85CekpaWRnZ3d4PpJSUn85je/YejQoaSnp5OZmQnAhx9+yNixYxk5ciTnnnsuR48eBeDhhx/m2muvZcKECVx77bXs27ePSZMmMWrUKEaNGsWqVasAWL58OWeddRYzZ86kb9++3Hvvvbz++uukp6czdOjQZkswe/bsYdy4cQwdOpQHHniA0NDQumtOmjSJGTNmkJqaSkVFBTfccANDhw5l5MiRLFu2DICXX36ZO++8s+56F198McuXL6+7n3fffTdDhgzhnHPOobYN7KmnniI1NZVhw4Zx1VVXnRLTBx98wFVXXUVAQADJycn079+ftWvXtvj7U56x9VAhAEt/MZn05CjSk6PYerDQy1E1Q0Q61Nfo0aPlZNu3bz9l2ylWrxYJDRWxv9U
3/hUaKrJmTcvXOsnbb78tN910U93jgoICERHp06eP5ObmiojIunXr5KyzzhIRkTvuuEP+8Ic/iIjIJ598IoDk5ubK2rVrZfjw4VJeXi5FRUXSv39/+ctf/iIiImeffbbs2rVLRETWrFkjU6dOFRGRtLQ0ycnJERGR/Px8ERH5z3/+I3fccUddPPfdd5+8+uqrdcekpKRISUmJ/O1vf5MbbrhBREQ2bdokPj4+sm7dulNe31lnnVW3HZA333yz0fvw0EMP1cWbkZEhaWlpUlJSIsXFxZKamiobNmyQvXv3ijFGVq9e3eg1+vTpI4899piIiCxYsECmT58uIiInTpwQm80mIiIvvPCC/PKXv6x7zlGjRklZWZmIiJSWlkp5ebmIiOzatUtq/16WLVsmERERcujQIamoqJCePXvKvHnzRETkySeflJ///OeNxiMiMn36dHnjjTdEROSZZ56RkJCQumsGBwdLVlaWiIj89a9/rbufO3bskF69ekl5efkpv4/p06fLsmXL6u7na6+9JiIijzzySN1xPXr0kIqKChH54fda3x133FH3OxURufHGG+Wtt9465Tin/jeUyz2yeJsMfGCJVNdYRUTkha/3SJ/ffiR5xRVeiwnIkCbeV73dWOw5hw+DpYWXa7HAodb3YB06dCiff/45v/3tb/nmm2+IiIho9vgVK1bUfcqbNm0a3bp1A2DlypXMnDmTwMBAwsLCuOSSSwAoKSlh1apVzJo1ixEjRnDLLbdw+PBhACZMmMD111/PCy+80GTVwGeffcbjjz/OiBEjmDJlChUVFRw4cICvv/6aOXPmADBs2DCGDRvW4mv18fHhsssua/G4FStWcOmllxISEkJoaCg//vGP+eabbwDo06cP48aNa/Lc2bNn132vLXXk5ORwwQUXMHToUP7yl7+wbdu2uuNnzJhBUFAQYB9cePPNNzN06FBmzZrF9u0/DFs544wz6NGjBwEBAfTr14/zzz8fsP/+9u3b12Q8q1evZtasWQBcffXVDfalp6fX9dNfsWJF3f0cNGgQffr0YdeuXc3eJ4vFwpVXXgnAnDlzWLFiBWD/fVxzzTW89tpr+Pp2uLkhu7yN2fmk9YzA18f+npPaw95LftuhIm+G1aSukwh69ICW6qNtNujZs9WXHjBgABs2bKirOnj00UcB8PX1rasDP53+3DabjcjISDZu3Fj3tWPHDgCeffZZHnvsMbKzsxk9ejTHjx8/5XwR4Z133qk798CBAwwePLhNsQQGBuLj49Pm1wIQEhLS7P763R5rf/7Zz37GnXfeyZYtW3juueca3M/613viiSeIi4tj06ZNZGRkUFVVVbcvICCg7meLxVL32GKxtLmOv6XXAg3/DqD5v4Xa1/vxxx9zxx13sGHDBs4444xT4ktISGhQrZaTk0NCwsm9s5U3lFdZ2ZxTyJikqLptQ3pGYAx8seOoFyNrWtdJBGPHQguf1ImMhPT0Vl/60KFDBAcHM2fOHO655x42bNgA2Ou7169fD8A777xTd/yECRNYtGgRYP+0np+fX7f9ww8/pKKigpKSEj766CMAwsPDSU5O5q233gLsb+ybNm0C7PXXY8eO5dFHHyUmJobs7GzCwsIoLi6ue74LLriAp59+GnG0f3z33XcATJ48mTfeeAOArVu3snnz5la/9qZMmjSJ999/n7KyMkpLS3nvvfeYNGmSU+fWtrG8+eabnHnmmQAUFhbWvdEtWLCgyXMLCwvp0aMHFouFV1991SUNqOPGjav7/S1cuLDJ4yZNmsTrr78OwK5duzhw4AADBw4kKSmJjRs3YrPZyM7OblCXb7PZePvttwF44403mDhxYt1xU6dO5U9/+hOFhYWUlJQ0eK4ZM2awcOFCKisr2bt3L7t37ya9DX+7yvW+y86nxiakJ3er2xYR7Mc1Y3vzyur97Dpa3MzZ3tF1EoEx8Pzz4KhCOEVQEDz3nP24VtqyZQvp6emMGDGCRx55hAceeACAhx5
6iJ///OeMGTOmwafohx56iM8++4y0tDTeeust4uPjCQsL44wzzmDGjBkMGzaMCy+8kKFDh9ZVM73++uv8+9//Zvjw4QwZMqSuwfeee+5h6NChpKWlMX78eIYPH87UqVPZvn17XWPxgw8+SHV1NcOGDWPIkCE8+OCDANx2222UlJQwePBg5s2bx+jRo1v92psyatQorr/+etLT0xk7diw33XQTI0eOdOrc/Px8hg0bxj/+8Q+eeOIJwN4oPGvWLEaPHk10dNM9jG+//XYWLFjA8OHD2blzp1Of2Fvy5JNP8ve//51hw4aRmZnZZNXf7bffjs1mY+jQoVx55ZW8/PLLBAQEMGHCBJKTk0lNTeWuu+5i1KhRdeeEhISwdu1a0tLS+Oqrr5g3bx5Wq5U5c+bUNTrfddddREZGNniuIUOGcMUVV5Camsq0adOYP3/+aZfUlGtsO2iv/hnZq1uD7deM7QPA7qMlp5zjdU01HrTXrzY3Ftf6+GORxER7w3B4uP17YqJ9u4dUVFRIdXW1iIisWrVKhg8fXrevuLhYROyNnqNHj5b169d7LK72oH4De3tRWlpa11D93//+V2bMmOGya9c2PLuLNhZ73tNf7pI+v/1IqhwNxbVyiyukz28/kpdX7vVKXDTTWNz1WqEuuggOHIC1a+0Nwz172quDPDgc/8CBA1xxxRXYbDb8/f154YUX6vbNnTuX7du3U1FRwXXXXdfg06PyjvXr13PnnXciIkRGRvLSSy95OyTVjlkdzUGWk95TugX7YzGQW1zphaia1/USAdjf9MeO9drTp6Sk1NXTn6y2zr6raq73jrv9/ve/r2uHqTVr1izuv//+ujYZVzu57l91fFZHW5zlpM+WPhZD99AA8ko0EbiNiOgkW+q03H///dx///3eDsNlROfQ8gr7e1Hjk/5Ft9NE0CkaiwMDAzl+/Lj+4SvlII71CAIDA70dSpdjtQk+TXwojQ7116ohd0lMTCQnJ4empqhWqiuqXaFMeZZVBMvJ9UIOMWEBZOW2vympO0Ui8PPz01WYlFLtgsip7QO1YkIDyC2pbHdV2Z2iakgppdqL5quGAqiqsVFU0b7WJtBEoJRSLmRroWoIaHcNxpoIlFLKhWw2OWUMQa3oUHsiaG8NxpoIlFLKhawi+DRRIogO8we0RKCUUp2arYXGYoA8LREopVTn1VzVULdgf3wshlwtESilVOdla6ZqyGIxRIX4k1dc1eh+b9FEoJRSLmS1nTrhXH0x7XCaCU0ESinlQvbuo03vjw4L4Ji2ESilVOdlk6YHlAEkRAaRk1/mwYhapolAKaVcyNpMYzFAUvdg8suqKSyr9mBUzdNEoJRSLtTcyGKAPt3ty6fuP9F+Jp/TRKCUUi5ks9Fs1VBytD0R7DvefqqH3JoIjDHTjDHfG2MyjTH3NrL/CWPMRsfXLmNMgTvjUUopd7M6FqZpSu+oYAD257WfEoHbpqE2xvgA84HzgBxgnTFmsYhsrz1GRO6ud/zPgJHuikcppTxBmhlHABDk70OvqCA25RR6MKrmubNEkA5kikiWiFQBC4GZzRw/G/ivG+NRSim3a6mxGOCcQXF8szuX0sr2MR21OxNBApBd73GOY9spjDF9gGTgqyb2zzXGZBhjMnQVMqVUe2YVmm0sBpiWFk9ljY35yzLbxRK77aWx+CrgbRGxNrZTRJ4XkTEiMiYmJsbDoSmllPNEBJ8WFh9LT4rix6MS+NfyPTz60fbmD/YAdyaCg0Cveo8THdsacxVaLaSU6gScqRqyWAx/mzWcH49KYMGqfZRXNfoZ2GPcmQjWASnGmGRjjD/2N/vFJx9kjBkEdANWuzEWpZTyCKut+XEEtYwxTBsSj01g++EiD0TWNLclAhGpAe4ElgI7gEUiss0Y86gxZka9Q68CFkp7qChTSqnTJNL8OIL6hiVGArAlx7s9593WfRRARJYAS07aNu+kxw+7MwallPIkqwh+TpQIAOLCA4gODWDzQe9
2JW0vjcVKKdUp2KTlNoJaxhiGJUawVROBUkp1HjZb8wPKTjY0IYLMYyWUVXlvTIEmAqWUciFrK0oEYE8ENoHth7zXYKyJQCmlXMjWwgplJxuWGAHAun357gqpRZoIlFLKhextBM4fHxseSHpyFP9ekUWJl6ac0ESglFIu1Nzi9U35zQUDySupYsnmw26KqnmaCJRSyoWcHVBW3+g+3UiIDOKz7UfdFFXzNBEopZQL2aR1bQRg70Z6Xqp9RtLKGs9PN6GJQCmlXMjmxKRzjUntEU5ljY1jRZWuD6oFmgiUUsqF2lI1BBAXEQjAkaIKV4fUIk0ESinlQjYnZh9tTHy4IxEUaiJQSqkOzdaKSefqq00ER7VEoJRSHZtV2lY1FB7kS6CfRUsESinV0UkrB5TVMsYQFx7I0WJtLFZKqQ7N2spJ5+qLCw/kqJYIlFKqY3NmqcqmxIcHaq8hpZTq6KQNA8pqxUfYE4GnF2zURKCUUi5kFcGnje+sceGBVNXYKCirdm1QLdBEoJRSLnQ6VUNx4QGA5weVaSJQSikXEqFN3UfBe2MJNBEopZQLWUXaNKAM7FVDoIlAKaU6tNYuTFNfXN00E54dS6CJQCmlXERETqtqyN/XQnSoP4cLy10cWfM0ESillItYbfZun21tLAZI6h5CVm6pq0JyilsTgTFmmjHme2NMpjHm3iaOucIYs90Ys80Y84Y741FKqZN9vSuXT7e6ZolIRx5o88higP6xoWTmlrgkHme5LREYY3yA+cCFQCow2xiTetIxKcB9wAQRGQL8wl3xAJRXeX7lH6VU+/av5Zk8+uF2l1zLJqdfIugfG8qJ0ipOlFa5JCZnuLNEkA5kikiWiFQBC4GZJx1zMzBfRPIBROSYq4MQEX7z9iaGPryUwfM+5dU1+139FEqpDqygrJpDhRUcc0FPnR8SQduv0S82FICnv9rtsRHG7kwECUB2vcc5jm31DQAGGGNWGmPWGGOmNXYhY8xcY0yGMSYjNzfXqSfPPFZC5rESXvgmi0UZORRX1ADwyOJtfLLlMBc//Q0PvL/FK+uDKqXaj9pRvN9lF5z2tWrbCE6naii1RzgA/1m5j805hacdkzN8PfIszT9/CjAFSAS+NsYMFZEGvxEReR54HmDMmDEtpsgaq42Z/1xBqaMq6KKh8Zw7OI7yait/+HgHt72+AYCtB4uIDg3gF+cOcOVrUkp1IAXl9iqYjdkFXDAk/rSuZbPZv59O1VBceCDv3Daey55ZxYYD+QzvFXlaMTnDnYngINCr3uNEx7b6coBvRaQa2GuM2YU9Maxr7sLlVVaC/H2a3P/FjqOUVlmJCQvgslGJ/Ozs/oQE2F/q5JQYNuUUML5fNA++v5Vn/7eHCf2jOSMpqvWvUCnVoVVUW6motr97b3JBicAVVUMAo/t0Iz48kI0uiMkZ7qwaWgekGGOSjTH+wFXA4pOOeR97aQBjTDT2qqKs5i6aX1rFkIc+5d0NOZworeLllXvJPFaCzfZDQeHVNftJiAxizX3ncO+Fg+qSAECvqGAuHtaTqBB/HpqRSs+IIK57aS3r9+e74jUrpTqQ2mqhID8fNucU1lXttJVVTr9qqNbI3pGs35/vkXYCt5UIRKTGGHMnsBTwAV4SkW3GmEeBDBFZ7Nh3vjFmO2AF7hGR481dN6egnB4Cv1y0CWPs83oA+FoMKXFhHDheSmmVlXsuGNjiLyM2LJCFc8cxc/5K/vbZ97xx87jTft1KqY6jtlpofL/ufLnzGHtySxgQF9bm69V+IG3rgLL6pg6M5ZOtR8jYn+/2Ggu3jiMQkSUiMkBE+onI7x3b5jmSAGL3SxFJFZGhIrKwpWtGhfgzY3hP/HwM149P4sWfjGF2em+uH59ETFgAE1OiGd+vO7PTezsVY2x4IDNHJLB27wkKy+2fDmqsNo/PB66U8rz8Uvv//FkDYwDYcpqNs7UFitNpI6h18fAehAX68upq9/d09HZjcaslRAbx1OyRVFu
H4+eY9Pvc1LjTuuZ5qbE8+789PPj+VgbGh/Hs8j3cclZf7jw7xRUhK6XaqUJHiWBEr0j8fAy7j53eQK66qiEXJIJgf18uG5XI69/uJ68klejQgNO+ZlM67BQTfm1d+aERI3t146cTk/lk62H+svR7iitr+OeyTAo9vDiEUsqz8h3/49GhASR1DyHzNBNBbdWQC/IAAHPG9aHaKrz/3cn9bFyrwyYCV7JYDA9enMqWhy/guwfP46OfTaTaKvz4mZVUVOs4A6U6q9rG4shgP/rHhrLnNKd2sLmwsRjso4zjwwPZfrjIJddriiaCegL9fOgW4k9aQgT/nD2SPbmlfL79qLfDUkq5SWF5NX4+hiA/H1JiQ9l/vPS0Pvy5YkDZyfrFhrDHzZPQaSJowgVD4ukREciijGxtOFaqkyquqCYs0A9jDP1iQ7EJ7Dve9jfd2sZi46q6IaBfTChZx0rc+j6kiaAJFou9V9I3u/P489LvdSoKpTqh4ooawgPtfWZSYu3dRk+nncDmwsbiWv1jQymurCG32H2L1WgiaMbNk/py2ahEnlm+h5++nKHJQKlOprZEANA3JgRjYPfRtieCH6qGXBIeYC8RAG6dmloTQTMsFsPfrhjO4z8eyorMPBauzW75JKVUh1FcUUOYo0QQ6OdD76jg03rDrS0RuLJqqHdUMAA5J9y3apkmAidceUYv0pOi+NfyzAYNSYsysln2vctnzlZKeUj9RADQPyaUPadTNeSYdM6VVUPxEYFYDOTkl7nsmifrcAPKvMEYwy/OS+HqF77lsY+3syWnkMSoYD7ebF/V6JUb05k8IKbueBFx6ScCpZR7FNWrGgJ7ffw3u/OosdrwbUP9jqu7j4J9zFR8eCA5Be4rEWgicNL4ftFM7B/Na2sOALApp5C+MSEUlVdzxxsbuH1Kf5K6B5NXWsU/vtjF4z8edtojnpVS7nVKiSA2lCqrjez8cpKjQ1p9Pau4dkBZrcRuweTkayJoF56ZM4o/fbqTC4bEM6ZPFP6+Fp77eg9//vR7/vTpzgbH3vrael75aTrj+0V7KVqlVHOsNqGksuaUEgHYew61JRHY3DCOACChWxBr955w6TXr00TQCmGBfjz2o6ENtt0wPpluwf6cPSiWXUeLWbBqP7dP7cev39rE1S98y9zJfbn73AHNrp+glPK8kkr7qoXhJ5UIAHYfK+a8NpToXTnpXH2J3YL4YGM51VabS6fXqaWNxacpyN+H2em9iQsPZFJKDC9eN4ZRvbvxfxcOBuD5r7P4bPuRBufUWG18ueMoizKytUuqUl5SXGGfXqJ+1VBYoB/x4YFt7kJaVWNvLfZ1cYmgd1QwNsFt1UNaInCTcwbH8sSVw7n7zU088P5Wqq3CoYJyjpdUsnxXLvuP23sAfJt1gr9dMdzL0SrV9dSuYx5er2oI7DORrt17ok2dPmp79vSMDHJNkA59HWMJsnLbVmXVEi0RuIkxhktHJnJ+ahzFFTX8+q1NPPHFLv67NpuSihqeuWYUcyf35Z0NOazYneftcJXqcorKa0sEDRPBpAHRHCwoZ+G61k8vs/9EGX4+xvWJwPHmn+WmOYe0ROBm9144iPTkKGLCAhgYH0ZydAgWY/DzsXD24FiWbDnMHz/ZwYf9JrpkVSOllHPKHGOCggMatt9NTrF3Bb/v3S0E+/swc0SC09fcf7yUXt2CXd5Y3C3En27BfmTluWd0sZYI3KxvTCg3TerLzBEJDIoPJ8DXp66xJ8DXh3suGMi2Q0Vc+swqXlm9r67XQX5plRejVqrzq63P9z+p8bVXVDDPzhkNwD+/ymxVqWBfXhm9uwe7Lsh6+saEum0WUk0EXnbJsJ5cPbY3BWVVzPtgG498uI0lWw4z6rHPWZmpVUZKuUulIxEE+p36NjgtLZ5HZw5h97ESpxtoRYQDJ8pI6u76OnyAgfFh7DhUVDefkStpIvAyi8Xwh0uHsvzXU/jpxGQWrN7P7a9vQAR+//EOSh1d3JR
SrvVDiaDxrt0jekUCsNnJdYyLKmooqawhwcXtA7XOSOpGcWUNO4+4fpEapxKBMSbEGGNx/DzAGDPDGOPX0nnKecbYV0mbf/UozhoQwy1n9WXnkSJuWpBRV12klHKd2q7bAY2UCAAGxYfj72Nhc06BU9fLK7FPEx0T5p61hcf0iQIgY1++y6/tbGPx18AkY0w34DNgHXAlcI3LI+ripg/rwfRhPQBI7h7Cve9u4dGPtnP/9MFuGUiiVFfVVBtBLX9fC4N7hDldIshzrBfQPdTfNQGeJLFbELFhAWzKdi4xtYaz7yxGRMqAHwP/EpFZwBCXR6MauPKMXswZ15uXV+3j6S93ezscpTqVukTg2/TbYEpcmNM9dfJK7B08okPdUyIwxtA/NpSsPNc3GDudCIwxZ2IvAXzs2KZzJriZMYbHfjSUmSN68uzXWXx3IJ+jRRXUWG3eDk2pDq+2sTigmUTQOyqYo0WVTq1jfLzUXiJwVyIASI4OYa8XE8EvgPuA90RkmzGmL7CspZOMMdOMMd8bYzKNMfc2sv96Y0yuMWaj4+um1oXfNcy7OJX48EBufHkdEx7/iqtf+FYbkZU6TVU1NiyGZqeb7uPoCnrgRMtrAeQVV2IxEBXinqohsCeCwvJql3cvdyoRiMj/RGSGiPzJ0WicJyJ3NXeOMcYHmA9cCKQCs40xqY0c+qaIjHB8vdjaF9AVdA8N4MXrxlBebaXGJqzdd4LfL9nh7bCU6tCqrLZmq4Xgh9XBaqeEaU5uSRVRIf4uH0xWX+30Eq6uHnK219AbxphwY0wIsBXYboy5p4XT0oFMEckSkSpgITDz9MLtugbEhfH2reP55jdTmTu5L298e4CntN1AqTarrLYS4Nt8DXcfx5gAp0oEJZV0D3FftRBAkiMR7PNGIgBSRaQI+BHwCZAMXNvCOQlA/UV+cxzbTnaZMWazMeZtY0yvxi5kjJlrjMkwxmTk5uY6GXLnk5YQQa+oYH47bRAXDInj+a+znKq7VEqdypkSQbdgP0IDfDlwvOU33rySSqLD3FctBNRNX+HqdgJnE4GfY9zAj4DFIlINuKJz+4dAkogMAz4HFjR2kIg8LyJjRGRMTExMY4d0KT4Ww9xm5XsAACAASURBVDVj+1BSWcNyXTNZqTaprLE121AM9g4bvaOC2e9EieBYUSVxYYGuCq9R/r4WenULYq8Tiak1nE0EzwH7gBDga2NMH6Cl4W0Hgfqf8BMd2+qIyHERqXQ8fBEY7WQ8Xd6Z/boTHx7IXQs38uI3WW7pSaBUZ1ZZ03KJAOwNxgdaaCOw2oQjRRX0iHRvIgB79dBeF8855Gxj8VMikiAiF4ndfmBqC6etA1KMMcnGGH/gKmBx/QOMMT3qPZwBaAuok/x8LLx7+3hSe4Tz2Mc7uOgf35B5rITvjxRTY7Vx+TOr+NH8lSzKyOZYcYW3w1Wq3amqsTU5mKy+3lH29YKbm+Mnr6QSq02Ij3DP9BL1JUeHsO94aaunyG6OUyOLjTERwEPAZMem/wGPAk0OuRORGmPMncBS7GMOXnJ0PX0UyBCRxcBdxpgZQA1wAri+rS+kK+oZGcSCG9J5dc0+nv4qk5n/XEFpVcM2g43ZBYQG+PL6TWMZ7pg7RSllTwQBfi0Ph+rdPZgqq40jRRVNziN0qMA+MV3PCPeXCJKjQyirsnKsuJK4cOeeb+vB5kdHO1s19BJQDFzh+CoC/tPSSSKyREQGiEg/Efm9Y9s8RxJARO4TkSEiMlxEporIzuavqE4WEezHnWensHDuOIIDfOnu6MOcHB1C5u8vZNEtZ9ItxI+fLsjgSKGWDJSqVVljJcCJEkGfKHtPnf3N1MvX/m/FeygRQOsWqbnl1fXN7nd2rqF+InJZvcePGGM2Oh2FcruRvbux9v/OwRhTN/mVr4+F9OQoXrruDGbOX8kvF23kxevGEOyv6xEpVVVjIySg5f+FXlH2UkDOiXLo1/gxhxyJoKeHqoYA9h0v5cx+3Z0
6p6Cs+QFozpYIyo0xE2sfGGMmAO5ZRVm1We36qtGhAQ2GuafEhXHXOSms2nOc8Y9/pSUDpXB0H3WiRFBb/XKkqOn/myOF5QT6WYgMdv+kzD0jgvD3tTjdQaSqxnZKlfHJnP1oeCvwiqOtACAfuM7Jc1U7cPOkvkQE+XH/e1uY9exKfh1ewMWx4JOQAGPHQisX6Vaqo6usdq7XUKCfD1Eh/hxu5gPUocIKekQEtXqx+7awWAxJ3YOdTgSFjrWZm+NUIhCRTcBwY0y443GRMeYXwGanIlFe52MxzE7vTa/Vyxnw0K8JLivG6u+LjwEiI+G55+Cii7wdplIeU2VteRxBrfjwQI42UyI4XFBODw+0D9RKjg5xetnKwvKW5yVq1QT3IlLkGGEM8MvWnKvagSVLmHjfrcQW5hJaXYF/aQmUlEBODlx+OSxZ4u0IlfIYZ0sEAD0iApstERwprPBIQ3GtlNgw9uWVUuLE5JP5ZS2XCE5npROtS+hIRGDuXChvommnvBxuucV+nFJdgL1E4Nxs+vERgRwpbPx/x2oTjhZXeqShuNb4ft2psQnfZh1v8dgCNycCfcfoSL79FgpbWGmpoADWrvVMPEp5WZWTI4vBXjWUX1bd6NxeucW1g8k8VyIY1acbgX4Wvt7V8txrLfUYghYSgTGm2BhT1MhXMdDT6aiV9x0+DJbm/+jFYoFDhzwUkFLeVVljdToRJHSzf9pvbDrqQ46SQk8PTC9RK9DPh3MHx/HftdktrqnsTGNxs3dBRMJEJLyRrzAR0c7oHUmPHmBrfmWzispqqmLjPBSQUt5jswnVVnG6sXhU724ArN134pR9hwscg8nCPVc1BPDYj9Lw9TG8u+Fgs8cVlFW3uEaCrobeVYwdCxERzR6S7x/CM2XRHgpIKe+psra8XnF9fboHExce0Gid/PdHirCYHwZ6eUpksD8pcWHsOlrc7HH5ZVVEBjU/vkETQVdhDDz/PAQ1/qlFgoL44NZ5zF++h8xjzi3WrVRHVduAGhbo3AAwYwxjk7uzdu+pJYItBwtJiQ0jyN/zy7gPiA1ldwv/r3kllXRrYflMTQRdyUUXwdtvQ2IihIZCeLj9e2Ii5u23ufzBuQT6Wfi/d7dga2amRaU6utp5g2qXonTGiF6RHCuubDCeQETYcrCQtITmS9vukhIXSm5xZbMNwpnHSujbQmlFE0FXc9FFcOAAfPEFvPyy/fuBA3DRRcSEBXD/9MGs3XeCm1/J4IONzdc9KtVR1S492acViWBYov3NfkvOD73vjhZVkldSxdCEcNcG6KSU2DCAJksFlTVW9h0vY0BcWLPX0QbfrsgYe5tBI64Y04tDBRX848vdfPX9Mcb3iyYmzL3rsCrlaQdOlOFjMXW9gZyR2jMci4HNBws5N9XeqaK2ZNEvNtQtcbYkJc7+vLuOFnNGUtQp+/fmlWK1Sd1xTdESgWrAGMPd5w3g87snIwKPf7KT977LobJG10ZWncf+42X0jAzEz4lJ52oF+/syuEc4b2Vkc9Cx/kBOvv17YjfnSxau1DMiiGB/H3YfbbxEsMuxvaUSgSYC1aiUuDDSk6J4Z0MOd7+5iTte/46qmua7nyrVUew/UVa3zkBr/OmyYRwvqWLBqn3AD4nAk2MI6rNYDCmxoew+1njPoYOO+Pp0bz5RaSJQTXr95rGsu/9cHr4klS92HOWnC9bx8ebDjY6uVKojKS6vbtOU0WkJEQzvFVHXeygnv4y48ACnp6pwh/6xYU2WCArLq/H3sRDUwkpsmghUk/x8LMSEBXD9hGR+N3MIKzLzuOONDdz7zmaXrpeqlKc5u3B9Y85IimLrwULKqmrIyS/3WrVQrUHxYRwrrqxbLrO+wvIqIoL9WpweWxOBcsq1Zyax5K5JTB/Wg/c3HuLGl9fxzvocaqxaXaQ6ntZMQX2ycX3tE76lzlvK6qzjJLaiwdkdzh9ib7helJF9yr6CsuoWB5OB9hpSrTC4RzhPXzWSpO7BvLb
mAMu+z+VQQTk/OyfF26Ep1SpVNc6tTtaYif2jmZQSzTe78xjVO5LLRye6OLrW6dM9hLHJUTz5xW5sNuGX5w+s21dQ5lwVmJYIVKtYLIZ7LhjExnnnMbF/NAvXZevgM9XhtGbm0ZNZLIbnrx3DRz+byLu3T2BSSoyLo2u9p2aP5OxBsTy9LLPBJHQF5dVEOFEi0ESg2sQYwxVn9OJgQTlPfbWb/NKWp7pVqr2osrY9EQAE+ft4bTRxY+LCA3nyqhEALNv5w9TUReXVRAQ1P70EaCJQp+GitHimDIzhyS92M/nPy/hmd8tzoyvlbVabYLUJ/j7e6+njDuGBfiR2C2rQlbSgrMr7VUPGmGnGmO+NMZnGmHubOe4yY4wYY8a4Mx7lWr4+Fl74yRheuTGd7qH+/HXp994OSakW1Y6HOZ0SQXs1oF5X0qoaG6VVVqcai912J4wxPsB84EIgFZhtjElt5Lgw4OfAt+6KRbmPn4+FyQNimDOuD5tyClucElcpb+vMiSAlLoysvBKqrba6BWkivFwiSAcyRSRLRKqAhcDMRo77HfAnoOmVoVW7N2NET0IDfLnmxW/ZcbjI2+Eo1aRKq31AZGdMBAPiQqm2CvuPl1JYbm+383ZjcQJQv2NrjmNbHWPMKKCXiHzsxjiUB8SGBfLObePxMYbZL6xh68EW1kdWyktqSwQBbew+2p7Vzim062gJRworAYgObXnSSK/dCWOMBfg78Csnjp1rjMkwxmTk5mqDZHs1MD6MRbecSYi/L7e9vl6nolDtUmeuGuoXE4oxsPtoCdsO2T+MDe7R8hTZ7rwTB4Fe9R4nOrbVCgPSgOXGmH3AOGBxYw3GIvK8iIwRkTExMd7vs6ua1rt7MH++fBjZJ8p54vNd3g5HqVO0dpnKjiTI34de3YLZdayYrYeKSIgMIqqF1cnAvYlgHZBijEk2xvgDVwGLa3eKSKGIRItIkogkAWuAGSKS4caYlAdM6B/NNWN789zXWXy+/ai3w1GqgboSQSesGgJ7O8GuI8VsO1jIkJ7OLZjjtjshIjXAncBSYAewSES2GWMeNcbMcNfzqvbhoUuGMCg+jAfe31LXe0Gp9qAzVw0BjO4Txe5jJWTllTK2b3enznHrXEMisgRYctK2eU0cO8WdsSjP8ve18OfLh/Gj+Sv5/cfbmdA/mnMGxxEaoNNbKe/q7Ing5knJnCitxM/HwvXjk5w6R/8rldsMS4zk5kl9ee7rLBZl5HD9+CQenjHE22GpLq6yE7cRgH2g5/3TTxmy1azOeSdUu3H3eQPqFv3+bNsRnaBOeV1nbyNoC70Tyq0C/Xz44I4J/OuaURwqrODprzK9HZLq4jp71VBb6J1QbmeM4cK0eGaO6MnTX+3meEmlt0NSXZiWCE6ld0J5hDGG26f0p8YmfLT5sLfDUV1YZx5H0FZ6J5THDIwPI7VHOAtW7aOyRkcddyXvbsjhj5/s8HYYgFYNNUbvhPKoe6YNJCuvlH+v2OvtUJSHiAi/XLSJ5/6Xxao9ed4ORxNBI/ROKI+aOjCW81PjePrLTA4XliOivYg6u60Hf5iN9tn/ZXkxEru6qiFtI6ijd0J53IMXp1JltXHmH79izr+/xapdSju1FZn2UsDs9N6szMzjeEkly3Ye89r0I5XaWHwKvRPK43pFBXN+ahwAKzOP869lmTq+oBPbk1tCbFgA143vg9UmfLzlMDe8vI6bX8kgK7fE4/FU1djw8zFYLMbjz91eaSJQXvH4ZcN47trRnDs4lr99vosrn19NaWWNt8NSbrAnt4R+MaEMig9nQFxogyVNn/3fHo/HU1Vj09LASfRuKK+ICPLjgiHxvPCTMfxu5hDW7cvn4y3arbSzERH2HCuhf2woADOG96Sowp7wx/WN4rPtR6l21Nl7SpXVqg3FJ9G7obzKGMOccX1I6h7M2+tz2HaokPIq7VraWeSWVFJUUUO/mBAArh7bh2vH9eHeCwdx44RkCsqqWb3nuEdjqqq
xEeDr49HnbO80ESivq00Ga/eeYPpTK/hLvaoD1XHZbMID720FYETvbgBEhfjzux+lcetZ/Zg8IIYQfx+WeLgkWFppJThAE0F9mghUu/DTicn84twUAD7cfEi7lXYC/16xl8+2H2XexamM6BV5yv5APx/OGRzH0m1HqPFg9VBpVY1Oh34STQSqXTDG8ItzB/Dny4eRW1zJun353g5JnaY31h5gfL/u3DAhqcljzh8SR35ZNVsOFnosrtLKGkL8NRHUp4lAtSsXpsUTHx7Ife9upqJa2wo6qoKyKvbmlTIxJRpjmu6meaZjBa1VHmwnKKm0EqJVQw1oIlDtSligH3+dNZw9uaUMevBT7nxjAwVlVa2+js0mrN9/QquYvGRzjv0T/ojEU6uE6useGsCg+DA+236UsirPdB8uq6ohRKuGGtBEoNqdiSnR3H3uAEb1juTTrUf4x5e7W32NdzbkcNkzq3l3w0E3RKhasjmnAIA0x6JEzZmd3ptN2QU8vHibu8MCHFVDmgga0ESg2qWfn5vCu7dPYMbwnixal01ucSWbsgu4+ZUMth8qavK8oopqPth4kN++sxmAP36yk6KKak+FrRyOFlXSLdiP8EC/Fo+9bnwSl45M4PPtRz0y3UhJpTYWn0wTgWrXbp3Sj9IqK2f8/gtmzl/J59uP8pOXvuXt9TkNjjtUUM7M+SsZ94cv+fnCjdjE3t5wvLSSRz/crvMZeVhxRTXhQS0ngVpTBsaQX1bNJkdJwl2sNqGi2kawv7YR1KdpUbVrA+LC+N3MIbyz4SDnD4ljYFwYT32Vya/f2sSnWw8zsX80hwor+P5IMZuyCxjRK5LfThuEiDCqTzf6fZXJP5dl0jsqmLvOSfH2y+kyiipqCAt0/u3lrAExBPn5MO+Drbx1y3iC3PRGXepoh9ASQUN6N1S7d+2ZSVx7ZlLd4ykDY5m/LJMFq/bxxY5j+FoMNTbhhglJPHTJkAbn/vqCgWzKKeDNddncObW/TjTmIUXl1U5VC9WKDPbnqdkjufmVDBas3setZ/VzS1y181lpG0FDejdUh+NjMdx1Tgq3ntWPvXml9IsJ4UhRBXHhgY0ef9moRH7x5kbW7TvBWEd3ReVexRU1JEUHt+qc81LjOGtADM/9bw/XnZnkllJBaaW9S7JWDTXk1jYCY8w0Y8z3xphMY8y9jey/1RizxRiz0RizwhiT6s54VOfi72thYHwYvj4WErsF49fEjJLnD4kj2N+H9zdqDyJPKapoXYmg1m1T+pFfVs2Hmw45fU5VjY0PNh6kxInZa2tLBFo11JDbEoExxgeYD1wIpAKzG3mjf0NEhorICODPwN/dFY/quoL9fZk2JJ6PNh926s1Cnb6i8tY1FtcamxzFwLgwXl97oNH9x4ormPfBVtbuPVG3bcGqffx84UYueOJrih09xESE/3tvCwtW7WtwvlYNNc6dJYJ0IFNEskSkClgIzKx/gIjU7wcYAmjXDuUWPxmfREllDY9/skMHmblZjdVGaZW1VY3FtYwxXDY6gU3ZBezLKz1l/x2vb+CV1fv5x5e7AHsvoJdW2te/PlRYzh+W7OS1NftJvm8Jb3x7gIcWb6tboxig1DGzrZYIGnJnIkgAsus9znFsa8AYc4cxZg/2EsFdboxHdWEjekVyw/hkXltzgLsWbuREaetHKyvn1Ja62lI1BHDxsJ4A/OhfK9lbLxlkHiupm4Nq3b58yqpqOHCijMOFFfz5smHMndSX/649wAPvb8VioFdUEABf7PhhScySSnuJQdsIGvL6OAIRmS8i/YDfAg80dowxZq4xJsMYk5Gbm+vZAFWn8cD0wdx1dn8+3XqYP32y09vhdFpF5fZE0JYSAUDPyCCevHIEBWXVPFdvBbMPNx3CYuAfV42gqsbGFzuOsTfPvtRlv9gQ7rlgINed2YdbzurLpofOZ/mvp9IjIpA319k/j24/VMT8ZXvoFuxHfETjHQu6KncmgoNAr3qPEx3bmrIQ+FF
jO0TkeREZIyJjYmJiXBii6kosFsMvzx/I1em9eWdDDgcLyr0dUqdUO5K7LW0EtX40MoGrzujF+xsPkpNfBsC3e4+TlhDBxcN6khwdwgtfZ5GVay8xJEeH4utj4ZGZadx34WDCAv3wsRguH53I17tzWbbzGJc/u4pjRRXMv2YUwTr7aAPuTATrgBRjTLIxxh+4Clhc/wBjTP0RPtOB1k8qo1QrzT2rH8bQ4NOmcp26RNDGqqFat03ph5/Fwt1vbqSqxsZ3BwoY0ycKH4vhpxOT2XKwkA83HSI80JduwY0/15xxfYgPD+SGl9dRVmVl8Z0TGd8v+rTi6ozclhZFpMYYcyewFPABXhKRbcaYR4EMEVkM3GmMOReoBvKB69wVj1K1EiKDuHx0Iq9/e4Diihr25JZw77RBjOzdjWqb7bTfwLq6onJ7Imhr1VCtPt1D+NX5A3j4w+0MeOATAM5Isq90dsmwnjy8eBubcgoZ3iuyyamu48IDee/2Cfx7RRZJ0SEkRYecVkydlVvLRyKyBFhy0rZ59X7+uTufX6mm3D89ldziKt77zl5befWL3+JjMVhtwqD4MCb2j2bDgXxmjenFRUN7EHEa1RxdzdGiSgBiwwNO+1qXjkrk4Q+3A/aBgZMH2KuGI4L9mDools+3H2V4CzOcxkcEcv90HaLUHNPRutKNGTNGMjIyvB2G6gREhOXf5xIZ7MfSbUfx8zEE+vmwbOcxNhzIp/48dX++bBhXnNGL4opq9uWVMdSJ6ZW7qj99upMXvs5i12MXumRKj29259It2J+0hIb3/HhJJfuOlzI8MRLfJgYTqh8YY9aLyJjG9mmLieqyjDFMHRQLwEjH4uoAd0ztj80mHDhRxq2vrcfXx/Cbdzbz+toDbMouwBh497bxDc5RPzhaaJ/uw1XzOk1KabyDSPfQALqHnn6pQ7WD7qNKtUcWiyEpOoRPfzGZd2+bwM2Tkil0rJQW6OvDwx9u14FpTbDP+6Rv0B2JlgiUaoG/r4X7p6dy34WDycor5du9x7n/va2syTrBmf10EruTHSmqYFB8mLfDUK2gJQKlnGSxGPrHhnLZqERiwwJ4aPFWyh1TFqgf1FYNqY5DE4FSrRTo58NfZw1n19ESXluz39vheNTxkkp2Hy1ucn9RRTWlVVbiNRF0KJoIlGqDyQNiSE+K4pU1+7rUMpj3vbuF8574mj8s2dHo/mU7jwEwLDHSk2Gp06SJQKk2unFiEtknyuvGInRmIsKqzDyWfW9/o3/+6yzufnMjd7y+gUP1pup4/7uD9IgIZGxylLdCVW2giUCpNrpgSDzDEyP422ffU1HdudsKPtt+lKtf/JZqq/C3WcM5d3As7313kKXbjnDpv1ayL68Uq01Yk3WC81PjdEnQDkYTgVJtZIzh3gsHc7iw4pQFUDqb2hk8ASalRPPctWNYctckPr5rEhXVNn7zzmb25pVSXm1lSIIOtutoNBEodRrO7NedcX2jWLguu9ONK7DZhJdW7OWaF9fw1c5j3HJWX5b+YjKx4YH4WAypPcMZGB/GPRcMZO3eEzz4/lYAhvQM93LkqrU0ESh1mi4e1pO9eaV830xvmo7oX8szefSj7azMPA7A3El9GdjI+IDZ6b2ZPqwHq7OO4+djSInVMQQdjQ4oU+o0XTAknkc/2s4tr67nrVvPJDas43edPFhQzlNfZjJ9WA8mp0QTGuDX5HQOPhbDk1eOAKCssgZ/X/182dFoIlDqNMWEBfDKjenMefFbfvfRDuZO6kvfmJAOvUD6f1bsxSrC/100mITIoBaP9/OxMP/qUZ2ueqyr0NStlAuM69udq9J78eGmQ1zyzxXc9+4Wb4fUZqv3HOeV1fu5ZFgPp5JAfU2tC6Dat477kUWpdubhS4YwOSWGfy3fw4ebD3HgRBlRIf78+7oxHeoN8t8rsuge6s8jM9O8HYryEC0RKOUivj4Wzh8Sz3+uP4OeEUFszC7gq53HWP59rrdDa5VDBRWk9gj
XxXi6EE0ESrlYtxB/ltw1iWfnjKZHRCB/Wfo91Vabt8Ny2uHCcnpEdvwGb+U8TQRKuUFEsB/T0uJ5eMYQth8u4v73tlDgWM+gPauotpJfVk2PiNa1DaiOTROBUm50wZB4bpncl0UZOUz80zKWbjvCZc+s4rsD+XXHHC+p9GKEDR0prADQ2UO7GE0ESrnZvRcO4tWfphPs78Mtr65n/f58frVoE8eKK3j92/2MfuwLFq49QFZuCdknyrwa66FC+wRyPSI0EXQl2mtIKTczxjApJYa3bx3PE1/sYk9uCZtzCkn//ZcA+PkYHlq8DWOgotrGX2cN5/LRiS557mNFFYQH+RHo5+PU8bUlgh6t7DaqOjZNBEp5SO/uwTxx5QiqamyszMxjx5EifC2G81LjueK51ZworSI5OoR5H2zFZhOuOKPXKdeorLES4Ovcm3q11cZFT60gLjyARbec6dQAt9oppbVqqGvRRKCUh/n7Wpg6KJapg2Lrtr13+3gqqm2EBfpy13+/4zfvbMZiMVw2KoFqq7App4C/f7aL77Lz+fiuSfSLCW3xeVbvOU5eSSV5JZX8fOF3zL9mVItJZP/xMmLDAgjydy7ZqM7BrYnAGDMN+AfgA7woIo+ftP+XwE1ADZAL3CgiXWvtP6WAxG7BdT+/dtNYrv/PWu57dzP/WbmX7YeLEIHo0ACqrcI5f/sfI3tHkhwdQnL3ELqHBvDqmv0UlFVx//TBXDysJwCfbD1MaIAvvzg3hcc+3sHkPy/jyjG9uPu8AU0OcDtwoozeUcGN7lOdl9sSgTHGB5gPnAfkAOuMMYtFZHu9w74DxohImTHmNuDPwJXuikmpjsDPx8K/rhnN7OfXsO1QEYPiw5id3psrxvTiiS92sXDtAfx9LHy54xiF5dUADE+MICzQlzvf+I6ySis/HpXA0m1HOXtQLDc55j568Zu9PPVVJn4+Fn52TkqD59x9tJiQAF+yT5Qxrl93b7xs5UXuLBGkA5kikgVgjFkIzATqEoGILKt3/BpgjhvjUarDiAjy461bz+Tz7UeZlhZf19h777RB/HbaIHwcK4D9YckODhdW8PcrhmO1CTctyGDe4q1sO1TIidIqLhoaD8DZg+KYOjCWuxZu5KmvdnPh0B70jw1lS04hv3prI7uOlhAXHsDRokotEXRB7kwECUB2vcc5wNhmjv8p8Ikb41GqQwkJ8OVHIxMabDt5Ccj/u2hw3c9+PvDXWcOZOX8FC1bvJyEyiLMG/NAOYYzh4UtS+XLHUZ74YhcVVVa+3HmMYH8fpgyMqZsKQxNB19MuGouNMXOAMcBZTeyfC8wF6N27twcjU6pjiY8I5JvfnI1NhABfyyltAd1DA7j2zD4897+sum3zrx7F1EGxfLDxIG+uy+ZMrRrqctyZCA4C9fu/JTq2NWCMORe4HzhLRBodYikizwPPA4wZM0YnPFeqGS0tDHP3uQMor7JSbRX+cGlaXbKYOSKBmSMSmj1XdU7uTATrgBRjTDL2BHAVcHX9A4wxI4HngGkicsyNsSilHAL9fHhUp5hW9bhtigkRqQHuBJYCO4BFIrLNGPOoMWaG47C/AKHAW8aYjcaYxe6KRymlVOPc2kYgIkuAJSdtm1fv53Pd+fxKKaVappPOKaVUF6eJQCmlujhNBEop1cVpIlBKqS5OE4FSSnVxmgiUUqqLMyIda6CuMSYXcNVU1RFAoYfOd+bYlo5pan9j253ZFg3ktRCTq3jyXjt7fHPHdOR73djzu/N8T97rxrbrvXbumD4iEtPoGSLSZb+A5z11vjPHtnRMU/sb2+7MNiCjM95rV9zvjnyvPX2/PXmvm7i3eq9PM56uXjX0oQfPd+bYlo5pan9j253d5imevNfOHt/cMR35Xrvi+T35t92ae93Ydr3Xbb8e0AGrhpTrGGMyRGSMt+PoCvRee47e69br6iWCru55bwfQhei99hy9162kJQKllOritESglFJdnCYCpZTq4jQRKKVUF6e
JQDXKGDPFGPONMeZZY8wUb8fT2RljQowxGcaYi70dS2dmjBns+Jt+2xhzZ9ckAwAABORJREFUm7fjaS80EXRCxpiXjDHHjDFbT9o+zRjzvTEm0xhzbwuXEaAECARy3BVrR+eiew3wW2CRe6LsHFxxr0Vkh4jcClwBTHBnvB2J9hrqhIwxk7G/ib8iImmObT7ALuA87G/s64DZgA/wx5MucSOQJyI2Y0wc8HcRucZT8XckLrrXw4Hu2JNunoh85JnoOxZX3GsROeZYKvc24FURecNT8bdnbl2qUnmHiHxtjEk6aXM6kCkiWQDGmIXATBH5I9BcdUQ+EOCOODsDV9xrR9VbCJAKlBtjloiIzZ1xd0Su+rsWkcXAYmPMx4AmAjQRdCUJQHa9xznA2KYONsb8GLgAiAT+6d7QOp1W3WsRuR/AGHM9jpKYW6PrXFr7dz0F+DH2DzdLmjquq9FEoBolIu8C73o7jq5ERF72dgydnYgsB5Z7OYx2RxuLu46DQK96jxMd25Tr6b32HL3XLqCJoOtYB6QYY5KNMf7AVcBiL8fUWem99hy91y6giaATMsb8F1gNDDTG5BhjfioiNcCdwFJgB7BIRLZ5M87OQO+15+i9dh/tPqqUUl2clgiUUqqL00SglFJdnCYCpZTq4jQRKKVUF6eJQCmlujhNBEop1cVpIlCdhjGmxMPPt8rDzxdpjLndk8+pugZNBEo1wRjT7FxcIjLew88ZCWgiUC6niUB1asaYfsaYT40x6x0rrg1ybL/EGPOtMeY7Y8wXjnUXMMY8bIx51RizEnjV8fglY8xyY0yWMeauetcucXyf4tj/tjFmpzHmdWOMcey7yLFtvTHmKWPMKWsNGGOuN8YsNsZ8BXxpjAk1xnxpjNlgjNlijJnpOPRxoJ8xZqMx5i+Oc+8xxqwzxmw2xjziznupOjER0S/96hRfQEkj274EUhw/jwW+cvzcjR9G1t8E/M3x88PAeiCo3uNV2KctjgaOA371nw+YAhRin/DMgn0ahInYF5rJBpIdx/0X+KiRGK/HPn1ylOOxLxDu+DkayAQM8P/t3b1rFEEcxvHvE9BCTlJpEAVfwMJKwc5CsMl/IBYKgqD/gZ2FlXZaiEjsRSVdVFQEC0EEEVSMoAjGwurAF/RCOMQ8Fjsha7zIiTmit88HBm5vZnbmOO5+O7PLzDZgulZvHLhc8kaAm8D+1f4ekv6/lGWoY2hJagH7gMlygQ6Lm+xsAa5L2gSsBWZqVadsz9WOb9nuAl1JbWCMX7fvfGz7fWn3GdWfdgd4a3vh3FeBE8t0957tjwtdB86UHbnmqdbcH+tRZ7ykp+W4BewEHizTRkRPCQQxzEaAz7b39Mi7QLUF51TZrOR0LW92Sdlu7fV3ev9u+inzO/U2DwMbgL22v0l6RzW6WErAWdsTf9hWxE9yjyCGlu0vwIykgwCq7C7ZoyyuW390QF14Deyoba94qM96o0C7BIEDwNby/ldgfa3cXeBYGfkgabOkjX/d62icjAhimKyTVJ+yOUd1dX1J0ilgDXANeE41ApiU9Am4D2xf6c7YniuPe96RNEu1dn4/rgA3JL0AngCvyvk+SHooaRq4bfukpF3AozL11QGOAO2V/iwx3LIMdcQASWrZ7pSniC4Cb2yfX+1+RdRlaihisI6Xm8cvqaZ8Mp8f/5yMCCIiGi4jgoiIhksgiIhouASCiIiGSyCIiGi4BIKIiIZLIIiIaLgfKFx/TS6PY/4AAAAASUVORK5CYII=", 285 | "text/plain": [ 286 | "
" 287 | ] 288 | }, 289 | "metadata": { 290 | "needs_background": "light", 291 | "tags": [] 292 | }, 293 | "output_type": "display_data" 294 | }, 295 | { 296 | "name": "stdout", 297 | "output_type": "stream", 298 | "text": [ 299 | "Suggested LR 1.0148376909312998e-05\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "lr_finder.plot()\n", 305 | "\n", 306 | "print(\"Suggested LR\", lr_finder.lr_suggestion())" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "id": "NcT19wqkmvd6" 313 | }, 314 | "source": [ 315 | "Now we will apply the suggested learning rate to the optimizer, and train the model again with optimal learning rate." 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 29, 321 | "metadata": { 322 | "colab": { 323 | "base_uri": "https://localhost:8080/" 324 | }, 325 | "id": "GtEYvL92RDXU", 326 | "outputId": "fdc40ae4-8c0e-473f-8ae5-1d44d4cb8f9f" 327 | }, 328 | "outputs": [ 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "1.0148376909312998e-05\n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "lr_finder.apply_suggested_lr(optimizer)\n", 339 | "print(optimizer.param_groups[0][\"lr\"])" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 30, 345 | "metadata": { 346 | "colab": { 347 | "base_uri": "https://localhost:8080/" 348 | }, 349 | "id": "DJqgyaFnmvd7", 350 | "outputId": "bb1f62d5-c72e-45a7-b547-5a44b05f9efe" 351 | }, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "State:\n", 357 | "\titeration: 1407\n", 358 | "\tepoch: 3\n", 359 | "\tepoch_length: 469\n", 360 | "\tmax_epochs: 3\n", 361 | "\toutput: 0.09644963592290878\n", 362 | "\tbatch: \n", 363 | "\tmetrics: \n", 364 | "\tdataloader: \n", 365 | "\tseed: \n", 366 | "\ttimes: " 367 | ] 368 | }, 369 | "execution_count": 30, 370 | "metadata": { 371 | "tags": [] 372 | }, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [ 377 | 
"trainer.run(train_loader, max_epochs=3)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 31, 383 | "metadata": { 384 | "colab": { 385 | "base_uri": "https://localhost:8080/" 386 | }, 387 | "id": "MU8E7PpleaNm", 388 | "outputId": "e308fcaa-92af-462b-9148-e64ec1532a34" 389 | }, 390 | "outputs": [ 391 | { 392 | "name": "stdout", 393 | "output_type": "stream", 394 | "text": [ 395 | "{'Accuracy': 0.9715, 'Loss': 0.0908882568359375}\n" 396 | ] 397 | } 398 | ], 399 | "source": [ 400 | "# Calculate the new metrics after using the optimal lr\n", 401 | "evaluator.run(test_loader)\n", 402 | "print(evaluator.state.metrics)" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": { 408 | "id": "ejVpTNh3MJc2" 409 | }, 410 | "source": [ 411 | "As we saw the accuracy increased and loss decreased on the test dataset when we trained our model for the same number of epochs with an optimal learning rate." 412 | ] 413 | } 414 | ], 415 | "metadata": { 416 | "accelerator": "GPU", 417 | "colab": { 418 | "name": "fastai-lr-finder.ipynb", 419 | "provenance": [] 420 | }, 421 | "kernelspec": { 422 | "display_name": "Python 3 (ipykernel)", 423 | "language": "python", 424 | "name": "python3" 425 | }, 426 | "language_info": { 427 | "codemirror_mode": { 428 | "name": "ipython", 429 | "version": 3 430 | }, 431 | "file_extension": ".py", 432 | "mimetype": "text/x-python", 433 | "name": "python", 434 | "nbconvert_exporter": "python", 435 | "pygments_lexer": "ipython3", 436 | "version": "3.8.10" 437 | }, 438 | "pycharm": { 439 | "stem_cell": { 440 | "cell_type": "raw", 441 | "metadata": { 442 | "collapsed": false 443 | }, 444 | "source": [] 445 | } 446 | } 447 | }, 448 | "nbformat": 4, 449 | "nbformat_minor": 4 450 | } 451 | -------------------------------------------------------------------------------- /how-to-guides/05-gradient-accumulation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 
| { 4 | "cell_type": "markdown", 5 | "id": "8a652d9a", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 17 | "# How to effectively increase batch size on limited compute\n", 18 | "\n", 19 | "To effectively increase the batch size on limited GPU resources, follow\n", 20 | "this simple best practice.\n", 21 | "\n", 22 | "" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "id": "dd80e226", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "from ignite.engine import Engine\n", 33 | "\n", 34 | "accumulation_steps = 4\n", 35 | "\n", 36 | "def update_fn(engine, batch):\n", 37 | " model.train()\n", 38 | "\n", 39 | " x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)\n", 40 | " y_pred = model(x)\n", 41 | " loss = criterion(y_pred, y) / accumulation_steps\n", 42 | " loss.backward()\n", 43 | "\n", 44 | " if engine.state.iteration % accumulation_steps == 0:\n", 45 | " optimizer.step()\n", 46 | " optimizer.zero_grad()\n", 47 | "\n", 48 | " return loss.item()\n", 49 | "\n", 50 | "trainer = Engine(update_fn)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "c74f90a0", 56 | "metadata": {}, 57 | "source": [ 58 | "If you prefer to use the PyTorch-Ignite helper functions for supervised training mentioned [here](https://pytorch.org/ignite/engine.html#helper-methods-to-define-supervised-trainer-and-evaluator), they also support Gradient Accumulation through the ``gradient_accumulation_steps`` parameter. \n", 59 | "For example \n", 60 | "\n", 61 | "```python\n", 62 | "update_fn = supervised_training_step(model, optimizer, criterion, gradient_accumulation_steps=4)\n", 63 | "trainer = Engine(update_fn)\n", 64 | "```\n", 65 | "would result in the same Engine as above." 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "id": "a7c4ea05", 71 | "metadata": {}, 72 | "source": [ 73 | "## Resources\n", 74 | "\n", 75 | "1. 
[Training Neural Nets on Larger Batches: Practical Tips for 1-GPU,\n", 76 | " Multi-GPU & Distributed\n", 77 | " setups](https://medium.com/huggingface/training-larger-batches-practical-tips-on-1-gpu-multi-gpu-distributed-setups-ec88c3e51255)\n", 78 | "2. [Code](https://gist.github.com/thomwolf/ac7a7da6b1888c2eeac8ac8b9b05d3d3#file-gradient_accumulation-py)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "4fcc2d56", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Python 3 (ipykernel)", 93 | "language": "python", 94 | "name": "python3" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 3 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython3", 106 | "version": "3.10.4" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 5 111 | } 112 | -------------------------------------------------------------------------------- /how-to-guides/06-data-iterator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "55c7fada", 6 | "metadata": {}, 7 | "source": [ 8 | "\n", 19 | "# How to work with data iterators\n", 20 | "\n", 21 | "When the data provider for training or validation is an iterator\n", 22 | "(infinite or finite with known or unknown size), here are some basic\n", 23 | "examples of how to setup trainer or evaluator.\n", 24 | "\n", 25 | "" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "e97045a5", 31 | "metadata": {}, 32 | "source": [ 33 | "## Infinite iterator for training\n", 34 | "\n", 35 | "Let’s use an infinite data iterator as training dataflow" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "id": "5b5f175a", 42 | 
"metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "1/3 : 1 - 63.862\n", 49 | "1/3 : 2 - 64.042\n", 50 | "1/3 : 3 - 63.936\n", 51 | "1/3 : 4 - 64.141\n", 52 | "1/3 : 5 - 64.767\n", 53 | "2/3 : 6 - 63.791\n", 54 | "2/3 : 7 - 64.565\n", 55 | "2/3 : 8 - 63.602\n", 56 | "2/3 : 9 - 63.995\n", 57 | "2/3 : 10 - 63.943\n", 58 | "3/3 : 11 - 63.831\n", 59 | "3/3 : 12 - 64.276\n", 60 | "3/3 : 13 - 64.148\n", 61 | "3/3 : 14 - 63.920\n", 62 | "3/3 : 15 - 64.226\n" 63 | ] 64 | }, 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "State:\n", 69 | "\titeration: 15\n", 70 | "\tepoch: 3\n", 71 | "\tepoch_length: 5\n", 72 | "\tmax_epochs: 3\n", 73 | "\toutput: \n", 74 | "\tbatch: \n", 75 | "\tmetrics: \n", 76 | "\tdataloader: \n", 77 | "\tseed: \n", 78 | "\ttimes: " 79 | ] 80 | }, 81 | "execution_count": 1, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "import torch\n", 88 | "from ignite.engine import Engine, Events\n", 89 | "\n", 90 | "torch.manual_seed(12)\n", 91 | "\n", 92 | "def infinite_iterator(batch_size):\n", 93 | " while True:\n", 94 | " batch = torch.rand(batch_size, 3, 32, 32)\n", 95 | " yield batch\n", 96 | "\n", 97 | "def train_step(trainer, batch):\n", 98 | " # ...\n", 99 | " s = trainer.state\n", 100 | " print(\n", 101 | " f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch.norm():.3f}\"\n", 102 | " )\n", 103 | "\n", 104 | "trainer = Engine(train_step)\n", 105 | "\n", 106 | "# We need to specify epoch_length to define the epoch\n", 107 | "trainer.run(infinite_iterator(4), epoch_length=5, max_epochs=3)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "id": "a755b048", 113 | "metadata": {}, 114 | "source": [ 115 | "If we do not specify **epoch_length**, we can stop the training explicitly by calling [`terminate()`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine). 
In this case, there will be only a single epoch defined." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 2, 121 | "id": "d48531dd", 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "1/1 : 1 - 63.862\n", 129 | "1/1 : 2 - 64.042\n", 130 | "1/1 : 3 - 63.936\n", 131 | "1/1 : 4 - 64.141\n", 132 | "1/1 : 5 - 64.767\n", 133 | "1/1 : 6 - 63.791\n", 134 | "1/1 : 7 - 64.565\n", 135 | "1/1 : 8 - 63.602\n", 136 | "1/1 : 9 - 63.995\n", 137 | "1/1 : 10 - 63.943\n", 138 | "1/1 : 11 - 63.831\n", 139 | "1/1 : 12 - 64.276\n", 140 | "1/1 : 13 - 64.148\n", 141 | "1/1 : 14 - 63.920\n", 142 | "1/1 : 15 - 64.226\n" 143 | ] 144 | }, 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "State:\n", 149 | "\titeration: 15\n", 150 | "\tepoch: 1\n", 151 | "\tepoch_length: \n", 152 | "\tmax_epochs: 1\n", 153 | "\toutput: \n", 154 | "\tbatch: \n", 155 | "\tmetrics: \n", 156 | "\tdataloader: \n", 157 | "\tseed: \n", 158 | "\ttimes: " 159 | ] 160 | }, 161 | "execution_count": 2, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "import torch\n", 168 | "from ignite.engine import Engine, Events\n", 169 | "\n", 170 | "torch.manual_seed(12)\n", 171 | "\n", 172 | "def infinite_iterator(batch_size):\n", 173 | " while True:\n", 174 | " batch = torch.rand(batch_size, 3, 32, 32)\n", 175 | " yield batch\n", 176 | "\n", 177 | "def train_step(trainer, batch):\n", 178 | " # ...\n", 179 | " s = trainer.state\n", 180 | " print(\n", 181 | " f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch.norm():.3f}\"\n", 182 | " )\n", 183 | "\n", 184 | "trainer = Engine(train_step)\n", 185 | "\n", 186 | "@trainer.on(Events.ITERATION_COMPLETED(once=15))\n", 187 | "def stop_training():\n", 188 | " trainer.terminate()\n", 189 | "\n", 190 | "trainer.run(infinite_iterator(4))" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "id": "30d63d14", 196 | "metadata": 
{}, 197 | "source": [ 198 | "Same code can be used for validating models." 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "id": "37190708", 204 | "metadata": {}, 205 | "source": [ 206 | "## Finite iterator with unknown length\n", 207 | "\n", 208 | "Let's use a finite data iterator but with unknown length (for user). In\n", 209 | "case of training, we would like to perform several passes over the\n", 210 | "dataflow and thus we need to restart the data iterator when it is\n", 211 | "exhausted. In the code, we do not specify `epoch_length` which will be automatically\n", 212 | "determined." 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 3, 218 | "id": "199087b1", 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "1/5 : 1 - 0.000\n", 226 | "1/5 : 2 - 1.000\n", 227 | "1/5 : 3 - 2.000\n", 228 | "1/5 : 4 - 3.000\n", 229 | "1/5 : 5 - 4.000\n", 230 | "1/5 : 6 - 5.000\n", 231 | "1/5 : 7 - 6.000\n", 232 | "1/5 : 8 - 7.000\n", 233 | "1/5 : 9 - 8.000\n", 234 | "1/5 : 10 - 9.000\n", 235 | "1/5 : 11 - 10.000\n", 236 | "2/5 : 12 - 0.000\n", 237 | "2/5 : 13 - 1.000\n", 238 | "2/5 : 14 - 2.000\n", 239 | "2/5 : 15 - 3.000\n", 240 | "2/5 : 16 - 4.000\n", 241 | "2/5 : 17 - 5.000\n", 242 | "2/5 : 18 - 6.000\n", 243 | "2/5 : 19 - 7.000\n", 244 | "2/5 : 20 - 8.000\n", 245 | "2/5 : 21 - 9.000\n", 246 | "2/5 : 22 - 10.000\n", 247 | "3/5 : 23 - 0.000\n", 248 | "3/5 : 24 - 1.000\n", 249 | "3/5 : 25 - 2.000\n", 250 | "3/5 : 26 - 3.000\n", 251 | "3/5 : 27 - 4.000\n", 252 | "3/5 : 28 - 5.000\n", 253 | "3/5 : 29 - 6.000\n", 254 | "3/5 : 30 - 7.000\n", 255 | "3/5 : 31 - 8.000\n", 256 | "3/5 : 32 - 9.000\n", 257 | "3/5 : 33 - 10.000\n", 258 | "4/5 : 34 - 0.000\n", 259 | "4/5 : 35 - 1.000\n", 260 | "4/5 : 36 - 2.000\n", 261 | "4/5 : 37 - 3.000\n", 262 | "4/5 : 38 - 4.000\n", 263 | "4/5 : 39 - 5.000\n", 264 | "4/5 : 40 - 6.000\n", 265 | "4/5 : 41 - 7.000\n", 266 | "4/5 : 42 - 
8.000\n", 267 | "4/5 : 43 - 9.000\n", 268 | "4/5 : 44 - 10.000\n", 269 | "5/5 : 45 - 0.000\n", 270 | "5/5 : 46 - 1.000\n", 271 | "5/5 : 47 - 2.000\n", 272 | "5/5 : 48 - 3.000\n", 273 | "5/5 : 49 - 4.000\n", 274 | "5/5 : 50 - 5.000\n", 275 | "5/5 : 51 - 6.000\n", 276 | "5/5 : 52 - 7.000\n", 277 | "5/5 : 53 - 8.000\n", 278 | "5/5 : 54 - 9.000\n", 279 | "5/5 : 55 - 10.000\n" 280 | ] 281 | }, 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "State:\n", 286 | "\titeration: 55\n", 287 | "\tepoch: 5\n", 288 | "\tepoch_length: 11\n", 289 | "\tmax_epochs: 5\n", 290 | "\toutput: \n", 291 | "\tbatch: 10\n", 292 | "\tmetrics: \n", 293 | "\tdataloader: \n", 294 | "\tseed: \n", 295 | "\ttimes: " 296 | ] 297 | }, 298 | "execution_count": 3, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "import torch\n", 305 | "from ignite.engine import Engine, Events\n", 306 | "\n", 307 | "torch.manual_seed(12)\n", 308 | "\n", 309 | "def finite_unk_size_data_iter():\n", 310 | " for i in range(11):\n", 311 | " yield i\n", 312 | "\n", 313 | "def train_step(trainer, batch):\n", 314 | " # ...\n", 315 | " s = trainer.state\n", 316 | " print(\n", 317 | " f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n", 318 | " )\n", 319 | "\n", 320 | "trainer = Engine(train_step)\n", 321 | "\n", 322 | "@trainer.on(Events.DATALOADER_STOP_ITERATION)\n", 323 | "def restart_iter():\n", 324 | " trainer.state.dataloader = finite_unk_size_data_iter()\n", 325 | "\n", 326 | "data_iter = finite_unk_size_data_iter()\n", 327 | "trainer.run(data_iter, max_epochs=5)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "id": "ee068ac8", 333 | "metadata": {}, 334 | "source": [ 335 | "In case of validation, the code is simply" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 4, 341 | "id": "beae6490", 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | 
"1/1 : 1 - 0.000\n", 349 | "1/1 : 2 - 1.000\n", 350 | "1/1 : 3 - 2.000\n", 351 | "1/1 : 4 - 3.000\n", 352 | "1/1 : 5 - 4.000\n", 353 | "1/1 : 6 - 5.000\n", 354 | "1/1 : 7 - 6.000\n", 355 | "1/1 : 8 - 7.000\n", 356 | "1/1 : 9 - 8.000\n", 357 | "1/1 : 10 - 9.000\n", 358 | "1/1 : 11 - 10.000\n" 359 | ] 360 | }, 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "State:\n", 365 | "\titeration: 11\n", 366 | "\tepoch: 1\n", 367 | "\tepoch_length: 11\n", 368 | "\tmax_epochs: 1\n", 369 | "\toutput: \n", 370 | "\tbatch: \n", 371 | "\tmetrics: \n", 372 | "\tdataloader: \n", 373 | "\tseed: \n", 374 | "\ttimes: " 375 | ] 376 | }, 377 | "execution_count": 4, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "import torch\n", 384 | "from ignite.engine import Engine, Events\n", 385 | "\n", 386 | "torch.manual_seed(12)\n", 387 | "\n", 388 | "def finite_unk_size_data_iter():\n", 389 | " for i in range(11):\n", 390 | " yield i\n", 391 | "\n", 392 | "def val_step(evaluator, batch):\n", 393 | " # ...\n", 394 | " s = evaluator.state\n", 395 | " print(\n", 396 | " f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n", 397 | " )\n", 398 | "\n", 399 | "evaluator = Engine(val_step)\n", 400 | "\n", 401 | "data_iter = finite_unk_size_data_iter()\n", 402 | "evaluator.run(data_iter)" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "id": "5d1abaa7", 408 | "metadata": {}, 409 | "source": [ 410 | "## Finite iterator with known length\n", 411 | "\n", 412 | "Let's use a finite data iterator with known size for training or validation. 
If we need to restart the data iterator, we can do this either as in case of unknown size by attaching the restart handler on `@trainer.on(Events.DATALOADER_STOP_ITERATION)`, but here we will do this explicitly on iteration:" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 5, 418 | "id": "a7f519ac", 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "name": "stdout", 423 | "output_type": "stream", 424 | "text": [ 425 | "1/5 : 1 - 0.000\n", 426 | "1/5 : 2 - 1.000\n", 427 | "1/5 : 3 - 2.000\n", 428 | "1/5 : 4 - 3.000\n", 429 | "1/5 : 5 - 4.000\n", 430 | "1/5 : 6 - 5.000\n", 431 | "1/5 : 7 - 6.000\n", 432 | "1/5 : 8 - 7.000\n", 433 | "1/5 : 9 - 8.000\n", 434 | "1/5 : 10 - 9.000\n", 435 | "1/5 : 11 - 10.000\n", 436 | "2/5 : 12 - 0.000\n", 437 | "2/5 : 13 - 1.000\n", 438 | "2/5 : 14 - 2.000\n", 439 | "2/5 : 15 - 3.000\n", 440 | "2/5 : 16 - 4.000\n", 441 | "2/5 : 17 - 5.000\n", 442 | "2/5 : 18 - 6.000\n", 443 | "2/5 : 19 - 7.000\n", 444 | "2/5 : 20 - 8.000\n", 445 | "2/5 : 21 - 9.000\n", 446 | "2/5 : 22 - 10.000\n", 447 | "3/5 : 23 - 0.000\n", 448 | "3/5 : 24 - 1.000\n", 449 | "3/5 : 25 - 2.000\n", 450 | "3/5 : 26 - 3.000\n", 451 | "3/5 : 27 - 4.000\n", 452 | "3/5 : 28 - 5.000\n", 453 | "3/5 : 29 - 6.000\n", 454 | "3/5 : 30 - 7.000\n", 455 | "3/5 : 31 - 8.000\n", 456 | "3/5 : 32 - 9.000\n", 457 | "3/5 : 33 - 10.000\n", 458 | "4/5 : 34 - 0.000\n", 459 | "4/5 : 35 - 1.000\n", 460 | "4/5 : 36 - 2.000\n", 461 | "4/5 : 37 - 3.000\n", 462 | "4/5 : 38 - 4.000\n", 463 | "4/5 : 39 - 5.000\n", 464 | "4/5 : 40 - 6.000\n", 465 | "4/5 : 41 - 7.000\n", 466 | "4/5 : 42 - 8.000\n", 467 | "4/5 : 43 - 9.000\n", 468 | "4/5 : 44 - 10.000\n", 469 | "5/5 : 45 - 0.000\n", 470 | "5/5 : 46 - 1.000\n", 471 | "5/5 : 47 - 2.000\n", 472 | "5/5 : 48 - 3.000\n", 473 | "5/5 : 49 - 4.000\n", 474 | "5/5 : 50 - 5.000\n", 475 | "5/5 : 51 - 6.000\n", 476 | "5/5 : 52 - 7.000\n", 477 | "5/5 : 53 - 8.000\n", 478 | "5/5 : 54 - 9.000\n", 479 | "5/5 : 55 - 10.000\n" 480 | ] 481 | 
}, 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "State:\n", 486 | "\titeration: 55\n", 487 | "\tepoch: 5\n", 488 | "\tepoch_length: 11\n", 489 | "\tmax_epochs: 5\n", 490 | "\toutput: \n", 491 | "\tbatch: 10\n", 492 | "\tmetrics: \n", 493 | "\tdataloader: \n", 494 | "\tseed: \n", 495 | "\ttimes: " 496 | ] 497 | }, 498 | "execution_count": 5, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "import torch\n", 505 | "from ignite.engine import Engine, Events\n", 506 | "\n", 507 | "torch.manual_seed(12)\n", 508 | "\n", 509 | "size = 11\n", 510 | "\n", 511 | "def finite_size_data_iter(size):\n", 512 | " for i in range(size):\n", 513 | " yield i\n", 514 | "\n", 515 | "def train_step(trainer, batch):\n", 516 | " # ...\n", 517 | " s = trainer.state\n", 518 | " print(\n", 519 | " f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n", 520 | " )\n", 521 | "\n", 522 | "trainer = Engine(train_step)\n", 523 | "\n", 524 | "@trainer.on(Events.ITERATION_COMPLETED(every=size))\n", 525 | "def restart_iter():\n", 526 | " trainer.state.dataloader = finite_size_data_iter(size)\n", 527 | "\n", 528 | "data_iter = finite_size_data_iter(size)\n", 529 | "trainer.run(data_iter, max_epochs=5)" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "id": "a518b014", 535 | "metadata": {}, 536 | "source": [ 537 | "In case of validation, the code is simply" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": 6, 543 | "id": "d1402c18", 544 | "metadata": {}, 545 | "outputs": [ 546 | { 547 | "name": "stdout", 548 | "output_type": "stream", 549 | "text": [ 550 | "1/1 : 1 - 0.000\n", 551 | "1/1 : 2 - 1.000\n", 552 | "1/1 : 3 - 2.000\n", 553 | "1/1 : 4 - 3.000\n", 554 | "1/1 : 5 - 4.000\n", 555 | "1/1 : 6 - 5.000\n", 556 | "1/1 : 7 - 6.000\n", 557 | "1/1 : 8 - 7.000\n", 558 | "1/1 : 9 - 8.000\n", 559 | "1/1 : 10 - 9.000\n", 560 | "1/1 : 11 - 10.000\n" 561 | ] 562 | }, 563 | { 564 | "data": { 565 | 
"text/plain": [ 566 | "State:\n", 567 | "\titeration: 11\n", 568 | "\tepoch: 1\n", 569 | "\tepoch_length: 11\n", 570 | "\tmax_epochs: 1\n", 571 | "\toutput: \n", 572 | "\tbatch: \n", 573 | "\tmetrics: \n", 574 | "\tdataloader: \n", 575 | "\tseed: \n", 576 | "\ttimes: " 577 | ] 578 | }, 579 | "execution_count": 6, 580 | "metadata": {}, 581 | "output_type": "execute_result" 582 | } 583 | ], 584 | "source": [ 585 | "import torch\n", 586 | "from ignite.engine import Engine, Events\n", 587 | "\n", 588 | "torch.manual_seed(12)\n", 589 | "\n", 590 | "size = 11\n", 591 | "\n", 592 | "def finite_size_data_iter(size):\n", 593 | " for i in range(size):\n", 594 | " yield i\n", 595 | "\n", 596 | "def val_step(evaluator, batch):\n", 597 | " # ...\n", 598 | " s = evaluator.state\n", 599 | " print(\n", 600 | " f\"{s.epoch}/{s.max_epochs} : {s.iteration} - {batch:.3f}\"\n", 601 | " )\n", 602 | "\n", 603 | "evaluator = Engine(val_step)\n", 604 | "\n", 605 | "data_iter = finite_size_data_iter(size)\n", 606 | "evaluator.run(data_iter)" 607 | ] 608 | } 609 | ], 610 | "metadata": { 611 | "interpreter": { 612 | "hash": "668c1b3fdfcad7da09e9c177fb24f18a657bbc5f55005750960a78843b3807f7" 613 | }, 614 | "kernelspec": { 615 | "display_name": "Python 3 (ipykernel)", 616 | "language": "python", 617 | "name": "python3" 618 | }, 619 | "language_info": { 620 | "codemirror_mode": { 621 | "name": "ipython", 622 | "version": 3 623 | }, 624 | "file_extension": ".py", 625 | "mimetype": "text/x-python", 626 | "name": "python", 627 | "nbconvert_exporter": "python", 628 | "pygments_lexer": "ipython3", 629 | "version": "3.8.10" 630 | } 631 | }, 632 | "nbformat": 4, 633 | "nbformat_minor": 5 634 | } 635 | -------------------------------------------------------------------------------- /how-to-guides/08-custom-events.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "YQCt0TA0uaNc", 6 | "metadata": { 7 | 
"id": "YQCt0TA0uaNc" 8 | }, 9 | "source": [ 10 | "\n", 19 | "# How to create Custom Events based on Forward or Backward Pass" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "aMOY2iPOuaNk", 25 | "metadata": { 26 | "id": "aMOY2iPOuaNk" 27 | }, 28 | "source": [ 29 | "This guide demonstrates how you can create [custom events](https://pytorch-ignite.ai/concepts/02-events-and-handlers#custom-events) that depend on the loss calculated and backward pass.\n", 30 | "\n", 31 | "In this example, we will be using a ResNet18 model on the MNIST dataset. The base code is the same as used in the [Getting Started Guide](https://pytorch-ignite.ai/tutorials/getting-started/)." 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "THcUNAgpWMDF", 37 | "metadata": { 38 | "id": "THcUNAgpWMDF" 39 | }, 40 | "source": [ 41 | "## Basic Setup" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 7, 47 | "id": "Y0sJP9iFa1TB", 48 | "metadata": { 49 | "id": "Y0sJP9iFa1TB", 50 | "pycharm": { 51 | "is_executing": false 52 | } 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "import pandas as pd\n", 57 | "\n", 58 | "import torch\n", 59 | "from torch import nn\n", 60 | "from torch.utils.data import DataLoader\n", 61 | "from torchvision.datasets import MNIST\n", 62 | "from torchvision.models import resnet18\n", 63 | "from torchvision.transforms import Compose, Normalize, ToTensor\n", 64 | "\n", 65 | "from ignite.engine import Engine, EventEnum, Events, create_supervised_evaluator\n", 66 | "from ignite.metrics import Accuracy, Loss\n", 67 | "from ignite.handlers import Timer\n", 68 | "from ignite.contrib.handlers import BasicTimeProfiler, HandlersTimeProfiler" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "iK_9cOP6a1TI", 75 | "metadata": { 76 | "id": "iK_9cOP6a1TI" 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 81 | "\n", 82 | "\n", 
83 | "class Net(nn.Module):\n", 84 | " def __init__(self):\n", 85 | " super(Net, self).__init__()\n", 86 | "\n", 87 | " self.model = resnet18(num_classes=10)\n", 88 | " self.model.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False)\n", 89 | "\n", 90 | " def forward(self, x):\n", 91 | " return self.model(x)\n", 92 | "\n", 93 | "\n", 94 | "model = Net().to(device)\n", 95 | "\n", 96 | "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n", 97 | "\n", 98 | "train_loader = DataLoader(\n", 99 | " MNIST(download=True, root=\".\", transform=data_transform, train=True),\n", 100 | " batch_size=128,\n", 101 | " shuffle=True,\n", 102 | ")\n", 103 | "\n", 104 | "optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005)\n", 105 | "criterion = nn.CrossEntropyLoss()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "id": "Q_u0IS8q9IY-", 111 | "metadata": { 112 | "id": "Q_u0IS8q9IY-" 113 | }, 114 | "source": [ 115 | "## Create Custom Events\n", 116 | "\n", 117 | "First let's create a few custom events based on backpropagation. All user-defined custom events should inherit from the base class [`EventEnum`](https://pytorch.org/ignite/generated/ignite.engine.events.EventEnum.html#ignite.engine.events.EventEnum)." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 27, 123 | "id": "TbEoK_H8yIAj", 124 | "metadata": { 125 | "id": "TbEoK_H8yIAj" 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "class BackpropEvents(EventEnum):\n", 130 | " BACKWARD_STARTED = 'backward_started'\n", 131 | " BACKWARD_COMPLETED = 'backward_completed'\n", 132 | " OPTIM_STEP_COMPLETED = 'optim_step_completed'" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "id": "9lwr621Y9Lnx", 138 | "metadata": { 139 | "id": "9lwr621Y9Lnx" 140 | }, 141 | "source": [ 142 | "## Create `trainer`\n", 143 | "\n", 144 | "Then we define the `train_step` function to be applied on all batches. 
Within this, we use [`fire_event`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.fire_event) to execute all handlers related to a specific event at that point." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 28, 150 | "id": "8aqUFTEdxxvz", 151 | "metadata": { 152 | "id": "8aqUFTEdxxvz" 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "def train_step(engine, batch):\n", 157 | " model.train()\n", 158 | " optimizer.zero_grad()\n", 159 | " x, y = batch[0].to(device), batch[1].to(device)\n", 160 | " y_pred = model(x)\n", 161 | " loss = criterion(y_pred, y)\n", 162 | " \n", 163 | " engine.fire_event(BackpropEvents.BACKWARD_STARTED)\n", 164 | " loss.backward()\n", 165 | " engine.fire_event(BackpropEvents.BACKWARD_COMPLETED)\n", 166 | "\n", 167 | " optimizer.step()\n", 168 | " engine.fire_event(BackpropEvents.OPTIM_STEP_COMPLETED)\n", 169 | "\n", 170 | " return loss.item()\n", 171 | "\n", 172 | "\n", 173 | "trainer = Engine(train_step)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "id": "eiLRGHAK9Q12", 179 | "metadata": { 180 | "id": "eiLRGHAK9Q12" 181 | }, 182 | "source": [ 183 | "## Register Custom Events in `trainer`\n", 184 | "\n", 185 | "Finally, to make sure our events can be fired, we register them in `trainer` using [`register_events`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.register_events)." 
186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "id": "4byi6J6N9d4K", 192 | "metadata": { 193 | "id": "4byi6J6N9d4K" 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "trainer.register_events(*BackpropEvents)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "id": "WZbJwRUD9e-d", 203 | "metadata": { 204 | "id": "WZbJwRUD9e-d" 205 | }, 206 | "source": [ 207 | "## Attach handlers to Custom Events\n", 208 | "\n", 209 | "And now we can easily attach handlers to be executed when a particular event like `BACKWARD_COMPLETED` is fired." 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 29, 215 | "id": "9Dp6QBfQysOq", 216 | "metadata": { 217 | "id": "9Dp6QBfQysOq" 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "@trainer.on(BackpropEvents.BACKWARD_COMPLETED)\n", 222 | "def function_before_backprop(engine):\n", 223 | " print(f\"Iter[{engine.state.iteration}] Function fired after backward pass\")" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "id": "XMKXagQk-VLl", 229 | "metadata": { 230 | "id": "XMKXagQk-VLl" 231 | }, 232 | "source": [ 233 | "And finally you can run the `trainer` for some epochs. " 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "3G9DV6h767fj", 240 | "metadata": { 241 | "id": "3G9DV6h767fj" 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "trainer.run(train_loader, max_epochs=3)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "id": "x031SkP2-Lg9", 251 | "metadata": { 252 | "id": "x031SkP2-Lg9" 253 | }, 254 | "source": [ 255 | "## Additional Links\n", 256 | "\n", 257 | "You can also check out the source code of [TBPTT Trainer](https://pytorch.org/ignite/_modules/ignite/contrib/engines/tbptt.html#create_supervised_tbptt_trainer) for a detailed explanation." 
258 | ] 259 | } 260 | ], 261 | "metadata": { 262 | "accelerator": "GPU", 263 | "colab": { 264 | "name": "08-custom-events.ipynb", 265 | "provenance": [] 266 | }, 267 | "kernelspec": { 268 | "display_name": "Python 3 (ipykernel)", 269 | "language": "python", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 3 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython3", 282 | "version": "3.10.4" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 5 287 | } 288 | -------------------------------------------------------------------------------- /how-to-guides/09-switch-data-training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "OuJ-YDzWCgEI", 6 | "metadata": { 7 | "id": "OuJ-YDzWCgEI" 8 | }, 9 | "source": [ 10 | "\n", 19 | "\n", 20 | "# How to switch data provider during training" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "id": "zi2JOUi1CgEO", 26 | "metadata": { 27 | "id": "zi2JOUi1CgEO" 28 | }, 29 | "source": [ 30 | "In this example, we will see how one can easily switch the data provider during the training using\n", 31 | "[`set_data()`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.set_data). 
" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "wJKPRmQZIV_S", 37 | "metadata": { 38 | "id": "wJKPRmQZIV_S" 39 | }, 40 | "source": [ 41 | "## Basic Setup" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "id": "QwpM9M-XI23h", 47 | "metadata": { 48 | "id": "QwpM9M-XI23h" 49 | }, 50 | "source": [ 51 | "### Required Dependencies" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "H_UgUurNIb53", 58 | "metadata": { 59 | "id": "H_UgUurNIb53" 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "!pip install pytorch-ignite" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "id": "Z2Yo1WSWI6vr", 69 | "metadata": { 70 | "id": "Z2Yo1WSWI6vr" 71 | }, 72 | "source": [ 73 | "### Import" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 2, 79 | "id": "-2Ai1Ht_HWiB", 80 | "metadata": { 81 | "id": "-2Ai1Ht_HWiB" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "from ignite.engine import Engine, Events" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": "Q9tTpXjmI9R_", 91 | "metadata": { 92 | "id": "Q9tTpXjmI9R_" 93 | }, 94 | "source": [ 95 | "### Data Providers" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "id": "g7ctwsy3Han_", 102 | "metadata": { 103 | "id": "g7ctwsy3Han_" 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "data1 = [1, 2, 3]\n", 108 | "data2 = [11, 12, 13]" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "S-aQnuihJbZz", 114 | "metadata": { 115 | "id": "S-aQnuihJbZz" 116 | }, 117 | "source": [ 118 | "## Create dummy `trainer`\n", 119 | "\n", 120 | "Let's create a dummy `train_step` which will print the current iteration and batch of data. 
" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 17, 126 | "id": "2Skq9nmSHnce", 127 | "metadata": { 128 | "id": "2Skq9nmSHnce" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "def train_step(engine, batch):\n", 133 | " print(f\"Iter[{engine.state.iteration}] Current datapoint = \", batch)\n", 134 | "\n", 135 | "trainer = Engine(train_step)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "id": "YIBlmaO6JW9c", 141 | "metadata": { 142 | "id": "YIBlmaO6JW9c" 143 | }, 144 | "source": [ 145 | "## Attach handler to switch data\n", 146 | "\n", 147 | "Now we have to decide when to switch the data provider. It can be after an epoch, iteration or something custom. Below, we are going to switch data after some specific iteration. And then we attach a handler to `trainer` that will be executed once after `switch_iteration` and use `set_data()` so that when:\n", 148 | "\n", 149 | "* iteration <= `switch_iteration`, batch is from `data1`\n", 150 | "* iteration > `switch_iteration`, batch is from `data2`" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 18, 156 | "id": "RaMkWUwnCgEQ", 157 | "metadata": { 158 | "id": "RaMkWUwnCgEQ" 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "switch_iteration = 5\n", 163 | "\n", 164 | "\n", 165 | "@trainer.on(Events.ITERATION_COMPLETED(once=switch_iteration))\n", 166 | "def switch_dataloader():\n", 167 | " print(\"<------- Switch Data ------->\")\n", 168 | " trainer.set_data(data2)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "BvJ2qms6M44n", 174 | "metadata": { 175 | "id": "BvJ2qms6M44n" 176 | }, 177 | "source": [ 178 | "And finally we run the `trainer` for some epochs." 
179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 19, 184 | "id": "8W-WFdZ8HzJU", 185 | "metadata": { 186 | "colab": { 187 | "base_uri": "https://localhost:8080/" 188 | }, 189 | "id": "8W-WFdZ8HzJU", 190 | "outputId": "7c2c5a36-f657-4d75-8086-ec3fd1fdf10e" 191 | }, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "Iter[1] Current datapoint = 1\n", 198 | "Iter[2] Current datapoint = 2\n", 199 | "Iter[3] Current datapoint = 3\n", 200 | "Iter[4] Current datapoint = 1\n", 201 | "Iter[5] Current datapoint = 2\n", 202 | "<------- Switch Data ------->\n", 203 | "Iter[6] Current datapoint = 11\n", 204 | "Iter[7] Current datapoint = 12\n", 205 | "Iter[8] Current datapoint = 13\n", 206 | "Iter[9] Current datapoint = 11\n", 207 | "Iter[10] Current datapoint = 12\n", 208 | "Iter[11] Current datapoint = 13\n", 209 | "Iter[12] Current datapoint = 11\n", 210 | "Iter[13] Current datapoint = 12\n", 211 | "Iter[14] Current datapoint = 13\n", 212 | "Iter[15] Current datapoint = 11\n" 213 | ] 214 | }, 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "State:\n", 219 | "\titeration: 15\n", 220 | "\tepoch: 5\n", 221 | "\tepoch_length: 3\n", 222 | "\tmax_epochs: 5\n", 223 | "\toutput: \n", 224 | "\tbatch: 11\n", 225 | "\tmetrics: \n", 226 | "\tdataloader: \n", 227 | "\tseed: \n", 228 | "\ttimes: " 229 | ] 230 | }, 231 | "execution_count": 19, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "trainer.run(data1, max_epochs=5)" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | "colab": { 243 | "name": "switch-data-training.ipynb", 244 | "provenance": [] 245 | }, 246 | "kernelspec": { 247 | "display_name": "Python 3 (ipykernel)", 248 | "language": "python", 249 | "name": "python3" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 3 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 
258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython3", 261 | "version": "3.8.8" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 5 266 | } 267 | -------------------------------------------------------------------------------- /how-to-guides/10-loggers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 17 | "\n", 18 | "# How to use Loggers" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "This how-to guide demonstrates the usage of loggers with Ignite. As part of this guide, we will be using the [ClearML](https://clear.ml/docs/latest/docs/fundamentals/logger/) logger and also highlight how this code can be easily modified to make use of other loggers. You can see all the other loggers supported [here](https://pytorch.org/ignite/contrib/handlers.html#loggers).\n", 26 | "\n", 27 | "\n", 28 | "\n", 29 | "In this example, we will be using a simple convolutional network on the [MNIST](https://pytorch.org/vision/stable/datasets.html#torchvision.datasets.MNIST) dataset to show how logging works in Ignite.\n", 30 | "\n", 31 | "## Prerequisites\n", 32 | "- Refer to the [installation-guide](https://pytorch-ignite.ai/how-to-guides/01-installation/) to install Ignite (and PyTorch).\n", 33 | "- To get started with ClearML create your account [here](https://app.community.clear.ml/profile). Then create a credential: Profile > Create new credentials > Copy to clipboard." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Install dependencies" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 6, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "%%capture\n", 50 | "! 
pip install torchvision" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 7, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "%%capture\n", 60 | "! pip install clearml" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 1, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "%%capture\n", 70 | "! clearml-init # You may want to run this command on your terminal separately and paste what you copied in the step above." 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "## Imports " 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 1, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "\n", 87 | "import torch\n", 88 | "import torch.nn.functional as F\n", 89 | "from torch import nn\n", 90 | "from torch.optim import SGD\n", 91 | "from torch.utils.data import DataLoader\n", 92 | "from torchvision.datasets import MNIST\n", 93 | "from torchvision.transforms import Compose, Normalize, ToTensor\n", 94 | "\n", 95 | "from ignite.contrib.handlers.clearml_logger import (\n", 96 | " ClearMLLogger,\n", 97 | " ClearMLSaver,\n", 98 | " GradsHistHandler,\n", 99 | " GradsScalarHandler,\n", 100 | " WeightsHistHandler,\n", 101 | " WeightsScalarHandler,\n", 102 | " global_step_from_engine,\n", 103 | ")\n", 104 | "\n", 105 | "from ignite.engine import Events, create_supervised_evaluator, create_supervised_trainer\n", 106 | "from ignite.handlers import Checkpoint\n", 107 | "from ignite.metrics import Accuracy, Loss\n", 108 | "from ignite.utils import setup_logger" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "## Model" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 2, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "class Net(nn.Module):\n", 125 | " def __init__(self):\n", 126 | " super(Net, self).__init__()\n", 127 | " self.conv1 = 
nn.Conv2d(1, 10, kernel_size=5)\n", 128 | " self.conv2 = nn.Conv2d(10, 20, kernel_size=5)\n", 129 | " self.conv2_drop = nn.Dropout2d()\n", 130 | " self.fc1 = nn.Linear(320, 50)\n", 131 | " self.fc2 = nn.Linear(50, 10)\n", 132 | "\n", 133 | " def forward(self, x):\n", 134 | " x = F.relu(F.max_pool2d(self.conv1(x), 2))\n", 135 | " x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n", 136 | " x = x.view(-1, 320)\n", 137 | " x = F.relu(self.fc1(x))\n", 138 | " x = F.dropout(x, training=self.training)\n", 139 | " x = self.fc2(x)\n", 140 | " return F.log_softmax(x, dim=-1)\n" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Dataloader" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 8, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "def get_data_loaders(train_batch_size, val_batch_size):\n", 157 | " data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n", 158 | "\n", 159 | " train_loader = DataLoader(\n", 160 | " MNIST(download=True, root=\".\", transform=data_transform, train=True), batch_size=train_batch_size, shuffle=True\n", 161 | " )\n", 162 | "\n", 163 | " val_loader = DataLoader(\n", 164 | " MNIST(download=False, root=\".\", transform=data_transform, train=False), batch_size=val_batch_size, shuffle=False\n", 165 | " )\n", 166 | " return train_loader, val_loader" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "## Training\n", 174 | "\n", 175 | "Ignite makes use of handlers to configure what we want to log. 
Each handler takes in some common attributes like:\n", 176 | "\n", 177 | "- Engine Object, which could for example be the trainer if we are interested in training logs\n", 178 | "- Event Name, which tells the logger when the information should be logged; for example, `event_name=Events.ITERATION_COMPLETED(every=100)` means that the information is logged every 100 iterations.\n", 179 | "- args (or kwargs), through which you pass metadata describing what is to be logged; for example, to log the 'loss' we could pass `output_transform=lambda loss: {\"batchloss\": loss}`\n", 180 | "- Ignite also provides the flexibility to execute custom event handlers; these can be set with the `log_handler` argument of the logger's `attach` method. For example, `log_handler=WeightsScalarHandler(model)` would log the norm of model's weights." 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 4, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "def run(train_batch_size, val_batch_size, epochs, lr, momentum):\n", 190 | " train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)\n", 191 | " model = Net()\n", 192 | " device = \"cpu\"\n", 193 | "\n", 194 | " if torch.cuda.is_available():\n", 195 | " device = \"cuda\"\n", 196 | "\n", 197 | " model.to(device) \n", 198 | " optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)\n", 199 | " criterion = nn.CrossEntropyLoss()\n", 200 | " trainer = create_supervised_trainer(model, optimizer, criterion, device=device)\n", 201 | " trainer.logger = setup_logger(\"Trainer\")\n", 202 | "\n", 203 | " metrics = {\"accuracy\": Accuracy(), \"loss\": Loss(criterion)}\n", 204 | "\n", 205 | " train_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)\n", 206 | " train_evaluator.logger = setup_logger(\"Train Evaluator\")\n", 207 | " validation_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)\n", 
208 | " validation_evaluator.logger = setup_logger(\"Val Evaluator\")\n", 209 | "\n", 210 | " @trainer.on(Events.EPOCH_COMPLETED)\n", 211 | " def compute_metrics(engine):\n", 212 | " train_evaluator.run(train_loader)\n", 213 | " validation_evaluator.run(val_loader)\n", 214 | " \n", 215 | " # To utilize other loggers we need to change the object here\n", 216 | " clearml_logger = ClearMLLogger(project_name=\"examples\", task_name=\"ignite\") \n", 217 | "\n", 218 | " # Attach the logger to the trainer to log training loss \n", 219 | " clearml_logger.attach_output_handler(\n", 220 | " trainer,\n", 221 | " event_name=Events.ITERATION_COMPLETED(every=100),\n", 222 | " tag=\"training\",\n", 223 | " output_transform=lambda loss: {\"batchloss\": loss},\n", 224 | " )\n", 225 | " \n", 226 | " # Attach the logger to log loss and accuracy for both training and validation\n", 227 | " for tag, evaluator in [(\"training metrics\", train_evaluator), (\"validation metrics\", validation_evaluator)]:\n", 228 | " clearml_logger.attach_output_handler(\n", 229 | " evaluator,\n", 230 | " event_name=Events.EPOCH_COMPLETED,\n", 231 | " tag=tag,\n", 232 | " metric_names=[\"loss\", \"accuracy\"],\n", 233 | " global_step_transform=global_step_from_engine(trainer),\n", 234 | " )\n", 235 | "\n", 236 | " # Attach the logger to the trainer to log optimizer's parameters, e.g. 
learning rate \n", 237 | " clearml_logger.attach_opt_params_handler(\n", 238 | " trainer, event_name=Events.ITERATION_COMPLETED(every=100), optimizer=optimizer\n", 239 | " )\n", 240 | "\n", 241 | " # Attach the logger to the trainer to log model's weights norm\n", 242 | " clearml_logger.attach(\n", 243 | " trainer, log_handler=WeightsScalarHandler(model), event_name=Events.ITERATION_COMPLETED(every=100)\n", 244 | " )\n", 245 | "\n", 246 | " # Attach the logger to the trainer to log model's weights as a histogram \n", 247 | " clearml_logger.attach(trainer, log_handler=WeightsHistHandler(model), event_name=Events.EPOCH_COMPLETED(every=100))\n", 248 | "\n", 249 | " # Attach the logger to the trainer to log model’s gradients as scalars\n", 250 | " clearml_logger.attach(\n", 251 | " trainer, log_handler=GradsScalarHandler(model), event_name=Events.ITERATION_COMPLETED(every=100)\n", 252 | " )\n", 253 | "\n", 254 | " #Attach the logger to the trainer to log model's gradients as a histogram \n", 255 | " clearml_logger.attach(trainer, log_handler=GradsHistHandler(model), event_name=Events.EPOCH_COMPLETED(every=100))\n", 256 | "\n", 257 | " handler = Checkpoint(\n", 258 | " {\"model\": model},\n", 259 | " ClearMLSaver(),\n", 260 | " n_saved=1,\n", 261 | " score_function=lambda e: e.state.metrics[\"accuracy\"],\n", 262 | " score_name=\"val_acc\",\n", 263 | " filename_prefix=\"best\",\n", 264 | " global_step_transform=global_step_from_engine(trainer),\n", 265 | " )\n", 266 | " validation_evaluator.add_event_handler(Events.EPOCH_COMPLETED, handler)\n", 267 | "\n", 268 | " trainer.run(train_loader, max_epochs=epochs)\n", 269 | "\n", 270 | " clearml_logger.close()\n" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 5, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "batch_size=64\n", 280 | "val_batch_size=1000\n", 281 | "epochs=5 \n", 282 | "lr=0.01\n", 283 | "momentum=0.5" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | 
"execution_count": 6, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "name": "stdout", 293 | "output_type": "stream", 294 | "text": [ 295 | "ClearML Task: created new task id=575b4d9b5c8a47589ac7edb7e5e0bb59\n", 296 | "ClearML results page: https://app.community.clear.ml/projects/4d6b8ac509bc46da91607e83011248fb/experiments/575b4d9b5c8a47589ac7edb7e5e0bb59/output/log\n" 297 | ] 298 | }, 299 | { 300 | "name": "stderr", 301 | "output_type": "stream", 302 | "text": [ 303 | "/home/anirudh/miniconda3/envs/ignite/lib/python3.9/site-packages/ignite/contrib/handlers/clearml_logger.py:659: UserWarning: ClearMLSaver created a temporary checkpoints directory: /tmp/ignite_checkpoints_2021_10_25_20_21_50_gkx2f03c\n", 304 | " warnings.warn(f\"ClearMLSaver created a temporary checkpoints directory: {dirname}\")\n", 305 | "2021-10-25 20:21:50,778 Trainer INFO: Engine run starting with max_epochs=5.\n", 306 | "2021-10-25 20:22:08,993 Train Evaluator INFO: Engine run starting with max_epochs=1.\n", 307 | "2021-10-25 20:22:18,656 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:10\n", 308 | "2021-10-25 20:22:18,657 Train Evaluator INFO: Engine run complete. Time taken: 00:00:10\n", 309 | "2021-10-25 20:22:18,658 Val Evaluator INFO: Engine run starting with max_epochs=1.\n", 310 | "2021-10-25 20:22:29,442 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n", 311 | "2021-10-25 20:22:29,443 Val Evaluator INFO: Engine run complete. Time taken: 00:00:11\n", 312 | "2021-10-25 20:22:29,444 Trainer INFO: Epoch[1] Complete. Time taken: 00:00:39\n", 313 | "2021-10-25 20:22:46,879 Train Evaluator INFO: Engine run starting with max_epochs=1.\n", 314 | "2021-10-25 20:22:57,516 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n", 315 | "2021-10-25 20:22:57,518 Train Evaluator INFO: Engine run complete. 
Time taken: 00:00:11\n", 316 | "2021-10-25 20:22:57,519 Val Evaluator INFO: Engine run starting with max_epochs=1.\n", 317 | "2021-10-25 20:23:12,853 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:15\n", 318 | "2021-10-25 20:23:12,854 Val Evaluator INFO: Engine run complete. Time taken: 00:00:15\n", 319 | "2021-10-25 20:23:12,855 Trainer INFO: Epoch[2] Complete. Time taken: 00:00:43\n", 320 | "2021-10-25 20:23:29,609 Train Evaluator INFO: Engine run starting with max_epochs=1.\n", 321 | "2021-10-25 20:23:40,388 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n", 322 | "2021-10-25 20:23:40,390 Train Evaluator INFO: Engine run complete. Time taken: 00:00:11\n", 323 | "2021-10-25 20:23:40,390 Val Evaluator INFO: Engine run starting with max_epochs=1.\n", 324 | "2021-10-25 20:23:55,842 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:15\n", 325 | "2021-10-25 20:23:55,845 Val Evaluator INFO: Engine run complete. Time taken: 00:00:15\n", 326 | "2021-10-25 20:23:55,845 Trainer INFO: Epoch[3] Complete. Time taken: 00:00:43\n", 327 | "2021-10-25 20:24:13,223 Train Evaluator INFO: Engine run starting with max_epochs=1.\n", 328 | "2021-10-25 20:24:23,924 Train Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:11\n", 329 | "2021-10-25 20:24:23,925 Train Evaluator INFO: Engine run complete. Time taken: 00:00:11\n", 330 | "2021-10-25 20:24:23,925 Val Evaluator INFO: Engine run starting with max_epochs=1.\n", 331 | "2021-10-25 20:24:39,658 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:16\n", 332 | "2021-10-25 20:24:39,661 Val Evaluator INFO: Engine run complete. Time taken: 00:00:16\n", 333 | "2021-10-25 20:24:39,662 Trainer INFO: Epoch[4] Complete. Time taken: 00:00:44\n", 334 | "2021-10-25 20:24:57,385 Train Evaluator INFO: Engine run starting with max_epochs=1.\n", 335 | "2021-10-25 20:25:07,264 Train Evaluator INFO: Epoch[1] Complete. 
Time taken: 00:00:10\n", 336 | "2021-10-25 20:25:07,265 Train Evaluator INFO: Engine run complete. Time taken: 00:00:10\n", 337 | "2021-10-25 20:25:07,267 Val Evaluator INFO: Engine run starting with max_epochs=1.\n", 338 | "2021-10-25 20:25:22,536 Val Evaluator INFO: Epoch[1] Complete. Time taken: 00:00:15\n", 339 | "2021-10-25 20:25:22,537 Val Evaluator INFO: Engine run complete. Time taken: 00:00:15\n", 340 | "2021-10-25 20:25:22,538 Trainer INFO: Epoch[5] Complete. Time taken: 00:00:43\n", 341 | "2021-10-25 20:25:22,539 Trainer INFO: Engine run complete. Time taken: 00:03:32\n" 342 | ] 343 | } 344 | ], 345 | "source": [ 346 | "run(batch_size, val_batch_size, epochs, lr, momentum)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "If you followed along, congratulations! You can take a look at some of the visualisations from the results page mentioned in your logs above (`ClearML results page`). Here's an example!" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "![Clear ML Dashboard](assets/clearml-dashboard.png)" 361 | ] 362 | } 363 | ], 364 | "metadata": { 365 | "accelerator": "GPU", 366 | "kernelspec": { 367 | "display_name": "Python 3 (ipykernel)", 368 | "language": "python", 369 | "name": "python3" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.10.4" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 4 386 | } 387 | -------------------------------------------------------------------------------- /how-to-guides/assets/clearml-dashboard.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/how-to-guides/assets/clearml-dashboard.png -------------------------------------------------------------------------------- /how-to-guides/assets/convert-pytorch2ignite.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/how-to-guides/assets/convert-pytorch2ignite.gif -------------------------------------------------------------------------------- /tests/test_generate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import subprocess 5 | from datetime import datetime 6 | 7 | import pytest 8 | 9 | today = datetime.now().strftime("%Y-%m-%d") 10 | 11 | new_notebook_empty = {"cells": [], "metadata": {}, "nbformat": 4, "nbformat_minor": 4} 12 | new_notebook_empty = json.dumps(new_notebook_empty, indent=4) 13 | 14 | cell_front_matter = { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "` tag below to provide summary for this notebook, " 24 | "and delete the other>\n" 25 | "tags:\n", 26 | " - \n", 27 | "--- -->\n", 28 | "\n", 29 | "# title-placeholder\n", 30 | "\n", 31 | "\n", 33 | "` below.>", 34 | "\n", 35 | "", 36 | ], 37 | } 38 | 39 | 40 | @pytest.mark.parametrize("name", ["dummy_notebook", "dummy_notebook.ipynb"]) 41 | def test_new_notebook_creation(name, tmp_path): 42 | notebook_path = os.path.join(tmp_path, name) 43 | 44 | output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8") 45 | 46 | if not notebook_path.endswith(".ipynb"): 47 | notebook_path = notebook_path + ".ipynb" 48 | 49 | assert output == f"Generated {notebook_path}\n" 50 | 51 | 52 | def test_existing_blank_notebook(tmp_path): 53 | notebook_path = os.path.join(tmp_path, "dummy_notebook_empty.ipynb") 54 | with open(notebook_path, 
"w") as f: 55 | f.write(new_notebook_empty) 56 | 57 | output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8") 58 | 59 | assert output == f"Added frontmatter to {notebook_path}\n" 60 | 61 | 62 | def test_existing_non_empty_notebook(tmp_path): 63 | notebook_name = "01-getting-started.ipynb" 64 | notebook_path = os.path.join(tmp_path, notebook_name) 65 | shutil.copyfile(os.path.join("./tutorials/beginner", notebook_name), notebook_path) 66 | 67 | output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8") 68 | 69 | assert output == f"Added frontmatter to {notebook_path}\n" 70 | 71 | # Check to make sure its added as the first cell 72 | with open(notebook_path) as fp: 73 | content = json.load(fp) 74 | assert content["cells"][0] == cell_front_matter 75 | 76 | 77 | @pytest.mark.parametrize("name", ["dummy_notebook_empty", "dummy_notebook_empty.ipynb"]) 78 | def test_front_matter_multiple_times(name, tmp_path): 79 | notebook_path = os.path.join(tmp_path, name) 80 | 81 | # This will create a notebook with frontmatter 82 | _ = subprocess.check_output(["python", "generate.py", notebook_path]) 83 | 84 | # Second call should not add frontmatter again 85 | output = subprocess.check_output(["python", "generate.py", notebook_path]).decode("utf-8") 86 | 87 | if not notebook_path.endswith(".ipynb"): 88 | notebook_path = notebook_path + ".ipynb" 89 | 90 | assert output == f"Frontmatter cell already exists in {notebook_path}. Exiting\n" 91 | # Check to make sure only added once. 
92 | with open(notebook_path) as fp: 93 | content = json.load(fp) 94 | 95 | if len(content["cells"]) > 1: 96 | assert content["cells"][0] != content["cells"][1] 97 | -------------------------------------------------------------------------------- /tutorials/advanced/01-collective-communication.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "5w-QlZE9mvdY" 7 | }, 8 | "source": [ 9 | "\n", 22 | "\n", 23 | "# Collective Communication with Ignite" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "vJgTaKWU8Doq" 30 | }, 31 | "source": [ 32 | "In this tutorial, we will see how to use advanced distributed functions like `all_reduce()`, `all_gather()`, `broadcast()` and `barrier()`. We will discuss unique use cases for all of them and represent them visually.\n", 33 | "\n", 34 | "" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "id": "Qhiy_ylcn2GD" 41 | }, 42 | "source": [ 43 | "## Required Dependencies" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "id": "1zevsoVQ4nx7" 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "!pip install pytorch-ignite" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": { 60 | "id": "TrvIsRKQn42e" 61 | }, 62 | "source": [ 63 | "## Imports" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 2, 69 | "metadata": { 70 | "id": "lMphyBmmmvdw", 71 | "pycharm": { 72 | "is_executing": false 73 | } 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "import torch\n", 78 | "\n", 79 | "import ignite.distributed as idist" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": { 85 | "id": "Y2KPTliNC2r9" 86 | }, 87 | "source": [ 88 | "## All Reduce\n", 89 | "\n", 90 | "![All Reduce 
Diagram](https://github.com/pytorch-ignite/examples/blob/main/tutorials/assets/all-reduce.png?raw=1)\n", 91 | "\n", 92 | "The [`all_reduce()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.all_reduce) method is used to collect specified tensors from each process and make them available on every node then perform a specified operation (sum, product, min, max, etc) on them. Let's spawn 3 processes with ranks 0, 1 and 2 and define a `tensor` on all of them. If we performed `all_reduce` with the operation SUM on `tensor` then `tensor` on all ranks will be gathered, added and stored in `tensor` as shown below:" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 3, 98 | "metadata": { 99 | "id": "GHT6EftPOwUq" 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "def all_reduce_example(local_rank):\n", 104 | " tensor = torch.arange(2, dtype=torch.int64) + 1 + 2 * local_rank\n", 105 | " print(f\"Rank {local_rank}, Initial value: {tensor}\")\n", 106 | "\n", 107 | " idist.all_reduce(tensor, op=\"SUM\")\n", 108 | " print(f\"Rank {local_rank}, After performing all_reduce: {tensor}\")" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": { 114 | "id": "uz5mYRS76gjm" 115 | }, 116 | "source": [ 117 | "We can use [idist.spawn](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.spawn) to spawn 3 processes (`nproc_per_node`) and execute the above function." 
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 12, 123 | "metadata": { 124 | "colab": { 125 | "base_uri": "https://localhost:8080/" 126 | }, 127 | "id": "yCc1OxDg3X38", 128 | "outputId": "97640f5c-aae7-45d8-b4de-21b5ff49ce3d" 129 | }, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "Rank 0, Initial value: tensor([1, 2])\n", 136 | "Rank 2, Initial value: tensor([5, 6])\n", 137 | "Rank 1, Initial value: tensor([3, 4])\n", 138 | "Rank 0, After performing all_reduce: tensor([ 9, 12])\n", 139 | "Rank 1, After performing all_reduce: tensor([ 9, 12])\n", 140 | "Rank 2, After performing all_reduce: tensor([ 9, 12])\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "idist.spawn(backend=\"gloo\", fn=all_reduce_example, args=(), nproc_per_node=3)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": { 151 | "id": "FLntezo0NhQg" 152 | }, 153 | "source": [ 154 | "Now let's assume a more real world scenario - You need to find the average of all the gradients available on different processes. \n", 155 | "\n", 156 | "> First, we get the number of GPUs available, with the get_world_size method. Then, for every model parameter, we do the following:\n", 157 | ">\n", 158 | "> 1. Gather the gradients on each process\n", 159 | "> 2. Apply the sum operation on the gradients\n", 160 | "> 3. 
Divide by the world size to average them\n", 161 | ">\n", 162 | "> Finally, we can go on to update the model parameters using the averaged gradients!\n", 163 | ">\n", 164 | "> -- [Distributed Deep Learning 101: Introduction](https://towardsdatascience.com/distributed-deep-learning-101-introduction-ebfc1bcd59d9)\n", 165 | "\n", 166 | "You can get the number of GPUs (processes) available using another helper method [`idist.get_world_size()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.get_world_size) and then use `all_reduce()` to collect the gradients and apply the SUM operation." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 7, 172 | "metadata": { 173 | "id": "0j_ErUWhHpTl" 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "def average_gradients(model):\n", 178 | " num_processes = idist.get_world_size()\n", 179 | " for param in model.parameters():\n", 180 | " idist.all_reduce(param.grad.data, op=\"SUM\")\n", 181 | " param.grad.data = param.grad.data / num_processes" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "id": "7w9oIcIiC6_4" 188 | }, 189 | "source": [ 190 | "## All Gather\n", 191 | "\n", 192 | "![All Gather Diagram](https://github.com/pytorch-ignite/examples/blob/main/tutorials/assets/all-gather.png?raw=1)\n", 193 | "\n", 194 | "The [`all_gather()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.all_gather) method is used when you just want to collect a tensor, number or string across all participating processes. As a basic example, suppose you have to collect all the different values stored in `num` on all ranks. 
You can achieve this by using `all_gather` as below:" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 8, 200 | "metadata": { 201 | "id": "Q1ZD4jPK5CVm" 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "def all_gather_example(local_rank):\n", 206 | " num = 2.0 * idist.get_rank()\n", 207 | " print(f\"Rank {local_rank}, Initial value: {num}\")\n", 208 | "\n", 209 | " all_nums = idist.all_gather(num)\n", 210 | " print(f\"Rank {local_rank}, After performing all_gather: {all_nums}\")" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 11, 216 | "metadata": { 217 | "colab": { 218 | "base_uri": "https://localhost:8080/" 219 | }, 220 | "id": "DyGu-S7I5Uzw", 221 | "outputId": "50c9e712-819e-4e6e-ef53-305e27787804" 222 | }, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "Rank 0, Initial value: 0.0\n", 229 | "Rank 2, Initial value: 4.0\n", 230 | "Rank 1, Initial value: 2.0\n", 231 | "Rank 2, After performing all_gather: [0.0, 2.0, 4.0]\n", 232 | "Rank 0, After performing all_gather: [0.0, 2.0, 4.0]\n", 233 | "Rank 1, After performing all_gather: [0.0, 2.0, 4.0]\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "idist.spawn(backend=\"gloo\", fn=all_gather_example, args=(), nproc_per_node=3)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "id": "FdTcvz4pStfD" 245 | }, 246 | "source": [ 247 | "Now let's assume you need to gather the predicted values which are distributed across all the processes on the main process so you could store them to a file. 
Here is how you can do it: " 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 13, 253 | "metadata": { 254 | "id": "NRvgasbLC8Ne" 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "def write_preds_to_file(predictions, filename):\n", 259 | " prediction_tensor = torch.tensor(predictions)\n", 260 | " prediction_tensor = idist.all_gather(prediction_tensor)\n", 261 | "\n", 262 | " if idist.get_rank() == 0:\n", 263 | " torch.save(prediction_tensor, filename)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "id": "Ib9PfbieNMw_" 270 | }, 271 | "source": [ 272 | "**Note:** In the above example, only the main process required the gathered values and not all the processes. This can also be done via the `gather()` method." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": { 278 | "id": "0fsu-NybC8t1" 279 | }, 280 | "source": [ 281 | "## Broadcast\n", 282 | "\n", 283 | "![Broadcast Diagram](https://github.com/pytorch-ignite/examples/blob/main/tutorials/assets/broadcast.png?raw=1)\n", 284 | "\n", 285 | "The [`broadcast()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.broadcast) method copies a tensor, float or string from a source process to all the other processes. For example, you need to send a message from rank 0 to all other ranks. You can do this by creating the actual message on rank 0 and a placeholder on all other ranks, then broadcast the message mentioning a source rank. You can also use `safe_mode=True` in case the placeholder is not defined on all ranks. 
" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 14, 291 | "metadata": { 292 | "id": "TWowyRRw55XM" 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "def broadcast_example(local_rank):\n", 297 | " message = f\"hello from rank {idist.get_rank()}\"\n", 298 | " print(f\"Rank {local_rank}, Initial value: {message}\")\n", 299 | "\n", 300 | " message = idist.broadcast(message, src=0)\n", 301 | " print(f\"Rank {local_rank}, After performing broadcast: {message}\")" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 15, 307 | "metadata": { 308 | "colab": { 309 | "base_uri": "https://localhost:8080/" 310 | }, 311 | "id": "SYXfJFBfUYiV", 312 | "outputId": "3d81fbed-4531-4fb3-c671-775d4649653d" 313 | }, 314 | "outputs": [ 315 | { 316 | "name": "stdout", 317 | "output_type": "stream", 318 | "text": [ 319 | "Rank 1, Initial value: hello from rank 1\n", 320 | "Rank 2, Initial value: hello from rank 2\n", 321 | "Rank 0, Initial value: hello from rank 0\n", 322 | "Rank 2, After performing broadcast: hello from rank 0\n", 323 | "Rank 0, After performing broadcast: hello from rank 0\n", 324 | "Rank 1, After performing broadcast: hello from rank 0\n" 325 | ] 326 | } 327 | ], 328 | "source": [ 329 | "idist.spawn(backend=\"gloo\", fn=broadcast_example, args=(), nproc_per_node=3)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": { 335 | "id": "cVXVi2rcTz9X" 336 | }, 337 | "source": [ 338 | "For a real world use case, let's assume you need to gather the predicted and actual values from all the processes on rank 0 for computing a metric and avoiding a memory error. You can do this by first using `all_gather()`, then computing the metric and finally using `broadcast()` to share the result with all processes. `src` below refers to the rank of the source process." 
339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": { 345 | "id": "7aXwKnrTC96T" 346 | }, 347 | "outputs": [], 348 | "source": [ 349 | "def compute_metric(prediction_tensor, target_tensor):\n", 350 | "\n", 351 | " prediction_tensor = idist.all_gather(prediction_tensor)\n", 352 | " target_tensor = idist.all_gather(target_tensor)\n", 353 | "\n", 354 | " result = 0.0\n", 355 | " if idist.get_rank() == 0:\n", 356 | " result = compute_fn(prediction_tensor, target_tensor)\n", 357 | "\n", 358 | " result = idist.broadcast(result, src=0)\n", 359 | "\n", 360 | " return result" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": { 366 | "id": "p5ma7l5cDIuC" 367 | }, 368 | "source": [ 369 | "## Barrier\n", 370 | "\n", 371 | "The [`barrier()`](https://pytorch.org/ignite/distributed.html#ignite.distributed.utils.barrier) method helps synchronize all processes. For example - while downloading data during training, we have to make sure only the main process (`rank = 0`) downloads the datasets to prevent the sub processes (`rank > 0`) from downloading the same file to the same path at the same time. This way all sub processes get a copy of this already downloaded dataset. This is where we can utilize `barrier()` to make the sub processes wait until the main process downloads the datasets. Once that is done, all the subprocesses instantiate the datasets, while the main process waits. Finally, all the processes are synced up." 
372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "id": "XInr0zlhDJl6" 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "def get_datasets(config):\n", 383 | " if idist.get_local_rank() > 0:\n", 384 | " idist.barrier()\n", 385 | "\n", 386 | " train_dataset, test_dataset = get_train_test_datasets(config[\"data_path\"])\n", 387 | "\n", 388 | " if idist.get_local_rank() == 0:\n", 389 | " idist.barrier()\n", 390 | "\n", 391 | " return train_dataset, test_dataset" 392 | ] 393 | } 394 | ], 395 | "metadata": { 396 | "colab": { 397 | "collapsed_sections": [], 398 | "name": "idist-collective-communication.ipynb", 399 | "provenance": [] 400 | }, 401 | "kernelspec": { 402 | "display_name": "Python 3 (ipykernel)", 403 | "language": "python", 404 | "name": "python3" 405 | }, 406 | "language_info": { 407 | "codemirror_mode": { 408 | "name": "ipython", 409 | "version": 3 410 | }, 411 | "file_extension": ".py", 412 | "mimetype": "text/x-python", 413 | "name": "python", 414 | "nbconvert_exporter": "python", 415 | "pygments_lexer": "ipython3", 416 | "version": "3.10.4" 417 | }, 418 | "pycharm": { 419 | "stem_cell": { 420 | "cell_type": "raw", 421 | "metadata": { 422 | "collapsed": false 423 | }, 424 | "source": [] 425 | } 426 | } 427 | }, 428 | "nbformat": 4, 429 | "nbformat_minor": 4 430 | } 431 | -------------------------------------------------------------------------------- /tutorials/assets/all-gather.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/tutorials/assets/all-gather.png -------------------------------------------------------------------------------- /tutorials/assets/all-reduce.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/tutorials/assets/all-reduce.png -------------------------------------------------------------------------------- /tutorials/assets/broadcast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytorch-ignite/examples/3cde534fdff10d9ae5dd90799301d9d0a6c07868/tutorials/assets/broadcast.png -------------------------------------------------------------------------------- /tutorials/beginner/01-getting-started.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "HgljXEAJEcFq", 6 | "metadata": { 7 | "id": "HgljXEAJEcFq" 8 | }, 9 | "source": [ 10 | "\n", 18 | "\n", 19 | "# Getting Started" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "P9VjKOirEcFu", 25 | "metadata": { 26 | "id": "P9VjKOirEcFu" 27 | }, 28 | "source": [ 29 | "Welcome to **PyTorch-Ignite**’s quick start guide that covers the\n", 30 | "essentials of getting a project up and running while walking through\n", 31 | "basic concepts of Ignite. In just a few lines of code, you can get your\n", 32 | "model trained and validated. The complete code can be found at the end\n", 33 | "of this guide.\n", 34 | "\n", 35 | "" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "id": "1QNvbg3SEcFw", 41 | "metadata": { 42 | "id": "1QNvbg3SEcFw" 43 | }, 44 | "source": [ 45 | "## Prerequisites\n", 46 | "\n", 47 | "This tutorial assumes you are familiar with the:\n", 48 | "\n", 49 | "1. Basics of Python and deep learning\n", 50 | "2. 
Structure of PyTorch code" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "XTHzzYyoEcFy", 56 | "metadata": { 57 | "id": "XTHzzYyoEcFy" 58 | }, 59 | "source": [ 60 | "## Installation\n", 61 | "\n", 62 | "From `pip`\n", 63 | "\n", 64 | "``` shell\n", 65 | "pip install pytorch-ignite\n", 66 | "```\n", 67 | "\n", 68 | "From `conda`\n", 69 | "\n", 70 | "``` shell\n", 71 | "conda install ignite -c pytorch\n", 72 | "```\n", 73 | "\n", 74 | "See [here](https://pytorch-ignite.ai/how-to-guides/installation/) for other installation\n", 75 | "options." 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "DcnSr5sGEcFz", 81 | "metadata": { 82 | "id": "DcnSr5sGEcFz" 83 | }, 84 | "source": [ 85 | "## Code\n", 86 | "\n", 87 | "Import the following:" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "Saizk3heEcFz", 94 | "metadata": { 95 | "id": "Saizk3heEcFz" 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "import torch\n", 100 | "from torch import nn\n", 101 | "from torch.utils.data import DataLoader\n", 102 | "from torchvision.datasets import MNIST\n", 103 | "from torchvision.models import resnet18\n", 104 | "from torchvision.transforms import Compose, Normalize, ToTensor\n", 105 | "\n", 106 | "from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator\n", 107 | "from ignite.metrics import Accuracy, Loss\n", 108 | "from ignite.handlers import ModelCheckpoint\n", 109 | "from ignite.contrib.handlers import TensorboardLogger, global_step_from_engine" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "id": "ecMYtJF7OvgT", 115 | "metadata": { 116 | "id": "ecMYtJF7OvgT" 117 | }, 118 | "source": [ 119 | "Speed things up by setting [device](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) to `cuda` if available else `cpu`." 
120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "id": "sdjDKcFhOuQn", 126 | "metadata": { 127 | "id": "sdjDKcFhOuQn" 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "id": "4r_PUH1yEcF1", 137 | "metadata": { 138 | "id": "4r_PUH1yEcF1" 139 | }, 140 | "source": [ 141 | "Define a class of your model or use the predefined ResNet18 model (modified for MNIST) below, instantiate it and move it to device:" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "dVSVAT0OEcF1", 148 | "metadata": { 149 | "id": "dVSVAT0OEcF1" 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "class Net(nn.Module):\n", 154 | "\n", 155 | " def __init__(self):\n", 156 | " super(Net, self).__init__()\n", 157 | " \n", 158 | " # Changed the output layer to output 10 classes instead of 1000 classes\n", 159 | " self.model = resnet18(num_classes=10)\n", 160 | "\n", 161 | " # Changed the input layer to take grayscale images for MNIST instead of RGB images\n", 162 | " self.model.conv1 = nn.Conv2d(\n", 163 | " 1, 64, kernel_size=3, padding=1, bias=False\n", 164 | " )\n", 165 | "\n", 166 | " def forward(self, x):\n", 167 | " return self.model(x)\n", 168 | "\n", 169 | "\n", 170 | "model = Net().to(device)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "id": "DDIW2zedEcF3", 176 | "metadata": { 177 | "id": "DDIW2zedEcF3" 178 | }, 179 | "source": [ 180 | "Now let us define the training and validation datasets (as\n", 181 | "[torch.utils.data.DataLoader](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader))\n", 182 | "and store them in `train_loader` and `val_loader` respectively. 
We have\n", 183 | "used the [MNIST](https://pytorch.org/vision/stable/datasets.html#mnist)\n", 184 | "dataset for ease of understanding.\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "PFNgx_-TEcF4", 191 | "metadata": { 192 | "id": "PFNgx_-TEcF4" 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n", 197 | "\n", 198 | "train_loader = DataLoader(\n", 199 | " MNIST(download=True, root=\".\", transform=data_transform, train=True), batch_size=128, shuffle=True\n", 200 | ")\n", 201 | "\n", 202 | "val_loader = DataLoader(\n", 203 | " MNIST(download=True, root=\".\", transform=data_transform, train=False), batch_size=256, shuffle=False\n", 204 | ")" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "id": "VC9BUtWXEcF6", 210 | "metadata": { 211 | "id": "VC9BUtWXEcF6" 212 | }, 213 | "source": [ 214 | "Finally, we will specify the optimizer and the loss function:" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "id": "6VkGmtVZEcF7", 221 | "metadata": { 222 | "id": "6VkGmtVZEcF7" 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005)\n", 227 | "criterion = nn.CrossEntropyLoss()" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "id": "cb-ak9gEEcF7", 233 | "metadata": { 234 | "id": "cb-ak9gEEcF7" 235 | }, 236 | "source": [ 237 | "And we’re done with setting up the important parts of the project.\n", 238 | "PyTorch-Ignite will handle all other boilerplate code as we will see\n", 239 | "below. 
Next we have to define a trainer engine by passing our model,\n", 240 | "optimizer and loss function to\n", 241 | "[`create_supervised_trainer`](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html),\n", 242 | "and two evaluator engines by passing Ignite’s out-of-the-box\n", 243 | "[metrics](https://pytorch.org/ignite/metrics.html#complete-list-of-metrics)\n", 244 | "and the model to\n", 245 | "[`create_supervised_evaluator`](https://pytorch.org/ignite/generated/ignite.engine.create_supervised_evaluator.html#create-supervised-evaluator). We have defined separate evaluator engines for training and validation because they will serve different functions as we will see later in this tutorial:" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "id": "NufcPqJaEcF8", 252 | "metadata": { 253 | "id": "NufcPqJaEcF8" 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "trainer = create_supervised_trainer(model, optimizer, criterion, device)\n", 258 | "\n", 259 | "val_metrics = {\n", 260 | " \"accuracy\": Accuracy(),\n", 261 | " \"loss\": Loss(criterion)\n", 262 | "}\n", 263 | "\n", 264 | "train_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)\n", 265 | "val_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "id": "S7YThetiEcF8", 271 | "metadata": { 272 | "id": "S7YThetiEcF8" 273 | }, 274 | "source": [ 275 | "The objects `trainer`, `train_evaluator` and `val_evaluator` are all instances of\n", 276 | "[`Engine`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine) - the main component of Ignite, which is essentially an abstraction over\n", 277 | "the training or validation loop.\n", 278 | "\n", 279 | "If you need more control over your training and validation loops, you\n", 280 | "can create custom `trainer`, `train_evaluator` and 
`val_evaluator` objects by wrapping the step\n", 281 | "logic in `Engine` :\n", 282 | "\n", 283 | "```python\n", 284 | "def train_step(engine, batch):\n", 285 | " model.train()\n", 286 | " optimizer.zero_grad()\n", 287 | " x, y = batch[0].to(device), batch[1].to(device)\n", 288 | " y_pred = model(x)\n", 289 | " loss = criterion(y_pred, y)\n", 290 | " loss.backward()\n", 291 | " optimizer.step()\n", 292 | " return loss.item()\n", 293 | "\n", 294 | "trainer = Engine(train_step)\n", 295 | "\n", 296 | "def validation_step(engine, batch):\n", 297 | " model.eval()\n", 298 | " with torch.no_grad():\n", 299 | " x, y = batch[0].to(device), batch[1].to(device)\n", 300 | " y_pred = model(x)\n", 301 | " return y_pred, y\n", 302 | "\n", 303 | "train_evaluator = Engine(validation_step)\n", 304 | "val_evaluator = Engine(validation_step)\n", 305 | "\n", 306 | "# Attach metrics to the evaluators\n", 307 | "for name, metric in val_metrics.items():\n", 308 | " metric.attach(train_evaluator, name)\n", 309 | "\n", 310 | "for name, metric in val_metrics.items():\n", 311 | " metric.attach(val_evaluator, name)\n", 312 | "```" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "id": "Sw90sOK9EcF9", 318 | "metadata": { 319 | "id": "Sw90sOK9EcF9" 320 | }, 321 | "source": [ 322 | "We can customize the code further by adding all kinds of event handlers.\n", 323 | "`Engine` allows adding handlers on various events that are triggered\n", 324 | "during the run. When an event is triggered, attached handlers\n", 325 | "(functions) are executed. 
Thus, for logging purposes we add a function\n", 326 | "to be executed at the end of every `log_interval`-th iteration:" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "id": "YGm_-loUEcF9", 333 | "metadata": { 334 | "id": "YGm_-loUEcF9" 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "# How many batches to wait before logging training status\n", 339 | "log_interval = 100" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "id": "V3xpFBI6EcF9", 346 | "metadata": { 347 | "id": "V3xpFBI6EcF9" 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "@trainer.on(Events.ITERATION_COMPLETED(every=log_interval))\n", 352 | "def log_training_loss(engine):\n", 353 | " print(f\"Epoch[{engine.state.epoch}], Iter[{engine.state.iteration}] Loss: {engine.state.output:.2f}\")" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "id": "O6uwwXO8EcF-", 359 | "metadata": { 360 | "id": "O6uwwXO8EcF-" 361 | }, 362 | "source": [ 363 | "or equivalently without the decorator but attaching the handler function\n", 364 | "to the `trainer` via\n", 365 | "[`add_event_handler`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.add_event_handler)\n", 366 | "\n", 367 | "``` python\n", 368 | "def log_training_loss(engine):\n", 369 | " print(f\"Epoch[{engine.state.epoch}], Iter[{engine.state.iteration}] Loss: {engine.state.output:.2f}\")\n", 370 | "\n", 371 | "trainer.add_event_handler(Events.ITERATION_COMPLETED(every=log_interval), log_training_loss)\n", 372 | "```" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "id": "quQzbAv6EcF-", 378 | "metadata": { 379 | "id": "quQzbAv6EcF-" 380 | }, 381 | "source": [ 382 | "After an epoch ends during training, we can compute the training and\n", 383 | "validation metrics by running `train_evaluator` on `train_loader` and `val_evaluator` on\n", 384 | "`val_loader` respectively.
Hence we will attach two additional handlers to `trainer`\n", 385 | "when an epoch completes:" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "id": "eCE552PFEcF_", 392 | "metadata": { 393 | "id": "eCE552PFEcF_" 394 | }, 395 | "outputs": [], 396 | "source": [ 397 | "@trainer.on(Events.EPOCH_COMPLETED)\n", 398 | "def log_training_results(trainer):\n", 399 | " train_evaluator.run(train_loader)\n", 400 | " metrics = train_evaluator.state.metrics\n", 401 | " print(f\"Training Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")\n", 402 | "\n", 403 | "\n", 404 | "@trainer.on(Events.EPOCH_COMPLETED)\n", 405 | "def log_validation_results(trainer):\n", 406 | " val_evaluator.run(val_loader)\n", 407 | " metrics = val_evaluator.state.metrics\n", 408 | " print(f\"Validation Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "id": "7JRLbhiw903w", 414 | "metadata": { 415 | "id": "7JRLbhiw903w" 416 | }, 417 | "source": [ 418 | "We can use [`ModelCheckpoint()`](https://pytorch.org/ignite/generated/ignite.handlers.checkpoint.ModelCheckpoint.html#modelcheckpoint) as shown below to save the `n_saved` best models determined by a metric (here accuracy) after each epoch is completed. We attach `model_checkpoint` to `val_evaluator` because we want the two models with the highest accuracies on the validation dataset rather than the training dataset. This is why we defined two separate evaluators (`val_evaluator` and `train_evaluator`) before." 
419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "id": "W6Zd7vKn1LLO", 425 | "metadata": { 426 | "id": "W6Zd7vKn1LLO" 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "# Score function to return current value of any metric we defined above in val_metrics\n", 431 | "def score_function(engine):\n", 432 | " return engine.state.metrics[\"accuracy\"]\n", 433 | "\n", 434 | "# Checkpoint to store n_saved best models wrt score function\n", 435 | "model_checkpoint = ModelCheckpoint(\n", 436 | " \"checkpoint\",\n", 437 | " n_saved=2,\n", 438 | " filename_prefix=\"best\",\n", 439 | " score_function=score_function,\n", 440 | " score_name=\"accuracy\",\n", 441 | " global_step_transform=global_step_from_engine(trainer), # helps fetch the trainer's state\n", 442 | ")\n", 443 | " \n", 444 | "# Save the model after every epoch of val_evaluator is completed\n", 445 | "val_evaluator.add_event_handler(Events.COMPLETED, model_checkpoint, {\"model\": model})" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "id": "KAB0QtrgiTLK", 451 | "metadata": { 452 | "id": "KAB0QtrgiTLK" 453 | }, 454 | "source": [ 455 | "We will use [`TensorboardLogger()`](https://pytorch.org/ignite/generated/ignite.contrib.handlers.tensorboard_logger.html#ignite.contrib.handlers.tensorboard_logger.TensorboardLogger) to log trainer's loss, and training and validation metrics separately." 
456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "id": "Rdt6AE6oeh6k", 462 | "metadata": { 463 | "id": "Rdt6AE6oeh6k" 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "# Define a Tensorboard logger\n", 468 | "tb_logger = TensorboardLogger(log_dir=\"tb-logger\")\n", 469 | "\n", 470 | "# Attach handler to plot trainer's loss every 100 iterations\n", 471 | "tb_logger.attach_output_handler(\n", 472 | " trainer,\n", 473 | " event_name=Events.ITERATION_COMPLETED(every=log_interval),\n", 474 | " tag=\"training\",\n", 475 | " output_transform=lambda loss: {\"batch_loss\": loss},\n", 476 | ")\n", 477 | "\n", 478 | "# Attach handler for plotting both evaluators' metrics after every epoch completes\n", 479 | "for tag, evaluator in [(\"training\", train_evaluator), (\"validation\", val_evaluator)]:\n", 480 | " tb_logger.attach_output_handler(\n", 481 | " evaluator,\n", 482 | " event_name=Events.EPOCH_COMPLETED,\n", 483 | " tag=tag,\n", 484 | " metric_names=\"all\",\n", 485 | " global_step_transform=global_step_from_engine(trainer),\n", 486 | " )" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "id": "Aq0qwiZrEcF_", 492 | "metadata": { 493 | "id": "Aq0qwiZrEcF_" 494 | }, 495 | "source": [ 496 | "Finally, we start the engine on the training dataset and run it for 5\n", 497 | "epochs:" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "id": "qnmTh4FeEcGA", 504 | "metadata": { 505 | "colab": { 506 | "base_uri": "https://localhost:8080/" 507 | }, 508 | "id": "qnmTh4FeEcGA", 509 | "outputId": "f444d98f-8f45-44ea-bd82-9cecb6971bbe" 510 | }, 511 | "outputs": [ 512 | { 513 | "name": "stdout", 514 | "output_type": "stream", 515 | "text": [ 516 | "Epoch[1], Iter[100] Loss: 0.19\n", 517 | "Epoch[1], Iter[200] Loss: 0.13\n", 518 | "Epoch[1], Iter[300] Loss: 0.08\n", 519 | "Epoch[1], Iter[400] Loss: 0.11\n", 520 | "Training Results - Epoch[1] Avg accuracy: 0.97 Avg loss: 0.09\n", 521 | 
"Validation Results - Epoch[1] Avg accuracy: 0.97 Avg loss: 0.08\n", 522 | "Epoch[2], Iter[500] Loss: 0.07\n", 523 | "Epoch[2], Iter[600] Loss: 0.04\n", 524 | "Epoch[2], Iter[700] Loss: 0.09\n", 525 | "Epoch[2], Iter[800] Loss: 0.07\n", 526 | "Epoch[2], Iter[900] Loss: 0.16\n", 527 | "Training Results - Epoch[2] Avg accuracy: 0.93 Avg loss: 0.20\n", 528 | "Validation Results - Epoch[2] Avg accuracy: 0.93 Avg loss: 0.20\n", 529 | "Epoch[3], Iter[1000] Loss: 0.02\n", 530 | "Epoch[3], Iter[1100] Loss: 0.02\n", 531 | "Epoch[3], Iter[1200] Loss: 0.05\n", 532 | "Epoch[3], Iter[1300] Loss: 0.06\n", 533 | "Epoch[3], Iter[1400] Loss: 0.06\n", 534 | "Training Results - Epoch[3] Avg accuracy: 0.94 Avg loss: 0.20\n", 535 | "Validation Results - Epoch[3] Avg accuracy: 0.94 Avg loss: 0.23\n", 536 | "Epoch[4], Iter[1500] Loss: 0.08\n", 537 | "Epoch[4], Iter[1600] Loss: 0.02\n", 538 | "Epoch[4], Iter[1700] Loss: 0.08\n", 539 | "Epoch[4], Iter[1800] Loss: 0.07\n", 540 | "Training Results - Epoch[4] Avg accuracy: 0.98 Avg loss: 0.06\n", 541 | "Validation Results - Epoch[4] Avg accuracy: 0.98 Avg loss: 0.07\n", 542 | "Epoch[5], Iter[1900] Loss: 0.02\n", 543 | "Epoch[5], Iter[2000] Loss: 0.11\n", 544 | "Epoch[5], Iter[2100] Loss: 0.05\n", 545 | "Epoch[5], Iter[2200] Loss: 0.02\n", 546 | "Epoch[5], Iter[2300] Loss: 0.01\n", 547 | "Training Results - Epoch[5] Avg accuracy: 0.99 Avg loss: 0.02\n", 548 | "Validation Results - Epoch[5] Avg accuracy: 0.99 Avg loss: 0.03\n" 549 | ] 550 | }, 551 | { 552 | "data": { 553 | "text/plain": [ 554 | "State:\n", 555 | "\titeration: 2345\n", 556 | "\tepoch: 5\n", 557 | "\tepoch_length: 469\n", 558 | "\tmax_epochs: 5\n", 559 | "\toutput: 0.005351857747882605\n", 560 | "\tbatch: \n", 561 | "\tmetrics: \n", 562 | "\tdataloader: \n", 563 | "\tseed: \n", 564 | "\ttimes: " 565 | ] 566 | }, 567 | "execution_count": 85, 568 | "metadata": { 569 | "tags": [] 570 | }, 571 | "output_type": "execute_result" 572 | } 573 | ], 574 | "source": [ 575 | 
"trainer.run(train_loader, max_epochs=5)" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "id": "ZXhL1-vDgBeT", 582 | "metadata": { 583 | "id": "ZXhL1-vDgBeT" 584 | }, 585 | "outputs": [], 586 | "source": [ 587 | "# Let's close the logger and inspect our results\n", 588 | "tb_logger.close()\n", 589 | "\n", 590 | "%load_ext tensorboard\n", 591 | "\n", 592 | "%tensorboard --logdir=." 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": null, 598 | "id": "_xj4NMjdArYh", 599 | "metadata": { 600 | "colab": { 601 | "base_uri": "https://localhost:8080/" 602 | }, 603 | "id": "_xj4NMjdArYh", 604 | "outputId": "3291ca3b-809a-4ed4-d657-0b83eeb45bc5" 605 | }, 606 | "outputs": [ 607 | { 608 | "name": "stdout", 609 | "output_type": "stream", 610 | "text": [ 611 | "'best_model_4_accuracy=0.9856.pt' 'best_model_5_accuracy=0.9857.pt'\n" 612 | ] 613 | } 614 | ], 615 | "source": [ 616 | "# At last we can view our best models\n", 617 | "!ls checkpoint" 618 | ] 619 | }, 620 | { 621 | "cell_type": "markdown", 622 | "id": "wJ9k2coEEcGD", 623 | "metadata": { 624 | "id": "wJ9k2coEEcGD" 625 | }, 626 | "source": [ 627 | "## Next Steps\n", 628 | "\n", 629 | "1. Check out [tutorials](https://pytorch-ignite.ai/tutorials) if you want to continue\n", 630 | " learning more about PyTorch-Ignite.\n", 631 | "2. Head over to [how-to guides](https://pytorch-ignite.ai/how-to-guides) if you’re looking\n", 632 | " for a specific solution.\n", 633 | "3. If you want to set up a PyTorch-Ignite project, visit [Code\n", 634 | " Generator](https://code-generator.pytorch-ignite.ai/) to get a variety of\n", 635 | " easily customizable templates and out-of-the-box features."
636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "id": "vya75pqVEcGE", 641 | "metadata": { 642 | "id": "vya75pqVEcGE", 643 | "tags": [] 644 | }, 645 | "source": [ 646 | "## Complete Code\n", 647 | "\n", 648 | "``` python\n", 649 | "import torch\n", 650 | "from torch import nn\n", 651 | "from torch.utils.data import DataLoader\n", 652 | "from torchvision.datasets import MNIST\n", 653 | "from torchvision.models import resnet18\n", 654 | "from torchvision.transforms import Compose, Normalize, ToTensor\n", 655 | "\n", 656 | "from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator\n", 657 | "from ignite.metrics import Accuracy, Loss\n", 658 | "from ignite.handlers import ModelCheckpoint\n", 659 | "from ignite.contrib.handlers import TensorboardLogger, global_step_from_engine\n", 660 | "\n", 661 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 662 | "\n", 663 | "\n", 664 | "class Net(nn.Module):\n", 665 | "\n", 666 | " def __init__(self):\n", 667 | " super(Net, self).__init__()\n", 668 | " \n", 669 | " self.model = resnet18(num_classes=10)\n", 670 | "\n", 671 | " self.model.conv1 = nn.Conv2d(\n", 672 | " 1, 64, kernel_size=3, padding=1, bias=False\n", 673 | " )\n", 674 | "\n", 675 | " def forward(self, x):\n", 676 | " return self.model(x)\n", 677 | "\n", 678 | "\n", 679 | "model = Net().to(device)\n", 680 | "\n", 681 | "data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])\n", 682 | "\n", 683 | "train_loader = DataLoader(\n", 684 | " MNIST(download=True, root=\".\", transform=data_transform, train=True), batch_size=128, shuffle=True\n", 685 | ")\n", 686 | "\n", 687 | "val_loader = DataLoader(\n", 688 | " MNIST(download=True, root=\".\", transform=data_transform, train=False), batch_size=256, shuffle=False\n", 689 | ")\n", 690 | "\n", 691 | "optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005)\n", 692 | "criterion = nn.CrossEntropyLoss()\n",
693 | "\n", 694 | "trainer = create_supervised_trainer(model, optimizer, criterion, device)\n", 695 | "\n", 696 | "val_metrics = {\n", 697 | " \"accuracy\": Accuracy(),\n", 698 | " \"loss\": Loss(criterion)\n", 699 | "}\n", 700 | "\n", 701 | "train_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)\n", 702 | "val_evaluator = create_supervised_evaluator(model, metrics=val_metrics, device=device)\n", 703 | "\n", 704 | "log_interval = 100\n", 705 | "\n", 706 | "@trainer.on(Events.ITERATION_COMPLETED(every=log_interval))\n", 707 | "def log_training_loss(engine):\n", 708 | " print(f\"Epoch[{engine.state.epoch}], Iter[{engine.state.iteration}] Loss: {engine.state.output:.2f}\")\n", 709 | "\n", 710 | "@trainer.on(Events.EPOCH_COMPLETED)\n", 711 | "def log_training_results(trainer):\n", 712 | " train_evaluator.run(train_loader)\n", 713 | " metrics = train_evaluator.state.metrics\n", 714 | " print(f\"Training Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")\n", 715 | "\n", 716 | "\n", 717 | "@trainer.on(Events.EPOCH_COMPLETED)\n", 718 | "def log_validation_results(trainer):\n", 719 | " val_evaluator.run(val_loader)\n", 720 | " metrics = val_evaluator.state.metrics\n", 721 | " print(f\"Validation Results - Epoch[{trainer.state.epoch}] Avg accuracy: {metrics['accuracy']:.2f} Avg loss: {metrics['loss']:.2f}\")\n", 722 | "\n", 723 | "\n", 724 | "def score_function(engine):\n", 725 | " return engine.state.metrics[\"accuracy\"]\n", 726 | "\n", 727 | "\n", 728 | "model_checkpoint = ModelCheckpoint(\n", 729 | " \"checkpoint\",\n", 730 | " n_saved=2,\n", 731 | " filename_prefix=\"best\",\n", 732 | " score_function=score_function,\n", 733 | " score_name=\"accuracy\",\n", 734 | " global_step_transform=global_step_from_engine(trainer),\n", 735 | ")\n", 736 | " \n", 737 | "val_evaluator.add_event_handler(Events.COMPLETED, model_checkpoint, {\"model\": model})\n", 738 | "\n", 739 | "tb_logger 
= TensorboardLogger(log_dir=\"tb-logger\")\n", 740 | "\n", 741 | "tb_logger.attach_output_handler(\n", 742 | " trainer,\n", 743 | " event_name=Events.ITERATION_COMPLETED(every=log_interval),\n", 744 | " tag=\"training\",\n", 745 | " output_transform=lambda loss: {\"batch_loss\": loss},\n", 746 | ")\n", 747 | "\n", 748 | "for tag, evaluator in [(\"training\", train_evaluator), (\"validation\", val_evaluator)]:\n", 749 | " tb_logger.attach_output_handler(\n", 750 | " evaluator,\n", 751 | " event_name=Events.EPOCH_COMPLETED,\n", 752 | " tag=tag,\n", 753 | " metric_names=\"all\",\n", 754 | " global_step_transform=global_step_from_engine(trainer),\n", 755 | " )\n", 756 | "\n", 757 | "trainer.run(train_loader, max_epochs=5)\n", 758 | "\n", 759 | "tb_logger.close()\n", 760 | "```" 761 | ] 762 | } 763 | ], 764 | "metadata": { 765 | "accelerator": "GPU", 766 | "colab": { 767 | "collapsed_sections": [], 768 | "name": "getting-started.ipynb", 769 | "provenance": [] 770 | }, 771 | "kernelspec": { 772 | "display_name": "Python 3 (ipykernel)", 773 | "language": "python", 774 | "name": "python3" 775 | }, 776 | "language_info": { 777 | "codemirror_mode": { 778 | "name": "ipython", 779 | "version": 3 780 | }, 781 | "file_extension": ".py", 782 | "mimetype": "text/x-python", 783 | "name": "python", 784 | "nbconvert_exporter": "python", 785 | "pygments_lexer": "ipython3", 786 | "version": "3.10.4" 787 | } 788 | }, 789 | "nbformat": 4, 790 | "nbformat_minor": 5 791 | } 792 | -------------------------------------------------------------------------------- /tutorials/beginner/02-transformers-text-classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "HCS-d1T3znj2" 7 | }, 8 | "source": [ 9 | "\n", 21 | "# Transformers for Text Classification with IMDb Reviews" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "id": "rjZMYxFoznj9" 
28 | }, 29 | "source": [ 30 | "In this tutorial we will fine tune a model from the Transformers library for text classification using PyTorch-Ignite. We will be following the [Fine-tuning a pretrained model](https://huggingface.co/transformers/training.html) tutorial for preprocessing text and defining the model, optimizer and dataloaders. Then we are going to use Ignite for:\n", 31 | "* Training and evaluating the model\n", 32 | "* Computing metrics\n", 33 | "* Setting up experiments and monitoring the model\n", 34 | "\n", 35 | "According to the tutorial, we will use the [IMDb Movie Reviews Dataset](https://ai.stanford.edu/~amaas/data/sentiment/) to classify a review as either positive or negative." 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "id": "sovYyC0Zznj-" 42 | }, 43 | "source": [ 44 | "## Required Dependencies " 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "id": "7XHAD9x7znj_" 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "!pip install pytorch-ignite transformers datasets" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "I80XSUXpvk1i" 62 | }, 63 | "source": [ 64 | "Before we dive in, we will seed everything using [`manual_seed`](https://pytorch.org/ignite/utils.html#ignite.utils.manual_seed)." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 2, 70 | "metadata": { 71 | "id": "enczLgLTznkH" 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "from ignite.utils import manual_seed\n", 76 | "\n", 77 | "manual_seed(42)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": { 83 | "id": "WZYyXYB5znkH" 84 | }, 85 | "source": [ 86 | "## Basic Setup\n", 87 | "\n", 88 | "Next we will follow the tutorial and load up our dataset and tokenizer to preprocess the data." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "gCVrdHOVgjtH" 95 | }, 96 | "source": [ 97 | "### Data Preprocessing" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "id": "PQ80tXFPwAnR" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "from datasets import load_dataset\n", 109 | "\n", 110 | "raw_datasets = load_dataset(\"imdb\")" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "id": "inP62g7LwV5n" 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "from transformers import AutoTokenizer\n", 122 | "\n", 123 | "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-cased\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "id": "aQgWjFq1yESe" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "def tokenize_function(examples):\n", 135 | " return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n", 136 | "\n", 137 | "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": { 143 | "id": "xnLbsx2Jhurm" 144 | }, 145 | "source": [ 146 | "We move towards the end of the tutorial for PyTorch specific instructions. Here we are extracting a larger subset of our original datasets. We also don't need to provide a seed since we seeded everything at the beginning." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 6, 152 | "metadata": { 153 | "id": "iPQy_FMcxBy3" 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "tokenized_datasets = tokenized_datasets.remove_columns([\"text\"])\n", 158 | "tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", 159 | "tokenized_datasets.set_format(\"torch\")\n", 160 | "\n", 161 | "small_train_dataset = tokenized_datasets[\"train\"].shuffle().select(range(5000))\n", 162 | "small_eval_dataset = tokenized_datasets[\"test\"].shuffle().select(range(5000))" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": { 168 | "id": "0JvnKjqRiEFP" 169 | }, 170 | "source": [ 171 | "### Dataloaders" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 7, 177 | "metadata": { 178 | "id": "APNr5lgsygtw" 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "from torch.utils.data import DataLoader\n", 183 | "\n", 184 | "train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=8)\n", 185 | "eval_dataloader = DataLoader(small_eval_dataset, batch_size=8)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": { 191 | "id": "ax8ToOAviGGS" 192 | }, 193 | "source": [ 194 | "### Model" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "id": "XVS15FELytIj" 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "from transformers import AutoModelForSequenceClassification\n", 206 | "\n", 207 | "model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-cased\", num_labels=2)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": { 213 | "id": "QOI8dLDDiI7c" 214 | }, 215 | "source": [ 216 | "### Optimizer" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 9, 222 | "metadata": { 223 | "id": "RIihFsPryvI_" 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "from 
transformers import AdamW\n", 228 | "\n", 229 | "optimizer = AdamW(model.parameters(), lr=5e-5)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": { 235 | "id": "0-Yd47-UiLFs" 236 | }, 237 | "source": [ 238 | "### LR Scheduler\n", 239 | "\n", 240 | "We will use the built-in Ignite alternative of `linear` scheduler which is [`PiecewiseLinear`](https://pytorch.org/ignite/generated/ignite.handlers.param_scheduler.PiecewiseLinear.html#piecewiselinear). We will also increase the number of epochs." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 10, 246 | "metadata": { 247 | "id": "U5gBJL-uS9WG" 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "from ignite.contrib.handlers import PiecewiseLinear\n", 252 | "\n", 253 | "num_epochs = 10\n", 254 | "num_training_steps = num_epochs * len(train_dataloader)\n", 255 | "\n", 256 | "milestones_values = [\n", 257 | " (0, 5e-5),\n", 258 | " (num_training_steps, 0.0),\n", 259 | " ]\n", 260 | "lr_scheduler = PiecewiseLinear(\n", 261 | " optimizer, param_name=\"lr\", milestones_values=milestones_values\n", 262 | " )" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": { 268 | "id": "vEPNZEO8jRSf" 269 | }, 270 | "source": [ 271 | "### Set Device" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "id": "A0WXnFD7bg2C" 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "import torch\n", 283 | "\n", 284 | "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n", 285 | "model.to(device)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "id": "elcEG2gojTsI" 292 | }, 293 | "source": [ 294 | "## Create Trainer\n", 295 | "\n", 296 | "Ignite's [`Engine`](https://pytorch-ignite.ai/concepts/01-engine/) allows users to define a `process_function` to process a given batch of data. This function is applied to all the batches of the dataset. 
This is a general class that can be applied to train and validate models. A `process_function` has two parameters `engine` and `batch`." 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": { 302 | "id": "iVoVnJTlWau5" 303 | }, 304 | "source": [ 305 | "The code for processing a batch of training data in the tutorial is as follows:\n", 306 | "\n", 307 | "```python\n", 308 | "for batch in train_dataloader:\n", 309 | " batch = {k: v.to(device) for k, v in batch.items()}\n", 310 | " outputs = model(**batch)\n", 311 | " loss = outputs.loss\n", 312 | " loss.backward()\n", 313 | "\n", 314 | " optimizer.step()\n", 315 | " lr_scheduler.step()\n", 316 | " optimizer.zero_grad()\n", 317 | " progress_bar.update(1)\n", 318 | "```\n", 319 | "\n", 320 | "Therefore we will define a `process_function` (called `train_step` below) to do the above tasks:\n", 321 | "\n", 322 | "* Set `model` in train mode. \n", 323 | "* Move items of the `batch` to `device`.\n", 324 | "* Perform forward pass and generate `output`.\n", 325 | "* Extract loss.\n", 326 | "* Perform backward pass using loss to calculate gradients for the model parameters.\n", 327 | "* Optimize model parameters using gradients and optimizer.\n", 328 | "\n", 329 | "Finally, we choose to return the `loss` so we can utilize it for further processing.\n", 330 | "\n", 331 | "You will also notice that we do not update the `lr_scheduler` and `progress_bar` in `train_step`. This is because Ignite automatically takes care of it as we will see later in this tutorial." 
332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 37, 337 | "metadata": { 338 | "id": "Q4ncIcYcznkQ" 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "def train_step(engine, batch): \n", 343 | " model.train()\n", 344 | " \n", 345 | " batch = {k: v.to(device) for k, v in batch.items()}\n", 346 | " outputs = model(**batch)\n", 347 | " loss = outputs.loss\n", 348 | " loss.backward()\n", 349 | "\n", 350 | " optimizer.step()\n", 351 | " optimizer.zero_grad()\n", 352 | "\n", 353 | " return loss" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": { 359 | "id": "n1rXDRO4Y_dp" 360 | }, 361 | "source": [ 362 | "And then we create a model `trainer` by attaching the `train_step` to the training engine. Later, we will use `trainer` for looping over the training dataset for `num_epochs`." 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 38, 368 | "metadata": { 369 | "id": "LFWgXnX4cWV1" 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "from ignite.engine import Engine\n", 374 | "\n", 375 | "trainer = Engine(train_step)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": { 381 | "id": "U4Wx_3rrns0a" 382 | }, 383 | "source": [ 384 | "The `lr_scheduler` we defined previously was a handler. \n", 385 | "\n", 386 | "[Handlers](https://pytorch-ignite.ai/concepts/02-events-and-handlers/#handlers) can be any type of function (lambda functions, class methods, etc.). On top of that, Ignite provides several built-in handlers to reduce redundant code. We attach these handlers to engine which is triggered at a specific [event](https://pytorch-ignite.ai/concepts/02-events-and-handlers/). These events can be anything like the start of an iteration or the end of an epoch. 
[Here](https://pytorch.org/ignite/generated/ignite.engine.events.Events.html#events) is a complete list of built-in events.\n", 387 | "\n", 388 | "Therefore, we will attach the `lr_scheduler` (handler) to the `trainer` (`engine`) via [`add_event_handler()`](https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html#ignite.engine.engine.Engine.add_event_handler) so it can be triggered at `Events.ITERATION_STARTED` (start of an iteration) automatically." 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": { 395 | "id": "LauRCX2Vi8Su" 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "from ignite.engine import Events\n", 400 | "\n", 401 | "trainer.add_event_handler(Events.ITERATION_STARTED, lr_scheduler)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": { 407 | "id": "vPXFO_3dc2ll" 408 | }, 409 | "source": [ 410 | "This is the reason we did not include `lr_scheduler.step()` in `train_step()`." 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": { 416 | "id": "bNSOB05LjfVB" 417 | }, 418 | "source": [ 419 | "## Progress Bar\n", 420 | "\n", 421 | "Next we create an instance of Ignite's [`ProgressBar()`](https://pytorch.org/ignite/generated/ignite.contrib.handlers.tqdm_logger.html#ignite.contrib.handlers.tqdm_logger.ProgressBar) and attach it to the trainer to replace `progress_bar.update(1)`." 
422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 40, 427 | "metadata": { 428 | "id": "PxSVmnLica2c" 429 | }, 430 | "outputs": [], 431 | "source": [ 432 | "from ignite.contrib.handlers import ProgressBar\n", 433 | "\n", 434 | "pbar = ProgressBar()" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": { 440 | "id": "X9KSg4s5eLJn" 441 | }, 442 | "source": [ 443 | "We can either, simply track the progress:" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 29, 449 | "metadata": { 450 | "id": "C7t_K2MOd6qp" 451 | }, 452 | "outputs": [], 453 | "source": [ 454 | "pbar.attach(trainer)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": { 460 | "id": "sxwfbhkCeRf0" 461 | }, 462 | "source": [ 463 | "Or also track the output of `trainer` (or `train_step`):" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 41, 469 | "metadata": { 470 | "id": "Ib1uF2GHeQlt" 471 | }, 472 | "outputs": [], 473 | "source": [ 474 | "pbar.attach(trainer, output_transform=lambda x: {'loss': x})" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": { 480 | "id": "Bf7_CjXvjj0K" 481 | }, 482 | "source": [ 483 | "## Create Evaluator\n", 484 | "\n", 485 | "Similar to the training `process_function`, we setup a function to evaluate a single batch of train/validation/test data.\n", 486 | "\n", 487 | "```python\n", 488 | "model.eval()\n", 489 | "for batch in eval_dataloader:\n", 490 | " batch = {k: v.to(device) for k, v in batch.items()}\n", 491 | " with torch.no_grad():\n", 492 | " outputs = model(**batch)\n", 493 | "\n", 494 | " logits = outputs.logits\n", 495 | " predictions = torch.argmax(logits, dim=-1)\n", 496 | " metric.add_batch(predictions=predictions, references=batch[\"labels\"])\n", 497 | "```\n", 498 | "\n", 499 | " Here is what `evaluate_step()` below does:\n", 500 | "\n", 501 | "* Sets model in eval mode.\n", 502 | "* Move items of the `batch` to 
`device`.\n", 503 | "* With `torch.no_grad()`, no gradients are calculated for any succeeding steps.\n", 504 | "* Perform a forward pass on the model to calculate `outputs` from `batch`\n", 505 | "* Get the real `predictions` from `logits` (unnormalized scores for the positive and negative classes).\n", 506 | "\n", 507 | "Finally, we return the predictions and the actual labels so that we can compute the metrics.\n", 508 | "\n", 509 | "You will notice that we did not compute the metrics in `evaluate_step()`. This is because Ignite provides built-in [metrics](https://pytorch-ignite.ai/concepts/04-metrics/) which we can later attach to the engine." 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": { 515 | "id": "98H3y4xagmao" 516 | }, 517 | "source": [ 518 | "**Note:** Ignite suggests attaching metrics to evaluators and not trainers because during the training the model parameters are constantly changing and it is best to compute metrics on a stationary model. This information is important as there is a difference in the functions for training and evaluating. Training returns a single scalar loss. Evaluating returns `y_pred` and `y` as that output is used to calculate metrics per batch for the entire dataset.\n", 519 | "\n", 520 | "All metrics in Ignite require `y_pred` and `y` as outputs of the function attached to the Engine. 
" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 42, 526 | "metadata": { 527 | "id": "H3F69ZOwcUdQ" 528 | }, 529 | "outputs": [], 530 | "source": [ 531 | "def evaluate_step(engine, batch):\n", 532 | " model.eval()\n", 533 | "\n", 534 | " batch = {k: v.to(device) for k, v in batch.items()}\n", 535 | " with torch.no_grad():\n", 536 | " outputs = model(**batch)\n", 537 | "\n", 538 | " logits = outputs.logits\n", 539 | " predictions = torch.argmax(logits, dim=-1)\n", 540 | "\n", 541 | " return {'y_pred': predictions, 'y': batch[\"labels\"]}" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": { 547 | "id": "VFrSJYx-mutP" 548 | }, 549 | "source": [ 550 | "Below we create two engines, a training evaluator and a validation evaluator. `train_evaluator` and `validation_evaluator` use the same function but they serve different purposes as we will see later in this tutorial." 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 43, 556 | "metadata": { 557 | "id": "xfxf_TGadIjc" 558 | }, 559 | "outputs": [], 560 | "source": [ 561 | "train_evaluator = Engine(evaluate_step)\n", 562 | "validation_evaluator = Engine(evaluate_step)" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": { 568 | "id": "ESp1oe8jjtI6" 569 | }, 570 | "source": [ 571 | "## Attach Metrics\n", 572 | "\n", 573 | "The 🤗 tutorial defines one metric, accuracy, to be used for evaluation:\n", 574 | "\n", 575 | "```python\n", 576 | "metric= load_metric(\"accuracy\")\n", 577 | "```\n", 578 | "\n", 579 | "We can easily attach Ignite's built-in [`Accuracy()`](https://pytorch.org/ignite/generated/ignite.metrics.Accuracy.html#accuracy) metric to `train_evaluator` and `validation_evaluator`. We also need to specify the metric name (`accuracy` below). Internally, it will use `y_pred` and `y` to compute the accuracy. 
" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 44, 585 | "metadata": { 586 | "id": "RsT3Yq1sd5Os" 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "from ignite.metrics import Accuracy\n", 591 | "\n", 592 | "Accuracy().attach(train_evaluator, 'accuracy')\n", 593 | "Accuracy().attach(validation_evaluator, 'accuracy')" 594 | ] 595 | }, 596 | { 597 | "cell_type": "markdown", 598 | "metadata": { 599 | "id": "so6yub_fjydU" 600 | }, 601 | "source": [ 602 | "## Log Metrics\n", 603 | "\n", 604 | "Now we will define custom handlers (functions) and attach them to various `Events` of the training process.\n", 605 | "\n", 606 | "The functions below both achieve similar tasks. They print the results of the `evaluator` run on a dataset. `log_training_results()` does this on the training evaluator and train dataset, while `log_validation_results()` on the validation evaluator and validation dataset. Another difference is how these functions are attached in the trainer engine.\n", 607 | "\n", 608 | "The first method involves using a decorator, the syntax is simple - `@` `trainer.on(Events.EPOCH_COMPLETED)`, means that the decorated function will be attached to the trainer and called at the end of each epoch. \n", 609 | "\n", 610 | "The second method involves using the add_event_handler method of trainer - `trainer.add_event_handler(Events.EPOCH_COMPLETED, custom_function)`. This achieves the same result as the above. 
" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": null, 616 | "metadata": { 617 | "id": "mrl-60C9aEkW" 618 | }, 619 | "outputs": [], 620 | "source": [ 621 | "@trainer.on(Events.EPOCH_COMPLETED)\n", 622 | "def log_training_results(engine):\n", 623 | " train_evaluator.run(train_dataloader)\n", 624 | " metrics = train_evaluator.state.metrics\n", 625 | " avg_accuracy = metrics['accuracy']\n", 626 | " print(f\"Training Results - Epoch: {engine.state.epoch} Avg accuracy: {avg_accuracy:.3f}\")\n", 627 | " \n", 628 | "def log_validation_results(engine):\n", 629 | " validation_evaluator.run(eval_dataloader)\n", 630 | " metrics = validation_evaluator.state.metrics\n", 631 | " avg_accuracy = metrics['accuracy']\n", 632 | " print(f\"Validation Results - Epoch: {engine.state.epoch} Avg accuracy: {avg_accuracy:.3f}\")\n", 633 | "\n", 634 | "trainer.add_event_handler(Events.EPOCH_COMPLETED, log_validation_results)" 635 | ] 636 | }, 637 | { 638 | "cell_type": "markdown", 639 | "metadata": { 640 | "id": "fGbSjScZj2Bt" 641 | }, 642 | "source": [ 643 | "## Early Stopping\n", 644 | "\n", 645 | "Now we'll set up an [`EarlyStopping`](https://pytorch.org/ignite/generated/ignite.handlers.early_stopping.EarlyStopping.html#earlystopping) handler for the training process. `EarlyStopping` requires a score_function that allows the user to define whatever criteria to stop training. In this case, if the accuracy on the validation set does not improve in 2 epochs (`patience`), the training process will stop early. 
" 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": null, 651 | "metadata": { 652 | "id": "O0xElFVYeuL9" 653 | }, 654 | "outputs": [], 655 | "source": [ 656 | "from ignite.handlers import EarlyStopping\n", 657 | "\n", 658 | "def score_function(engine):\n", 659 | " val_accuracy = engine.state.metrics['accuracy']\n", 660 | " return val_accuracy\n", 661 | "\n", 662 | "handler = EarlyStopping(patience=2, score_function=score_function, trainer=trainer)\n", 663 | "validation_evaluator.add_event_handler(Events.COMPLETED, handler)" 664 | ] 665 | }, 666 | { 667 | "cell_type": "markdown", 668 | "metadata": { 669 | "id": "C1UtBUmmj9dq" 670 | }, 671 | "source": [ 672 | "## Model Checkpoint\n", 673 | "\n", 674 | "Lastly, we want to save the best model weights. So we will use Ignite's [`ModelCheckpoint`](https://pytorch.org/ignite/generated/ignite.handlers.checkpoint.ModelCheckpoint.html#modelcheckpoint) handler to checkpoint models at the end of each epoch. This will create a `models` directory and save the 2 best models (`n_saved`) with the prefix `bert-base-cased`." 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": { 681 | "id": "7xz8qozReQuG" 682 | }, 683 | "outputs": [], 684 | "source": [ 685 | "from ignite.handlers import ModelCheckpoint\n", 686 | "\n", 687 | "checkpointer = ModelCheckpoint(dirname='models', filename_prefix='bert-base-cased', n_saved=2, create_dir=True)\n", 688 | "trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model})" 689 | ] 690 | }, 691 | { 692 | "cell_type": "markdown", 693 | "metadata": { 694 | "id": "jgzcQVfvkAXK" 695 | }, 696 | "source": [ 697 | "## Begin Training!\n", 698 | "\n", 699 | "Next, we'll run the trainer for 10 epochs and monitor the results. Below we can see that `ProgressBar` prints the loss per iteration, and prints the results of training and validation as we specified in our custom function. 
" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": { 706 | "id": "sS0Ut5z0dmQc" 707 | }, 708 | "outputs": [], 709 | "source": [ 710 | "trainer.run(train_dataloader, max_epochs=num_epochs)" 711 | ] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "metadata": { 716 | "id": "OpqXiZUsznkY" 717 | }, 718 | "source": [ 719 | "That's it! We have successfully trained and evaluated a Transformer for Text Classification. " 720 | ] 721 | } 722 | ], 723 | "metadata": { 724 | "accelerator": "GPU", 725 | "colab": { 726 | "collapsed_sections": [], 727 | "name": "transformers-text-classification.ipynb", 728 | "provenance": [] 729 | }, 730 | "kernelspec": { 731 | "display_name": "Python 3 (ipykernel)", 732 | "language": "python", 733 | "name": "python3" 734 | }, 735 | "language_info": { 736 | "codemirror_mode": { 737 | "name": "ipython", 738 | "version": 3 739 | }, 740 | "file_extension": ".py", 741 | "mimetype": "text/x-python", 742 | "name": "python", 743 | "nbconvert_exporter": "python", 744 | "pygments_lexer": "ipython3", 745 | "version": "3.10.4" 746 | } 747 | }, 748 | "nbformat": 4, 749 | "nbformat_minor": 4 750 | } 751 | -------------------------------------------------------------------------------- /tutorials/intermediate/cifar10-distributed.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from datetime import datetime 3 | from pathlib import Path 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torchvision import datasets, models 9 | from torchvision.transforms import ( 10 | Compose, 11 | Normalize, 12 | Pad, 13 | RandomCrop, 14 | RandomHorizontalFlip, 15 | ToTensor, 16 | ) 17 | 18 | import ignite 19 | import ignite.distributed as idist 20 | from ignite.contrib.engines import common 21 | from ignite.handlers import PiecewiseLinear 22 | from ignite.engine import ( 23 | Events, 24 | create_supervised_trainer, 25 | 
create_supervised_evaluator, 26 | ) 27 | from ignite.handlers import Checkpoint, global_step_from_engine 28 | from ignite.metrics import Accuracy, Loss 29 | from ignite.utils import manual_seed, setup_logger 30 | 31 | 32 | config = { 33 | "seed": 543, 34 | "data_path": "cifar10", 35 | "output_path": "output-cifar10/", 36 | "model": "resnet18", 37 | "batch_size": 512, 38 | "momentum": 0.9, 39 | "weight_decay": 1e-4, 40 | "num_workers": 2, 41 | "num_epochs": 5, 42 | "learning_rate": 0.4, 43 | "num_warmup_epochs": 1, 44 | "validate_every": 3, 45 | "checkpoint_every": 200, 46 | "backend": None, 47 | "resume_from": None, 48 | "log_every_iters": 15, 49 | "nproc_per_node": None, 50 | "with_clearml": False, 51 | "with_amp": False, 52 | } 53 | 54 | 55 | def get_train_test_datasets(path): 56 | train_transform = Compose( 57 | [ 58 | Pad(4), 59 | RandomCrop(32, fill=128), 60 | RandomHorizontalFlip(), 61 | ToTensor(), 62 | Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 63 | ] 64 | ) 65 | test_transform = Compose( 66 | [ 67 | ToTensor(), 68 | Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 69 | ] 70 | ) 71 | 72 | train_ds = datasets.CIFAR10( 73 | root=path, train=True, download=False, transform=train_transform 74 | ) 75 | test_ds = datasets.CIFAR10( 76 | root=path, train=False, download=False, transform=test_transform 77 | ) 78 | 79 | return train_ds, test_ds 80 | 81 | 82 | def get_dataflow(config): 83 | train_dataset, test_dataset = get_train_test_datasets(config["data_path"]) 84 | 85 | train_loader = idist.auto_dataloader( 86 | train_dataset, 87 | batch_size=config["batch_size"], 88 | num_workers=config["num_workers"], 89 | shuffle=True, 90 | drop_last=True, 91 | ) 92 | 93 | test_loader = idist.auto_dataloader( 94 | test_dataset, 95 | batch_size=2 * config["batch_size"], 96 | num_workers=config["num_workers"], 97 | shuffle=False, 98 | ) 99 | return train_loader, test_loader 100 | 101 | 102 | def get_model(config): 103 | model_name = config["model"] 104 | if 
model_name in models.__dict__: 105 | fn = models.__dict__[model_name] 106 | else: 107 | raise RuntimeError(f"Unknown model name {model_name}") 108 | 109 | model = idist.auto_model(fn(num_classes=10)) 110 | 111 | return model 112 | 113 | 114 | def get_optimizer(config, model): 115 | optimizer = optim.SGD( 116 | model.parameters(), 117 | lr=config["learning_rate"], 118 | momentum=config["momentum"], 119 | weight_decay=config["weight_decay"], 120 | nesterov=True, 121 | ) 122 | optimizer = idist.auto_optim(optimizer) 123 | 124 | return optimizer 125 | 126 | 127 | def get_criterion(): 128 | return nn.CrossEntropyLoss().to(idist.device()) 129 | 130 | 131 | def get_lr_scheduler(config, optimizer): 132 | milestones_values = [ 133 | (0, 0.0), 134 | ( 135 | config["num_iters_per_epoch"] * config["num_warmup_epochs"], 136 | config["learning_rate"], 137 | ), 138 | (config["num_iters_per_epoch"] * config["num_epochs"], 0.0), 139 | ] 140 | lr_scheduler = PiecewiseLinear( 141 | optimizer, param_name="lr", milestones_values=milestones_values 142 | ) 143 | return lr_scheduler 144 | 145 | 146 | def get_save_handler(config): 147 | if config["with_clearml"]: 148 | from ignite.contrib.handlers.clearml_logger import ClearMLSaver 149 | 150 | return ClearMLSaver(dirname=config["output_path"]) 151 | 152 | return config["output_path"] 153 | 154 | 155 | def load_checkpoint(resume_from): 156 | checkpoint_fp = Path(resume_from) 157 | assert ( 158 | checkpoint_fp.exists() 159 | ), f"Checkpoint '{checkpoint_fp.as_posix()}' is not found" 160 | checkpoint = torch.load(checkpoint_fp.as_posix(), map_location="cpu") 161 | return checkpoint 162 | 163 | 164 | def create_trainer( 165 | model, optimizer, criterion, lr_scheduler, train_sampler, config, logger 166 | ): 167 | 168 | device = idist.device() 169 | amp_mode = None 170 | scaler = False 171 | 172 | trainer = create_supervised_trainer( 173 | model, 174 | optimizer, 175 | criterion, 176 | device=device, 177 | non_blocking=True, 178 | 
output_transform=lambda x, y, y_pred, loss: {"batch loss": loss.item()}, 179 | amp_mode="amp" if config["with_amp"] else None, 180 | scaler=config["with_amp"], 181 | ) 182 | trainer.logger = logger 183 | 184 | to_save = { 185 | "trainer": trainer, 186 | "model": model, 187 | "optimizer": optimizer, 188 | "lr_scheduler": lr_scheduler, 189 | } 190 | metric_names = [ 191 | "batch loss", 192 | ] 193 | 194 | common.setup_common_training_handlers( 195 | trainer=trainer, 196 | train_sampler=train_sampler, 197 | to_save=to_save, 198 | save_every_iters=config["checkpoint_every"], 199 | save_handler=get_save_handler(config), 200 | lr_scheduler=lr_scheduler, 201 | output_names=metric_names if config["log_every_iters"] > 0 else None, 202 | with_pbars=False, 203 | clear_cuda_cache=False, 204 | ) 205 | 206 | if config["resume_from"] is not None: 207 | checkpoint = load_checkpoint(config["resume_from"]) 208 | Checkpoint.load_objects(to_load=to_save, checkpoint=checkpoint) 209 | 210 | return trainer 211 | 212 | 213 | def create_evaluator(model, metrics, config): 214 | device = idist.device() 215 | 216 | amp_mode = "amp" if config["with_amp"] else None 217 | evaluator = create_supervised_evaluator( 218 | model, metrics=metrics, device=device, non_blocking=True, amp_mode=amp_mode 219 | ) 220 | 221 | return evaluator 222 | 223 | 224 | def setup_rank_zero(logger, config): 225 | device = idist.device() 226 | 227 | now = datetime.now().strftime("%Y%m%d-%H%M%S") 228 | output_path = config["output_path"] 229 | folder_name = ( 230 | f"{config['model']}_backend-{idist.backend()}-{idist.get_world_size()}_{now}" 231 | ) 232 | output_path = Path(output_path) / folder_name 233 | if not output_path.exists(): 234 | output_path.mkdir(parents=True) 235 | config["output_path"] = output_path.as_posix() 236 | logger.info(f"Output path: {config['output_path']}") 237 | 238 | if config["with_clearml"]: 239 | from clearml import Task 240 | 241 | task = Task.init("CIFAR10-Training", 
task_name=output_path.stem) 242 | task.connect_configuration(config) 243 | # Log hyper parameters 244 | hyper_params = [ 245 | "model", 246 | "batch_size", 247 | "momentum", 248 | "weight_decay", 249 | "num_epochs", 250 | "learning_rate", 251 | "num_warmup_epochs", 252 | ] 253 | task.connect({k: v for k, v in config.items()}) 254 | 255 | 256 | def log_basic_info(logger, config): 257 | logger.info(f"Train on CIFAR10") 258 | logger.info(f"- PyTorch version: {torch.__version__}") 259 | logger.info(f"- Ignite version: {ignite.__version__}") 260 | if torch.cuda.is_available(): 261 | # explicitly import cudnn as torch.backends.cudnn can not be pickled with hvd spawning procs 262 | from torch.backends import cudnn 263 | 264 | logger.info( 265 | f"- GPU Device: {torch.cuda.get_device_name(idist.get_local_rank())}" 266 | ) 267 | logger.info(f"- CUDA version: {torch.version.cuda}") 268 | logger.info(f"- CUDNN version: {cudnn.version()}") 269 | 270 | logger.info("\n") 271 | logger.info("Configuration:") 272 | for key, value in config.items(): 273 | logger.info(f"\t{key}: {value}") 274 | logger.info("\n") 275 | 276 | if idist.get_world_size() > 1: 277 | logger.info("\nDistributed setting:") 278 | logger.info(f"\tbackend: {idist.backend()}") 279 | logger.info(f"\tworld size: {idist.get_world_size()}") 280 | logger.info("\n") 281 | 282 | 283 | def log_metrics(logger, epoch, elapsed, tag, metrics): 284 | metrics_output = "\n".join([f"\t{k}: {v}" for k, v in metrics.items()]) 285 | logger.info( 286 | f"\nEpoch {epoch} - Evaluation time (seconds): {elapsed:.2f} - {tag} metrics:\n {metrics_output}" 287 | ) 288 | 289 | 290 | def training(local_rank, config): 291 | 292 | rank = idist.get_rank() 293 | manual_seed(config["seed"] + rank) 294 | 295 | logger = setup_logger(name="CIFAR10-Training") 296 | log_basic_info(logger, config) 297 | 298 | if rank == 0: 299 | setup_rank_zero(logger, config) 300 | 301 | train_loader, val_loader = get_dataflow(config) 302 | model = get_model(config) 
303 | optimizer = get_optimizer(config, model) 304 | criterion = get_criterion() 305 | config["num_iters_per_epoch"] = len(train_loader) 306 | lr_scheduler = get_lr_scheduler(config, optimizer) 307 | 308 | trainer = create_trainer( 309 | model, optimizer, criterion, lr_scheduler, train_loader.sampler, config, logger 310 | ) 311 | 312 | metrics = { 313 | "Accuracy": Accuracy(), 314 | "Loss": Loss(criterion), 315 | } 316 | 317 | train_evaluator = create_evaluator(model, metrics, config) 318 | val_evaluator = create_evaluator(model, metrics, config) 319 | 320 | def run_validation(engine): 321 | epoch = trainer.state.epoch 322 | state = train_evaluator.run(train_loader) 323 | log_metrics(logger, epoch, state.times["COMPLETED"], "train", state.metrics) 324 | state = val_evaluator.run(val_loader) 325 | log_metrics(logger, epoch, state.times["COMPLETED"], "val", state.metrics) 326 | 327 | trainer.add_event_handler( 328 | Events.EPOCH_COMPLETED(every=config["validate_every"]) | Events.COMPLETED, 329 | run_validation, 330 | ) 331 | 332 | if rank == 0: 333 | evaluators = {"train": train_evaluator, "val": val_evaluator} 334 | tb_logger = common.setup_tb_logging( 335 | config["output_path"], trainer, optimizer, evaluators=evaluators 336 | ) 337 | 338 | best_model_handler = Checkpoint( 339 | {"model": model}, 340 | get_save_handler(config), 341 | filename_prefix="best", 342 | n_saved=2, 343 | global_step_transform=global_step_from_engine(trainer), 344 | score_name="val_accuracy", 345 | score_function=Checkpoint.get_default_score_fn("Accuracy"), 346 | ) 347 | val_evaluator.add_event_handler( 348 | Events.COMPLETED, 349 | best_model_handler, 350 | ) 351 | 352 | try: 353 | trainer.run(train_loader, max_epochs=config["num_epochs"]) 354 | except Exception as e: 355 | logger.exception("") 356 | raise e 357 | 358 | if rank == 0: 359 | tb_logger.close() 360 | 361 | 362 | def run(backend=None, **spawn_kwargs): 363 | config["backend"] = backend 364 | 365 | with 
idist.Parallel(backend=config["backend"], **spawn_kwargs) as parallel: 366 | parallel.run(training, config) 367 | 368 | 369 | if __name__ == "__main__": 370 | fire.Fire({"run": run}) 371 | --------------------------------------------------------------------------------