├── README.md ├── chapter_appendix ├── aws.ipynb ├── buy-gpu.ipynb ├── d2l.ipynb ├── how-to-contribute.ipynb ├── index.ipynb ├── jupyter.ipynb ├── math.ipynb └── notation.ipynb ├── chapter_attention-mechanism ├── attention.ipynb ├── index.ipynb ├── seq2seq-attention.ipynb └── transformer.ipynb ├── chapter_computational-performance ├── async-computation.ipynb ├── auto-parallelism.ipynb ├── hybridize.ipynb ├── index.ipynb ├── multiple-gpus-gluon.ipynb └── multiple-gpus.ipynb ├── chapter_computer-vision ├── anchor.ipynb ├── bounding-box.ipynb ├── fcn.ipynb ├── fine-tuning.ipynb ├── image-augmentation.ipynb ├── index.ipynb ├── kaggle-gluon-cifar10.ipynb ├── kaggle-gluon-dog.ipynb ├── multiscale-object-detection.ipynb ├── neural-style.ipynb ├── object-detection-dataset.ipynb ├── rcnn.ipynb ├── semantic-segmentation-and-dataset.ipynb └── ssd.ipynb ├── chapter_convolutional-modern ├── alexnet.ipynb ├── batch-norm.ipynb ├── densenet.ipynb ├── googlenet.ipynb ├── index.ipynb ├── nin.ipynb ├── resnet.ipynb └── vgg.ipynb ├── chapter_convolutional-neural-networks ├── channels.ipynb ├── conv-layer.ipynb ├── index.ipynb ├── lenet.ipynb ├── padding-and-strides.ipynb ├── pooling.ipynb └── why-conv.ipynb ├── chapter_crashcourse ├── autograd.ipynb ├── index.ipynb ├── linear-algebra.ipynb ├── lookup-api.ipynb ├── naive-bayes.ipynb ├── ndarray.ipynb └── probability.ipynb ├── chapter_deep-learning-computation ├── custom-layer.ipynb ├── deferred-init.ipynb ├── index.ipynb ├── model-construction.ipynb ├── parameters.ipynb ├── read-write.ipynb └── use-gpu.ipynb ├── chapter_install └── install.ipynb ├── chapter_introduction └── intro.ipynb ├── chapter_linear-networks ├── fashion-mnist.ipynb ├── index.ipynb ├── linear-regression-gluon.ipynb ├── linear-regression-scratch.ipynb ├── linear-regression.ipynb ├── softmax-regression-gluon.ipynb ├── softmax-regression-scratch.ipynb └── softmax-regression.ipynb ├── chapter_multilayer-perceptrons ├── backprop.ipynb ├── dropout.ipynb ├── environment.ipynb ├── index.ipynb ├── kaggle-house-price.ipynb ├── mlp-gluon.ipynb ├── mlp-scratch.ipynb ├── mlp.ipynb ├── numerical-stability-and-init.ipynb ├── underfit-overfit.ipynb └── weight-decay.ipynb ├── chapter_natural-language-processing ├── approx-training.ipynb ├── fasttext.ipynb ├── glove.ipynb ├── index.ipynb ├── sentiment-analysis-cnn.ipynb ├── sentiment-analysis-rnn.ipynb ├── similarity-analogy.ipynb ├── word2vec-gluon.ipynb └── word2vec.ipynb ├── chapter_optimization ├── adadelta.ipynb ├── adagrad.ipynb ├── adam.ipynb ├── gd-sgd.ipynb ├── index.ipynb ├── minibatch-sgd.ipynb ├── momentum.ipynb ├── optimization-intro.ipynb └── rmsprop.ipynb ├── chapter_preface └── preface.ipynb ├── chapter_recurrent-neural-networks ├── beam-search.ipynb ├── bi-rnn.ipynb ├── bptt.ipynb ├── deep-rnn.ipynb ├── encoder-decoder.ipynb ├── gru.ipynb ├── index.ipynb ├── lang-model-dataset.ipynb ├── lang-model.ipynb ├── lstm.ipynb ├── machine-translation.ipynb ├── rnn-gluon.ipynb ├── rnn-scratch.ipynb ├── rnn.ipynb ├── seq2seq.ipynb └── sequence.ipynb ├── contrib ├── appendix │ └── use_sagemaker.ipynb └── chapter_crashcourse │ ├── chapter-one-problem-set.ipynb │ └── linear-algebra.ipynb ├── data ├── airfoil_self_noise.dat ├── fr-en-small.txt ├── jaychou_lyrics.txt.zip ├── kaggle_cifar10 │ ├── test_tiny.zip │ ├── trainLabels.csv.zip │ └── train_tiny.zip ├── kaggle_dog │ └── train_valid_test_tiny.zip ├── kaggle_house_pred_test.csv ├── kaggle_house_pred_train.csv ├── ptb.zip └── timemachine.txt ├── environment.yml └── img ├── 404.jpg ├── DenseNetDense.png 
├── Neuron.png ├── ResNetBlock.png ├── ResNetFull.png ├── ResNetManyFlavor.png ├── alexnet-all.png ├── anchor-label.png ├── attention.png ├── autumn_oak.jpg ├── aws.png ├── beam_search.png ├── birnn.png ├── blocks.png ├── book-org.png ├── capacity_vs_error.png ├── cat-cartoon1.png ├── cat-cartoon2.png ├── cat1.jpg ├── cat1.png ├── cat2.jpg ├── cat3.jpg ├── catdog.jpg ├── cbow.png ├── cifar10.png ├── comp-comm.png ├── connect.png ├── contrib01.png ├── contrib02.png ├── contrib03.png ├── contrib04.png ├── contrib05.png ├── contrib06.png ├── contribute.png ├── conv1d-2d.png ├── conv1d-channel.png ├── conv1d.png ├── conv_1x1.png ├── conv_multi_in.png ├── conv_pad.png ├── conv_stride.png ├── convert.sh ├── copyto.png ├── correlation.png ├── cuda.png ├── data-collection.png ├── data-parallel.png ├── death_cap.jpg ├── deep-rnn.png ├── deeplearning_amazon.png ├── dense-rnn.png ├── densenet.png ├── disk.png ├── dog-cartoon1.png ├── dog-cartoon2.jpg ├── dog-cartoon2.png ├── dog1.jpg ├── dog2.jpg ├── dog_hotdog.jpg ├── dogdogcat.png ├── dropout.png ├── dropout2.png ├── ec2.png ├── encoder-decoder.png ├── fast-rcnn.png ├── faster-rcnn.png ├── fcn.png ├── filters.png ├── finetune.png ├── flopsvsprice.png ├── forward.png ├── ftse100.png ├── functionclasses.png ├── git-clone.png ├── git-createpr.png ├── git-fork.png ├── git-forked.png ├── git-newpr.png ├── gru_1.png ├── gru_2.png ├── gru_3.png ├── gtx.png ├── hi-softmax.png ├── hmm.png ├── house_pricing.png ├── inception-full.png ├── inception.png ├── intersect.png ├── iou.png ├── jupyter.png ├── jupyter00.png ├── jupyter01.png ├── jupyter02.png ├── jupyter03.png ├── jupyter04.png ├── jupyter05.png ├── jupyter06.png ├── kaggle-dog.png ├── kaggle.png ├── kaggle_cifar10.png ├── kaggle_submit2.png ├── keypair.png ├── koebel.jpg ├── launching.png ├── lenet-vert.png ├── lenet.png ├── limits.png ├── linearregression.png ├── linreg.png ├── lstm_0.png ├── lstm_1.png ├── lstm_2.png ├── lstm_3.png ├── mask-rcnn.png ├── ml-loop.png ├── mlp.png ├── multi-head-attention.png ├── mxnet-website.png ├── neural-style-1.png ├── neural-style-2.png ├── neural-style.png ├── nin-compare.png ├── nin.png ├── ones_like.png ├── os.png ├── p2x.png ├── pikachu.jpg ├── pooling.png ├── popvssoda.png ├── r-cnn.png ├── rainier.jpg ├── ratbrain.jpg ├── residual-block.png ├── residual-rnn.png ├── rl-environment.png ├── rnn-bptt.png ├── rnn-train.png ├── rnn.png ├── roi.png ├── s2s_prob1.png ├── s2s_prob2.png ├── segmentation.png ├── self-attention-predict.png ├── self-attention.png ├── seq2seq-attention-details.png ├── seq2seq-details.png ├── seq2seq.png ├── seq2seq_attention.png ├── seq2seq_predict.png ├── sharks.png ├── singlelayer.png ├── singleneuron.png ├── skip-gram.png ├── softmaxreg.png ├── speech.png ├── ssd.png ├── ssh.png ├── stackedanimals.jpg ├── stopterminate.png ├── style-transfer.png ├── supervised-learning.png ├── taxonomy.jpg ├── textcnn.png ├── timemachine-5gram.png ├── transformer.png ├── truncated-bptt.png ├── vgg.png ├── wake-word.png ├── waldo-mask.jpg ├── waldo.jpg ├── wattvsprice.png ├── where-wally-walker-books.jpg ├── whitecat10.jpg ├── whitecat160.jpg ├── whitecat20.jpg ├── whitecat320.jpg ├── whitecat40.jpg ├── whitecat80.jpg ├── whitedog.jpg ├── whitedog10.jpg ├── whitedog160.jpg ├── whitedog20.jpg ├── whitedog320.jpg ├── whitedog40.jpg └── whitedog80.jpg /README.md: -------------------------------------------------------------------------------- 1 | # Notebooks 2 | 3 | This repo is for hosting .ipynb notebooks in [d2l-en](https://github.com/d2l-ai/d2l-en). 
These notebooks are modified to run on cloud, such as Amazon Sagemaker and Google Colab. For example, you can allocate a free GPU instance and execute the notebook on Google colab by clicking the following link: 4 | 5 | https://colab.research.google.com/github/d2l-ai/notebooks/blob/master/chapter_convolutional-neural-networks/lenet.ipynb 6 | 7 | Note: please don't modify the contents in this repo except for README.md, which are automatically generated. 8 | 9 | Also this feature is still experimental, there are known bugs. 10 | 11 | 12 | -------------------------------------------------------------------------------- /chapter_appendix/d2l.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# `d2l` API Document\n", 8 | "\n", 9 | "## Basic and Plotting" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "```eval_rst\n", 17 | "\n", 18 | ".. automodule:: d2l.base\n", 19 | " :members:\n", 20 | "\n", 21 | ".. automodule:: d2l.figure\n", 22 | " :members:\n", 23 | "\n", 24 | "```\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Loading Data" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "```eval_rst\n", 39 | "\n", 40 | ".. automodule:: d2l.data\n", 41 | " :members:\n", 42 | "\n", 43 | ".. automodule:: d2l.data.base\n", 44 | " :members:\n", 45 | "\n", 46 | ".. automodule:: d2l.data.fashion_mnist\n", 47 | " :members:\n", 48 | "\n", 49 | ".. automodule:: d2l.data.imdb\n", 50 | " :members:\n", 51 | "\n", 52 | ".. automodule:: d2l.data.pikachu\n", 53 | " :members:\n", 54 | "\n", 55 | ".. automodule:: d2l.data.voc\n", 56 | " :members:\n", 57 | "\n", 58 | "```\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Building Neural Networks" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "```eval_rst\n", 73 | "\n", 74 | ".. automodule:: d2l.model\n", 75 | " :members:\n", 76 | "\n", 77 | "```\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Training" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "```eval_rst\n", 92 | "\n", 93 | ".. automodule:: d2l.train\n", 94 | " :members:\n", 95 | "\n", 96 | "```\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## Predicting" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "```eval_rst\n", 111 | "\n", 112 | ".. automodule:: d2l.predict\n", 113 | " :members:\n", 114 | "\n", 115 | "```\n" 116 | ] 117 | } 118 | ], 119 | "metadata": { 120 | "accelerator": "GPU", 121 | "language_info": { 122 | "name": "python" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 2 127 | } -------------------------------------------------------------------------------- /chapter_appendix/how-to-contribute.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Contribute to This Book\n", 8 | "\n", 9 | "Contributions by readers [1] help us improve this book. 
If you find a typo, an outdated link, something where you think we missed a citation, where the code doesn't look elegant or where an explanation is unclear, please contribute back and help us help our readers. While in regular books the delay between print runs (and thus between typo corrections) can be measured in years, it typically takes hours to days to incorporate an improvement in this book. This is all possible due to version control and continuous integration testing. To do so you need to install Git and submit a pull request [2] to the GitHub repository. When your pull request is merged into the code repository by the author, you will become a contributor. In a nutshell the process works as described in the diagram below. \n", 10 | "\n", 11 | "\"Contributing\n", 12 | "\n", 13 | "## From Reader to Contributor in 6 Steps\n", 14 | "\n", 15 | "We will walk you through the steps in detail. If you are already familiar with Git you can skip this section. For concreteness we assume that the contributor's user name is `smolix`. \n", 16 | "\n", 17 | "### Install Git\n", 18 | "\n", 19 | "The Git open source book [3] describes how to install Git. This typically works via `apt install git` on Ubuntu Linux, by installing the Xcode developer tools on macOS, or by using GitHub's [desktop client](https://desktop.github.com). If you don't have a GitHub account, you need to sign up for one [4].\n", 20 | "\n", 21 | "### Log in to GitHub\n", 22 | "\n", 23 | "Enter the address of the book's code repository in your browser [2]. Click on the `Fork` button in the red box at the top-right of the figure below, to make a copy of the repository of this book. This is now *your copy* and you can change it any way you want. \n", 24 | "\n", 25 | "![The code repository page.](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/git-fork.png)\n", 26 | "\n", 27 | "Now, the code repository of this book will be copied to your username, such as `smolix/d2l-en` shown at the top-left of the screenshot below.\n", 28 | "\n", 29 | "![Copy the code repository.](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/git-forked.png)\n", 30 | "\n", 31 | "### Clone the Repository\n", 32 | "\n", 33 | "To clone the repository (i.e. to make a local copy) we need to get its repository address. The green button on the picture below displays this. Make sure that your local copy is up to date with the main repository if you decide to keep this fork around for longer. For now simply follow the instructions in the [Installation](../chapter_prerequisite/install.md) section to get started. The main difference is that you're now downloading *your own fork* of the repository. \n", 34 | "\n", 35 | "![Git clone.](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/git-clone.png)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "```\n", 43 | "# Replace your_github_username with your GitHub username\n", 44 | "git clone https://github.com/your_github_username/d2l-en.git\n", 45 | "```\n" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "On Unix the above command copies all the code from GitHub to the directory `d2l-en`. \n", 53 | "\n", 54 | "### Edit the Book and Push\n", 55 | "\n", 56 | "Now it's time to edit the book. It's best to edit the notebooks in Jupyter following the [instructions](../chapter_appendix/jupyter.md) in the appendix. Make the changes and check that they're OK. 
Assume we have modified a typo in the file `~/d2l-en/chapter_appendix/how-to-contribute.md`. \n", 57 | "You can then check which files you have changed:" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "```\n", 65 | "git status\n", 66 | "```\n" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "At this point Git will prompt that the `chapter_appendix/how-to-contribute.md` file has been modified." 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "```\n", 81 | "mylaptop:d2l-en smola$ git status\n", 82 | "On branch master\n", 83 | "Your branch is up-to-date with 'origin/master'.\n", 84 | "\n", 85 | "Changes not staged for commit:\n", 86 | " (use \"git add ...\" to update what will be committed)\n", 87 | " (use \"git checkout -- ...\" to discard changes in working directory)\n", 88 | "\n", 89 | "\tmodified: chapter_appendix/how-to-contribute.md\n", 90 | "```\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "After confirming that this is what you want, execute the following command:" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "```\n", 105 | "git add chapter_appendix/how-to-contribute.md\n", 106 | "git commit -m 'fix typo in git documentation'\n", 107 | "git push\n", 108 | "```\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "The changed code will then be in your personal fork of the repository. To request the addition of your change, you have to create a pull request for the official repository of the book.\n", 116 | "\n", 117 | "### Pull Request\n", 118 | "\n", 119 | "Go to your fork of the repository on GitHub and select \"New pull request\". This will open up a screen that shows you the changes between your edits and what is current in the main repository of the book. \n", 120 | "\n", 121 | "![Pull Request.](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/git-newpr.png)\n", 122 | "\n", 123 | "\n", 124 | "### Submit Pull Request\n", 125 | "\n", 126 | "Finally, submit a pull request. Make sure to describe the changes you have made in the pull request. This will make it easier for the authors to review it and to merge it with the book. Depending on the changes, this might get accepted right away, rejected, or more likely, you'll get some feedback on the changes. Once you've incorporated them, you're good to go. \n", 127 | "\n", 128 | "![Create Pull Request.](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/git-createpr.png)\n", 129 | "\n", 130 | "Your pull request will appear among the list of requests in the main repository. We will make every effort to process it quickly. \n", 131 | "\n", 132 | "## Summary\n", 133 | "\n", 134 | "* You can use GitHub to contribute to this book.\n", 135 | "* Forking a repositoy is the first step to contributing, since it allows you to edit things locally and only contribute back once you're ready.\n", 136 | "* Pull requests are how contributions are being bundled up. Try not to submit huge pull requests since this makes them hard to understand and incorporate. Better send several smaller ones. \n", 137 | "\n", 138 | "## Exercises\n", 139 | "\n", 140 | "1. Star and fork the `d2l-en` repository. \n", 141 | "1. Find some code that needs improvement and submit a pull request.\n", 142 | "1. 
Find a reference that we missed and submit a pull request. \n", 143 | "\n", 144 | "\n", 145 | "## References\n", 146 | "\n", 147 | "[1] List of contributors to this book. https://github.com/d2l-ai/d2l-en/graphs/contributors\n", 148 | "\n", 149 | "[2] Address of the code repository of this book. https://github.com/d2l-ai/d2l-en\n", 150 | "\n", 151 | "[3] Install Git. https://git-scm.com/book/zh/v2\n", 152 | "\n", 153 | "[4] URL of GitHub. https://github.com/\n", 154 | "\n", 155 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2401)\n", 156 | "\n", 157 | "\"\"" 158 | ] 159 | } 160 | ], 161 | "metadata": { 162 | "accelerator": "GPU", 163 | "language_info": { 164 | "name": "python" 165 | } 166 | }, 167 | "nbformat": 4, 168 | "nbformat_minor": 2 169 | } -------------------------------------------------------------------------------- /chapter_appendix/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Appendix" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "```eval_rst\n", 15 | "\n", 16 | ".. toctree::\n", 17 | " :maxdepth: 2\n", 18 | "\n", 19 | " notation\n", 20 | " math\n", 21 | " jupyter\n", 22 | " aws\n", 23 | " buy-gpu\n", 24 | " how-to-contribute\n", 25 | " d2l\n", 26 | "```\n" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "accelerator": "GPU", 32 | "language_info": { 33 | "name": "python" 34 | } 35 | }, 36 | "nbformat": 4, 37 | "nbformat_minor": 2 38 | } -------------------------------------------------------------------------------- /chapter_appendix/jupyter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using Jupyter\n", 8 | "\n", 9 | "This section describes how to edit and run the code in the chapters of this book using Jupyter Notebooks. Make sure you have Jupyter installed and downloaded the code as described in the [Installation](../chapter_install/install.md) section. If you want to know more about Jupyter see the excellent tutorial in the [Documentation](https://jupyter.readthedocs.io/en/latest/). \n", 10 | "\n", 11 | "\n", 12 | "## Edit and Run the Code Locally\n", 13 | "\n", 14 | "Suppose that the local path of code of the book is \"xx/yy/d2l-en/\". Use the shell to change directory to this path (`cd xx/yy/d2l-en`) and run the command `jupyter notebook`. If your browser doesn't do this automatically, open http://localhost:8888 and you will see the interface of Jupyter and all the folders containing the code of the book, as shown in Figure 14.1.\n", 15 | "\n", 16 | "![The folders containing the code in this book. ](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter00.png)\n", 17 | "\n", 18 | "You can access the notebook files by clicking on the folder displayed on the webpage. They usually have the suffix `.ipynb`.\n", 19 | "For the sake of brevity, we create a temporary `test.ipynb` file. The content displayed after you click it is as shown in Figure 14.2. This notebook includes a markdown cell and a code cell. The content in the markdown cell includes \"This is A Title\" and \"This is text\". The code cell contains two lines of Python code.\n", 20 | "\n", 21 | "![Markdown and code cells in the \"text.ipynb\" file. 
](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter01.png)\n", 22 | "\n", 23 | "\n", 24 | "Double click on the markdown cell to enter edit mode. Add a new text string \"Hello world.\" at the end of the cell, as shown in Figure 14.3.\n", 25 | "\n", 26 | "![Edit the markdown cell. ](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter02.png)\n", 27 | "\n", 28 | "\n", 29 | "As shown in Figure 14.4, click \"Cell\" $\\rightarrow$ \"Run Cells\" in the menu bar to run the edited cell.\n", 30 | "\n", 31 | "![Run the cell. ](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter03.png)\n", 32 | "\n", 33 | "\n", 34 | "After running, the markdown cell is as shown in Figure 14.5.\n", 35 | "\n", 36 | "![The markdown cell after editing. ](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter04.png)\n", 37 | "\n", 38 | "\n", 39 | "Next, click on the code cell. Multiply the elements by 2 after the last line of code, as shown in Figure 14.6.\n", 40 | "\n", 41 | "![Edit the code cell. ](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter05.png)\n", 42 | "\n", 43 | "You can also run the cell with a shortcut (\"Ctrl + Enter\" by default) and obtain the output result from Figure 14.7.\n", 44 | "\n", 45 | "![Run the code cell to obtain the output. ](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter06.png)\n", 46 | "\n", 47 | "When a notebook contains more cells, we can click \"Kernel\" $\\rightarrow$ \"Restart & Run All\" in the menu bar to run all the cells in the entire notebook. By clicking \"Help\" $\\rightarrow$ \"Edit Keyboard Shortcuts\" in the menu bar, you can edit the shortcuts according to your preferences.\n", 48 | "\n", 49 | "\n", 50 | "## Advanced Options\n", 51 | "\n", 52 | "Beyond local editing there are two things that are quite important: editing the notebooks in markdown format and running Jupyter remotely. The latter matters when we want to run the code on a faster server. The former matters since Jupyter's native .ipnyb format stores a lot of auxiliary data that isn't really specific to what is in the notebooks, mostly related to how and where the code is run. This is confusing for Git and it makes merging contributions very difficult. Fortunately there's an alternative - native editing in Markdown. \n", 53 | "\n", 54 | "### Markdown Files in Jupyter \n", 55 | "\n", 56 | "If you wish to contribute to the content of this book, you need to modify the source file (.md file, not .ipynb file) on GitHub. Using the notedown plugin we can modify notebooks in .md format directly in Jupyter. Linux/MacOS users can execute the following commands to obtain the GitHub source files and activate the runtime environment. If you haven't done so already, install the environment needed for MXNet Gluon." 
57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "```\n", 64 | "git clone https://github.com/d2l-ai/d2l-en.git\n", 65 | "cd d2l-en \n", 66 | "sed -i 's/mxnet/mxnet-cu100/g' environment.yml # Only use this if you have a GPU\n", 67 | "conda env create -f environment.yml\n", 68 | "source activate gluon # Windows users run \"activate gluon\"\n", 69 | "```\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "Next, install the notedown plugin, run Jupyter Notebook, and load the plugin:" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "```\n", 84 | "pip install https://github.com/mli/notedown/tarball/master\n", 85 | "jupyter notebook --NotebookApp.contents_manager_class='notedown.NotedownContentsManager'\n", 86 | "```\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "To turn on the notedown plugin by default whenever you run Jupyter Notebook do the following: \n", 94 | "First, generate a Jupyter Notebook configuration file (if it has already been generated, you can skip this step)." 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "```\n", 102 | "jupyter notebook --generate-config\n", 103 | "```\n" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Then, add the following line to the end of the Jupyter Notebook configuration file (for Linux/macOS, usually in the path `~/.jupyter/jupyter_notebook_config.py`):" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "```\n", 118 | "c.NotebookApp.contents_manager_class = 'notedown.NotedownContentsManager'\n", 119 | "```\n" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "After that, you only need to run the `jupyter notebook` command to turn on the notedown plugin by default.\n", 127 | "\n", 128 | "\n", 129 | "### Run Jupyter Notebook on a Remote Server\n", 130 | "\n", 131 | "Sometimes, you may want to run Jupyter Notebook on a remote server and access it through a browser on your local computer. If Linux or MacOS is installed on your local machine (Windows can also support this function through third-party software such as PuTTY), you can use port forwarding:" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "```\n", 139 | "ssh myserver -L 8888:localhost:8888\n", 140 | "```\n" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "The above is the address of the remote server `myserver`. Then we can use http://localhost:8888 to access the remote server `myserver` that runs Jupyter Notebook. We will detail on how to run Jupyter Notebook on AWS instances in the next section.\n", 148 | "\n", 149 | "### Timing\n", 150 | "\n", 151 | "We can use the `ExecuteTime` plugin to time the execution of each code cell in a Jupyter Notebook. 
Use the following commands to install the plugin:" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "```\n", 159 | "pip install jupyter_contrib_nbextensions\n", 160 | "jupyter contrib nbextension install --user\n", 161 | "jupyter nbextension enable execute_time/ExecuteTime\n", 162 | "```\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Summary\n", 170 | "\n", 171 | "* To edit the book chapters you need to activate markdown format in Jupyter.\n", 172 | "* You can run servers remotely using port forwarding.\n", 173 | "\n", 174 | "## Exercises\n", 175 | "\n", 176 | "1. Try to edit and run the code in this book locally.\n", 177 | "1. Try to edit and run the code in this book *remotely* via port forwarding.\n", 178 | "1. Measure $\\mathbf{A}^\\top \\mathbf{B}$ vs. $\\mathbf{A} \\mathbf{B}$ for two square matrices in $\\mathbb{R}^{1024 \\times 1024}$. Which one is faster?\n", 179 | "\n", 180 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2398)\n", 181 | "\n", 182 | "\"\"" 183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "accelerator": "GPU", 188 | "language_info": { 189 | "name": "python" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 2 194 | } -------------------------------------------------------------------------------- /chapter_appendix/notation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# List of Main Symbols\n", 8 | "\n", 9 | "The main symbols used in this book are listed below.\n", 10 | "\n", 11 | "## Numbers\n", 12 | "\n", 13 | "|Symbol |Type |\n", 14 | "|------------|------|\n", 15 | "|$x$ |Scalar|\n", 16 | "|$\\mathbf{x}$|Vector|\n", 17 | "|$\\mathbf{X}$|Matrix|\n", 18 | "|$\\mathsf{X}$|Tensor|\n", 19 | "\n", 20 | "## Sets\n", 21 | "\n", 22 | "|Symbol |Type |\n", 23 | "|-------------------------|----------------------------------------------------|\n", 24 | "|$\\mathcal{X}$ |Set |\n", 25 | "|$\\mathbb{R}$ |Real numbers |\n", 26 | "|$\\mathbb{R}^n$ |Vectors of real numbers in $n$ dimensions |\n", 27 | "|$\\mathbb{R}^{a \\times b}$|Matrix of real numbers with $a$ rows and $b$ columns|\n", 28 | "\n", 29 | "\n", 30 | "## Operators\n", 31 | "\n", 32 | "|Symbol |Type |\n", 33 | "|-------------------------|-------------------------------------------|\n", 34 | "|$\\mathbf{(\\cdot)}^\\top$ |Vector or matrix transposition |\n", 35 | "|$\\odot$ |Element-wise multiplication |\n", 36 | "|$\\lvert\\mathcal{X}\\rvert$|Cardinality (number of elements) of the set $\\mathcal{X}$|\n", 37 | "|$\\|\\cdot\\|_p$ |$L_p$ norm |\n", 38 | "|$\\|\\cdot\\|$ |$L_2$ norm |\n", 39 | "|$\\sum$ |Series addition |\n", 40 | "|$\\prod$ |Series multiplication |\n", 41 | "\n", 42 | "\n", 43 | "## Functions\n", 44 | "\n", 45 | "|Symbol |Type |\n", 46 | "|-------------|----------------------------|\n", 47 | "|$f(\\cdot)$ |Function |\n", 48 | "|$\\log(\\cdot)$|Natural logarithm|\n", 49 | "|$\\exp(\\cdot)$|Exponential function |\n", 50 | "\n", 51 | "## Derivatives and Gradients\n", 52 | "\n", 53 | "|Symbol |Type |\n", 54 | "|---------------------------------|---------------------------------------------|\n", 55 | "| $\\frac{dy}{dx}$ |Derivative of $y$ with respect to $x$ |\n", 56 | "| $\\partial_{x} {y}$ |Partial derivative of $y$ with respect to $x$|\n", 57 | "| $\\nabla_{\\mathbf{x}} y$ |Gradient of $y$ with respect to $\\mathbf{x}$ |\n", 58 
| "\n", 59 | "## Probability and Statistics\n", 60 | "\n", 61 | "|Symbol |Type |\n", 62 | "|-----------------------------------------|---------------------------------------------------------|\n", 63 | "|$\\Pr(\\cdot)$ | Probability distribution |\n", 64 | "|$z \\sim \\Pr$ | Random variable $z$ obeys the probability distribution $\\Pr$ |\n", 65 | "|$\\Pr(x|y)$ | Conditional probability of $x|y$ |\n", 66 | "|${\\mathbf{E}}_{x} [f(x)]$| Expectation of $f$ with respect to $x$ |\n", 67 | "\n", 68 | "## Complexity\n", 69 | "\n", 70 | "|Symbol |Type |\n", 71 | "|-------------|--------------|\n", 72 | "|$\\mathcal{O}$|Big O notation|\n", 73 | "|$\\mathcal{o}$|Little o notation (grows much more slowly than)|" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "accelerator": "GPU", 79 | "language_info": { 80 | "name": "python" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } -------------------------------------------------------------------------------- /chapter_attention-mechanism/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Attention Mechanism" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "```eval_rst\n", 15 | "\n", 16 | ".. toctree::\n", 17 | " :maxdepth: 2\n", 18 | "\n", 19 | " attention\n", 20 | " seq2seq-attention\n", 21 | " transformer\n", 22 | "\n", 23 | "```\n" 24 | ] 25 | } 26 | ], 27 | "metadata": { 28 | "accelerator": "GPU", 29 | "language_info": { 30 | "name": "python" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 2 35 | } -------------------------------------------------------------------------------- /chapter_attention-mechanism/seq2seq-attention.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sequence to Sequence with Attention Mechanism\n", 8 | "\n", 9 | "In this section, we add the attention mechanism to the sequence to sequence model introduced in the [\"Sequence to Sequence\"](../chapter_recurrent-neural-networks/seq2seq.md) section to explicitly select state. The following figure shows the model architecture for a decoding time step. As can be seen, the memory of the attention layer consists of the encoder outputs of each time step. During decoding, the decoder output from the previous time step is used as the query, the attention output is then fed into the decoder with the input to provide attentional context information. \n", 10 | "\n", 11 | "\"The\n", 12 | "\n", 13 | "The layer structure in the encoder and the decoder is shown in the following figure. \n", 14 | "\n", 15 | "\"\"" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": { 22 | "attributes": { 23 | "classes": [], 24 | "id": "", 25 | "n": "1" 26 | } 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "# Install dependencies before importing\n", 31 | "!pip install mxnet-cu100\n", 32 | "!pip install d2l\n", 33 | "\n", 34 | "import sys\n", 35 | "sys.path.insert(0, '..')\n", 36 | "\n", 37 | "from mxnet import nd\n", 38 | "from mxnet.gluon import rnn, nn\n", 39 | "import d2l" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Decoder\n", 47 | "\n", 48 | "Now let's implement the decoder of this model. We add a MLP attention layer which has the same hidden size as the LSTM layer. 
The state passed from the encoder to the decoder contains three items: \n", 49 | "- the encoder outputs of all time steps, which are used as the attention layer's memory with identical keys and values\n", 50 | "- the hidden state of the last time step that is used to initialize the encoder's hidden state\n", 51 | "- valid lengths of the decoder inputs so the attention layer will not consider encoder outputs for padding tokens. \n", 52 | "\n", 53 | "In each time step of decoding, we use the output of the last RNN layer as the query for the attention layer. Its output is then concatenated with the input embedding vector to feed into the RNN layer. Despite the RNN layer hidden state also contains history information from decoder, the attention output explicitly selects the encoder outputs that are correlated to the query and suspends other non-correlated information." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 2, 59 | "metadata": { 60 | "attributes": { 61 | "classes": [], 62 | "id": "", 63 | "n": "2" 64 | } 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "class Seq2SeqAttentionDecoder(d2l.Decoder):\n", 69 | " def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,\n", 70 | " dropout=0, **kwargs):\n", 71 | " super(Seq2SeqAttentionDecoder, self).__init__(**kwargs)\n", 72 | " self.attention_cell = d2l.MLPAttention(num_hiddens, dropout)\n", 73 | " self.embedding = nn.Embedding(vocab_size, embed_size)\n", 74 | " self.rnn = rnn.LSTM(num_hiddens, num_layers, dropout=dropout)\n", 75 | " self.dense = nn.Dense(vocab_size, flatten=False)\n", 76 | "\n", 77 | " def init_state(self, enc_outputs, enc_valid_len, *args):\n", 78 | " outputs, hidden_state = enc_outputs\n", 79 | " # Transpose outputs to (batch_size, seq_len, hidden_size) \n", 80 | " return (outputs.swapaxes(0,1), hidden_state, enc_valid_len)\n", 81 | "\n", 82 | " def forward(self, X, state):\n", 83 | " enc_outputs, hidden_state, enc_valid_len = state\n", 84 | " X = self.embedding(X).swapaxes(0, 1)\n", 85 | " outputs = []\n", 86 | " for x in X:\n", 87 | " # query shape: (batch_size, 1, hidden_size)\n", 88 | " query = hidden_state[0][-1].expand_dims(axis=1)\n", 89 | " # context has same shape as query\n", 90 | " context = self.attention_cell(\n", 91 | " query, enc_outputs, enc_outputs, enc_valid_len)\n", 92 | " # concatenate on the feature dimension\n", 93 | " x = nd.concat(context, x.expand_dims(axis=1), dim=-1)\n", 94 | " # reshape x to (1, batch_size, embed_size+hidden_size)\n", 95 | " out, hidden_state = self.rnn(x.swapaxes(0, 1), hidden_state)\n", 96 | " outputs.append(out)\n", 97 | " outputs = self.dense(nd.concat(*outputs, dim=0))\n", 98 | " return outputs.swapaxes(0, 1), [enc_outputs, hidden_state, \n", 99 | " enc_valid_len]" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "Use the same hyper-parameters to create an encoder and decoder as the [\"Sequence to Sequence\"](../chapter_recurrent-neural-networks/seq2seq.md) section, we get the same decoder output shape, but the state structure is changed." 
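For reference, the `MLPAttention` cell used in the decoder above follows the additive (MLP) attention scheme. As a sketch of the general form only (not a claim about the exact `d2l` implementation), a query $\mathbf{q}$ and a key $\mathbf{k}$ are scored as

$$a(\mathbf{q}, \mathbf{k}) = \mathbf{v}^\top \tanh\left(\mathbf{W}_q \mathbf{q} + \mathbf{W}_k \mathbf{k}\right),$$

and the scores are normalized by a softmax, masked by the valid lengths so that padded encoder positions receive no weight, to form the weights over the encoder outputs (the values).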
107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "metadata": { 113 | "attributes": { 114 | "classes": [], 115 | "id": "", 116 | "n": "3" 117 | } 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "encoder = d2l.Seq2SeqEncoder(vocab_size=10, embed_size=8,\n", 122 | " num_hiddens=16, num_layers=2)\n", 123 | "encoder.initialize()\n", 124 | "decoder = Seq2SeqAttentionDecoder(vocab_size=10, embed_size=8, \n", 125 | " num_hiddens=16, num_layers=2)\n", 126 | "decoder.initialize()\n", 127 | "X = nd.zeros((4, 7))\n", 128 | "state = decoder.init_state(encoder(X), None)\n", 129 | "out, state = decoder(X, state)\n", 130 | "out.shape, len(state), state[0].shape, len(state[1]), state[1][0].shape" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## Training\n", 138 | "\n", 139 | "Again, we use the same training hyper-parameters as the [\"Sequence to Sequence\"](../chapter_recurrent-neural-networks/seq2seq.md) section. The training loss is similar to the seq2seq model, because the sequences in the training dataset are relative short. The additional attention layer doesn't lead to a significant different. But due to both attention layer computational overhead and we unroll the time steps in the decoder, this model is much slower than the seq2seq model." 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 5, 145 | "metadata": { 146 | "attributes": { 147 | "classes": [], 148 | "id": "", 149 | "n": "5" 150 | } 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0\n", 155 | "batch_size, num_examples, max_len = 64, 1e3, 10\n", 156 | "lr, num_epochs, ctx = 0.005, 200, d2l.try_gpu()\n", 157 | "\n", 158 | "src_vocab, tgt_vocab, train_iter = d2l.load_data_nmt(\n", 159 | " batch_size, max_len, num_examples)\n", 160 | "encoder = d2l.Seq2SeqEncoder(\n", 161 | " len(src_vocab), embed_size, num_hiddens, num_layers, dropout)\n", 162 | "decoder = Seq2SeqAttentionDecoder(\n", 163 | " len(tgt_vocab), embed_size, num_hiddens, num_layers, dropout)\n", 164 | "model = d2l.EncoderDecoder(encoder, decoder)\n", 165 | "d2l.train_ch7(model, train_iter, lr, num_epochs, ctx)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Lastly, we predict several sample examples." 
173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 6, 178 | "metadata": { 179 | "attributes": { 180 | "classes": [], 181 | "id": "", 182 | "n": "6" 183 | } 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "for sentence in ['Go .', 'Wow !', \"I'm OK .\", 'I won !']:\n", 188 | " print(sentence + ' => ' + d2l.translate_ch7(\n", 189 | " model, sentence, src_vocab, tgt_vocab, max_len, ctx))" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Summary" 197 | ] 198 | } 199 | ], 200 | "metadata": { 201 | "accelerator": "GPU", 202 | "language_info": { 203 | "name": "python" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 2 208 | } -------------------------------------------------------------------------------- /chapter_computational-performance/auto-parallelism.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Automatic Parallelism\n", 8 | "\n", 9 | "MXNet automatically constructs computational graphs at the back end. Using a computational graph, the system is aware of all the computational dependencies, and can selectively execute multiple non-interdependent tasks in parallel to improve computing performance. For instance, the first example in the [“Asynchronous Computing”](async-computation.md) section executes `a = nd.ones((1, 2))` and `b = nd.ones((1, 2))` in turn. There is no dependency between these two steps, so the system can choose to execute them in parallel.\n", 10 | "\n", 11 | "Typically, a single operator will use all the computational resources on all CPUs or a single GPU. For example, the `dot` operator will use all threads on all CPUs (even if there are multiple CPU processors on a single machine) or a single GPU. If computational load of each operator is large enough and multiple operators are run in parallel on only on the CPU or a single GPU, then the operations of each operator can only receive a portion of computational resources of CPU or single GPU. Even if these computations can be parallelized, the ultimate increase in computing performance may not be significant. In this section, our discussion of automatic parallel computation mainly focuses on parallel computation using both CPUs and GPUs, as well as the parallelization of computation and communication.\n", 12 | "\n", 13 | "First, import the required packages or modules for experiment in this section. Note that we need at least one GPU to run the experiment in this section." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# Install dependencies before importing\n", 23 | "!pip install mxnet-cu100\n", 24 | "!pip install d2l\n", 25 | "\n", 26 | "import sys\n", 27 | "sys.path.insert(0, '..')\n", 28 | "\n", 29 | "import d2l\n", 30 | "import mxnet as mx\n", 31 | "from mxnet import nd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Parallel Computation using CPUs and GPUs\n", 39 | "\n", 40 | "First, we will discuss parallel computation using CPUs and GPUs, for example, when computation in a program occurs both on the CPU and a GPU. First, define the `run` function so that it performs 10 matrix multiplications." 
41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "def run(x):\n", 50 | " return [nd.dot(x, x) for _ in range(10)]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "Next, create an NDArray on both the CPU and GPU." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "x_cpu = nd.random.uniform(shape=(2000, 2000))\n", 67 | "x_gpu = nd.random.uniform(shape=(6000, 6000), ctx=mx.gpu(0))" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Then, use the two NDArrays to run the `run` function on both the CPU and GPU and print the time required." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "run(x_cpu) # Warm-up begins\n", 84 | "run(x_gpu)\n", 85 | "nd.waitall() # Warm-up ends\n", 86 | "\n", 87 | "with d2l.Benchmark('Run on CPU.'):\n", 88 | " run(x_cpu)\n", 89 | " nd.waitall()\n", 90 | "\n", 91 | "with d2l.Benchmark('Then run on GPU.'):\n", 92 | " run(x_gpu)\n", 93 | " nd.waitall()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "We remove `nd.waitall()` between the two computing tasks `run(x_cpu)` and `run(x_gpu)` and hope the system can automatically parallel these two tasks." 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "with d2l.Benchmark('Run on both CPU and GPU in parallel.'):\n", 110 | " run(x_cpu)\n", 111 | " run(x_gpu)\n", 112 | " nd.waitall()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "As we can see, when two computing tasks are executed together, the total execution time is less than the sum of their separate execution times. This means that MXNet can effectively automate parallel computation on CPUs and GPUs.\n", 120 | "\n", 121 | "\n", 122 | "## Parallel Computation of Computing and Communication\n", 123 | "\n", 124 | "In computations that use both the CPU and GPU, we often need to copy data between the CPU and GPU, resulting in data communication. In the example below, we compute on the GPU and then copy the results back to the CPU. We print the GPU computation time and the communication time from the GPU to CPU." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "def copy_to_cpu(x):\n", 134 | " return [y.copyto(mx.cpu()) for y in x]\n", 135 | "\n", 136 | "with d2l.Benchmark('Run on GPU.'):\n", 137 | " y = run(x_gpu)\n", 138 | " nd.waitall()\n", 139 | "\n", 140 | "with d2l.Benchmark('Then copy to CPU.'):\n", 141 | " copy_to_cpu(y)\n", 142 | " nd.waitall()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "We remove the `waitall` function between computation and communication and print the total time need to complete both tasks." 
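As an aside, the timing cells in this section lean on the `d2l.Benchmark` helper, whose definition is not shown here. Below is a minimal sketch of such a timing context manager, assuming it does nothing more than measure and print wall-clock time; the message format is illustrative, not the `d2l` package's exact output.

```
import time

class Benchmark:
    # A minimal stand-in for d2l.Benchmark: report elapsed wall-clock time
    def __init__(self, prefix=''):
        self.prefix = prefix

    def __enter__(self):
        # Record the start time when entering the `with` block
        self.start = time.time()
        return self

    def __exit__(self, *args):
        # Print how long the body of the `with` block took
        print('%s time: %.4f sec' % (self.prefix, time.time() - self.start))
```

Wrapped around a block of MXNet calls followed by `nd.waitall()`, it brackets exactly the work whose running time we want to report, as in the cells above and below.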
150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "with d2l.Benchmark('Run and copy in parallel.'):\n", 159 | " y = run(x_gpu)\n", 160 | " copy_to_cpu(y)\n", 161 | " nd.waitall()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "As we can see, the total time required to perform computation and communication is less than the sum of their separate execution times. It should be noted that this computation and communication task is different from the parallel computation task that simultaneously used the CPU and GPU described earlier in this section. Here, there is a dependency between execution and communication: `y[i]` must be computed before it can be copied to the CPU. Fortunately, the system can copy `y[i-1]` when computing `y[i]` to reduce the total running time of computation and communication.\n", 169 | "\n", 170 | "## Summary\n", 171 | "\n", 172 | "* MXNet can improve computing performance through automatic parallel computation, such as parallel computation using the CPU and GPU and the parallelization of computation and communication.\n", 173 | "\n", 174 | "\n", 175 | "## Exercises\n", 176 | "\n", 177 | "* 10 operations were performed in the `run` function defined in this section. There are no dependencies between them. Design an experiment to see if MXNet will automatically execute them in parallel.\n", 178 | "* Designing computation tasks that include more complex data dependencies, and run experiments to see if MXNet can obtain the correct results and improve computing performance.\n", 179 | "* When the computational load of an operator is small enough, parallel computation on only the CPU or a single GPU may also improve the computing performance. Design an experiment to verify this.\n", 180 | "\n", 181 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2382)\n", 182 | "\n", 183 | "\"\"" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "accelerator": "GPU", 189 | "language_info": { 190 | "name": "python" 191 | } 192 | }, 193 | "nbformat": 4, 194 | "nbformat_minor": 2 195 | } -------------------------------------------------------------------------------- /chapter_computational-performance/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Computational Performance\n", 8 | "\n", 9 | "In deep learning, data sets are usually large and model computation is complex. Therefore, we are always very concerned about computing performance. This chapter will focus on the important factors that affect computing performance: imperative programming, symbolic programming, asynchronous programing, automatic parallel computation, and multi-GPU computation. By studying this chapter, you should be able to further improve the computing performance of the models that have been implemented in the previous chapters, for example, by reducing the model training time without affecting the accuracy of the model." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "```eval_rst\n", 17 | "\n", 18 | ".. 
toctree::\n", 19 | " :maxdepth: 2\n", 20 | "\n", 21 | " hybridize\n", 22 | " async-computation\n", 23 | " auto-parallelism\n", 24 | " multiple-gpus\n", 25 | " multiple-gpus-gluon\n", 26 | "```\n" 27 | ] 28 | } 29 | ], 30 | "metadata": { 31 | "accelerator": "GPU", 32 | "language_info": { 33 | "name": "python" 34 | } 35 | }, 36 | "nbformat": 4, 37 | "nbformat_minor": 2 38 | } -------------------------------------------------------------------------------- /chapter_computer-vision/bounding-box.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Object Detection and Bounding Boxes\n", 8 | "\n", 9 | "In the previous section, we introduced many models for image classification. In image classification tasks, we assume that there is only one main target in the image and we only focus on how to identify the target category. However, in many situations, there are multiple targets in the image that we are interested in. We not only want to classify them, but also want to obtain their specific positions in the image. In computer vision, we refer to such tasks as object detection (or object detection).\n", 10 | "\n", 11 | "Object detection is widely used in many fields. For example, in self-driving technology, we need to plan routes by identifying the locations of vehicles, pedestrians, roads, and obstacles in the captured video image. Robots often perform this type of task to detect targets of interest. Systems in the security field need to detect abnormal targets, such as intruders or bombs.\n", 12 | "\n", 13 | "In the next few sections, we will introduce multiple deep learning models used for object detection. Before that, we should discuss the concept of target location. First, import the packages and modules required for the experiment." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# Install dependencies before importing\n", 23 | "!pip install mxnet-cu100\n", 24 | "!pip install d2l\n", 25 | "\n", 26 | "import sys\n", 27 | "sys.path.insert(0, '..')\n", 28 | "\n", 29 | "%matplotlib inline\n", 30 | "import d2l\n", 31 | "from mxnet import image" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "Next, we will load the sample images that will be used in this section. We can see there is a dog on the left side of the image and a cat on the right. They are the two main targets in this image." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "d2l.set_figsize()\n", 48 | "img = image.imread('../img/catdog.jpg').asnumpy()\n", 49 | "d2l.plt.imshow(img); # Add a semicolon to only display the image" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Bounding Box\n", 57 | "\n", 58 | "In object detection, we usually use a bounding box to describe the target location. The bounding box is a rectangular box that can be determined by the $x$ and $y$ axis coordinates in the upper-left corner and the $x$ and $y$ axis coordinates in the lower-right corner of the rectangle. We will define the bounding boxes of the dog and the cat in the image based on the coordinate information in the above image. 
The origin of the coordinates in the above image is the upper left corner of the image, and to the right and down are the positive directions of the $x$ axis and the $y$ axis, respectively." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 2, 64 | "metadata": { 65 | "attributes": { 66 | "classes": [], 67 | "id": "", 68 | "n": "2" 69 | } 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# bbox is the abbreviation for bounding box\n", 74 | "dog_bbox, cat_bbox = [60, 45, 378, 516], [400, 112, 655, 493]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "We can draw the bounding box in the image to check if it is accurate. Before drawing the box, we will define a helper function `bbox_to_rect`. It represents the bounding box in the bounding box format of matplotlib." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 3, 87 | "metadata": { 88 | "attributes": { 89 | "classes": [], 90 | "id": "", 91 | "n": "3" 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "# This function has been saved in the d2l package for future use\n", 97 | "def bbox_to_rect(bbox, color):\n", 98 | " # Convert the bounding box (top-left x, top-left y, bottom-right x,\n", 99 | " # bottom-right y) format to matplotlib format: ((upper-left x,\n", 100 | " # upper-left y), width, height)\n", 101 | " return d2l.plt.Rectangle(\n", 102 | " xy=(bbox[0], bbox[1]), width=bbox[2]-bbox[0], height=bbox[3]-bbox[1],\n", 103 | " fill=False, edgecolor=color, linewidth=2)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "After loading the bounding box on the image, we can see that the main outline of the target is basically inside the box." 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "fig = d2l.plt.imshow(img)\n", 120 | "fig.axes.add_patch(bbox_to_rect(dog_bbox, 'blue'))\n", 121 | "fig.axes.add_patch(bbox_to_rect(cat_bbox, 'red'));" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Summary\n", 129 | "\n", 130 | "* In object detection, we not only need to identify all the objects of interest in the image, but also their positions. The positions are generally represented by a rectangular bounding box.\n", 131 | "\n", 132 | "## Exercises\n", 133 | "\n", 134 | "* Find some images and try to label a bounding box that contains the target. Compare the difference between the time it takes to label the bounding box and label the category.\n", 135 | "\n", 136 | "\n", 137 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2444)\n", 138 | "\n", 139 | "\"\"" 140 | ] 141 | } 142 | ], 143 | "metadata": { 144 | "accelerator": "GPU", 145 | "language_info": { 146 | "name": "python" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } -------------------------------------------------------------------------------- /chapter_computer-vision/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Computer Vision\n", 8 | "\n", 9 | "Many applications in the area of computer vision are closely related to our daily lives, now and in the future, whether medical diagnostics, driverless vehicles, camera monitoring, or smart filters. 
In recent years, deep learning technology has greatly enhanced computer vision systems' performance. It can be said that the most advanced computer vision applications are nearly inseparable from deep learning.\n", 10 | "\n", 11 | "We have introduced deep learning models commonly used in the area of computer vision in the chapter \"Convolutional Neural Networks\" and have practiced simple image classification tasks. In this chapter, we will introduce image augmentation and fine tuning methods and apply them to image classification. Then, we will explore various methods of object detection. After that, we will learn how to use fully convolutional networks to perform semantic segmentation on images. Then, we explain how to use style transfer technology to generate images that look like the cover of this book. Finally, we will perform practice exercises on two important computer vision data sets to review the content of this chapter and the previous chapters." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "```eval_rst\n", 19 | "\n", 20 | ".. toctree::\n", 21 | " :maxdepth: 2\n", 22 | "\n", 23 | " image-augmentation\n", 24 | " fine-tuning\n", 25 | " bounding-box\n", 26 | " anchor\n", 27 | " multiscale-object-detection\n", 28 | " object-detection-dataset\n", 29 | " ssd\n", 30 | " rcnn\n", 31 | " semantic-segmentation-and-dataset\n", 32 | " fcn\n", 33 | " neural-style\n", 34 | " kaggle-gluon-cifar10\n", 35 | " kaggle-gluon-dog\n", 36 | "```\n" 37 | ] 38 | } 39 | ], 40 | "metadata": { 41 | "accelerator": "GPU", 42 | "language_info": { 43 | "name": "python" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 2 48 | } -------------------------------------------------------------------------------- /chapter_computer-vision/object-detection-dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Object Detection Data Set (Pikachu)\n", 8 | "\n", 9 | "There are no small data sets, like MNIST or Fashion-MNIST, in the object detection field. In order to quickly test models, we are going to assemble a small data set. First, we generate 1000 Pikachu images of different angles and sizes using an open source 3D Pikachu model. Then, we collect a series of background images and place a Pikachu image at a random position on each image. We use the im2rec tool provided by MXNet to convert the images to binary RecordIO format[1]. This format can reduce the storage overhead of the data set on the disk and improve the reading efficiency. If you want to learn more about how to read images, refer to the documentation for the GluonCV Toolkit[2].\n", 10 | "\n", 11 | "\n", 12 | "## Download the Data Set\n", 13 | "\n", 14 | "The Pikachu data set in RecordIO format can be downloaded directly from the Internet. The operation for downloading the data set is defined in the function `_download_pikachu`." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "attributes": { 22 | "classes": [], 23 | "id": "", 24 | "n": "1" 25 | } 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# Install dependencies before importing\n", 30 | "!pip install mxnet-cu100\n", 31 | "!pip install d2l\n", 32 | "\n", 33 | "import sys\n", 34 | "sys.path.insert(0, '..')\n", 35 | "\n", 36 | "%matplotlib inline\n", 37 | "import d2l\n", 38 | "from mxnet import gluon, image\n", 39 | "from mxnet.gluon import utils as gutils\n", 40 | "import os\n", 41 | "\n", 42 | "def _download_pikachu(data_dir):\n", 43 | " root_url = ('https://apache-mxnet.s3-accelerate.amazonaws.com/'\n", 44 | " 'gluon/dataset/pikachu/')\n", 45 | " dataset = {'train.rec': 'e6bcb6ffba1ac04ff8a9b1115e650af56ee969c8',\n", 46 | " 'train.idx': 'dcf7318b2602c06428b9988470c731621716c393',\n", 47 | " 'val.rec': 'd6c33f799b4d058e82f2cb5bd9a976f69d72d520'}\n", 48 | " for k, v in dataset.items():\n", 49 | " gutils.download(root_url + k, os.path.join(data_dir, k), sha1_hash=v)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Read the Data Set\n", 57 | "\n", 58 | "We are going to read the object detection data set by creating the instance `ImageDetIter`. The \"Det\" in the name refers to Detection. We will read the training data set in random order. Since the format of the data set is RecordIO, we need the image index file `'train.idx'` to read random mini-batches. In addition, for each image of the training set, we will use random cropping and require the cropped image to cover at least 95% of each object. Since the cropping is random, this requirement is not always satisfied. We preset the maximum number of random cropping attempts to 200. If none of them meets the requirement, the image will not be cropped. To ensure the certainty of the output, we will not randomly crop the images in the test data set. We also do not need to read the test data set in random order." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 2, 64 | "metadata": { 65 | "attributes": { 66 | "classes": [], 67 | "id": "", 68 | "n": "2" 69 | } 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# This function has been saved in the d2l package for future use\n", 74 | "# Edge_size: the width and height of the output image\n", 75 | "def load_data_pikachu(batch_size, edge_size=256):\n", 76 | " data_dir = '../data/pikachu'\n", 77 | " _download_pikachu(data_dir)\n", 78 | " train_iter = image.ImageDetIter(\n", 79 | " path_imgrec=os.path.join(data_dir, 'train.rec'),\n", 80 | " path_imgidx=os.path.join(data_dir, 'train.idx'),\n", 81 | " batch_size=batch_size,\n", 82 | " data_shape=(3, edge_size, edge_size), # The shape of the output image\n", 83 | " shuffle=True, # Read the data set in random order\n", 84 | " rand_crop=1, # The probability of random cropping is 1\n", 85 | " min_object_covered=0.95, max_attempts=200)\n", 86 | " val_iter = image.ImageDetIter(\n", 87 | " path_imgrec=os.path.join(data_dir, 'val.rec'), batch_size=batch_size,\n", 88 | " data_shape=(3, edge_size, edge_size), shuffle=False)\n", 89 | " return train_iter, val_iter" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Below, we read a mini-batch and print the shape of the image and label. The shape of the image is the same as in the previous experiment (batch size, number of channels, height, width). 
The shape of the label is (batch size, $m$, 5), where $m$ is equal to the maximum number of bounding boxes contained in a single image in the data set. Although computation for the mini-batch is very efficient, it requires each image to contain the same number of bounding boxes so that they can be placed in the same batch. Since each image may have a different number of bounding boxes, we can add illegal bounding boxes to images that have less than $m$ bounding boxes until each image contains $m$ bounding boxes. Thus, we can read a mini-batch of images each time. The label of each bounding box in the image is represented by an array of length 5 The first element in the array is the category of the object contained in the bounding box. When the value is -1, the bounding box is an illegal bounding box for filling purpose. The remaining four elements of the array represent the $x, y$ axis coordinates of the upper-left corner of the bounding box and the $x, y$ axis coordinates of the lower-right corner of the bounding box (the value range is between 0 and 1). The Pikachu data set here has only one bounding box per image, so $m=1$." 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 3, 102 | "metadata": { 103 | "attributes": { 104 | "classes": [], 105 | "id": "", 106 | "n": "3" 107 | } 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "batch_size, edge_size = 32, 256\n", 112 | "train_iter, _ = load_data_pikachu(batch_size, edge_size)\n", 113 | "batch = train_iter.next()\n", 114 | "batch.data[0].shape, batch.label[0].shape" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Graphic Data\n", 122 | "\n", 123 | "We have ten images with bounding boxes on them. We can see that the angle, size, and position of Pikachu are different in each image. Of course, this is a simple man-made data set. In actual practice, the data is usually much more complicated." 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 4, 129 | "metadata": { 130 | "attributes": { 131 | "classes": [], 132 | "id": "", 133 | "n": "4" 134 | } 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "imgs = (batch.data[0][0:10].transpose((0, 2, 3, 1))) / 255\n", 139 | "axes = d2l.show_images(imgs, 2, 5).flatten()\n", 140 | "for ax, label in zip(axes, batch.label[0][0:10]):\n", 141 | " d2l.show_bboxes(ax, [label[0][1:5] * edge_size], colors=['w'])" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## Summary\n", 149 | "\n", 150 | "* The Pikachu data set we synthesized can be used to test object detection models.\n", 151 | "* The data reading for object detection is similar to that for image classification. However, after we introduce bounding boxes, the label shape and image augmentation (e.g., random cropping) are changed.\n", 152 | "\n", 153 | "\n", 154 | "## Exercises\n", 155 | "\n", 156 | "* Referring to the MXNet documentation, what are the parameters for the constructors of the `image.ImageDetIter` and `image.CreateDetAugmenter` classes? What is their significance?\n", 157 | "\n", 158 | "## References\n", 159 | "\n", 160 | "[1] im2rec Tool. https://github.com/apache/incubator-mxnet/blob/master/tools/im2rec.py\n", 161 | "\n", 162 | "[2] GluonCV Toolkit. 
https://gluon-cv.mxnet.io/\n", 163 | "\n", 164 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2452)\n", 165 | "\n", 166 | "\"\"" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "accelerator": "GPU", 172 | "language_info": { 173 | "name": "python" 174 | } 175 | }, 176 | "nbformat": 4, 177 | "nbformat_minor": 2 178 | } -------------------------------------------------------------------------------- /chapter_convolutional-modern/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Modern Convolutional Networks\n", 8 | "\n", 9 | "Now that we understand the basics of wiring together convolutional neural networks, we will take you through a tour of modern deep learning.\n", 10 | "In this chapter, each section will correspond to a significant neural network architecture that was at some point (or currently) the base model upon which an enormous amount of research and projects were built. \n", 11 | "Each of these networks was briefly \n", 12 | "a dominant architecture and many were\n", 13 | "at one point winners or runners-up in the famous ImageNet competition,\n", 14 | "which has served as a barometer of progress \n", 15 | "on supervised learning in computer vision since 2010.\n", 16 | "\n", 17 | "These models include AlexNet, the first large-scale network deployed to beat conventional computer vision methods on a large-scale vision challenge; \n", 18 | "the VGG network, which makes use of a number of repeating blocks of elements; the network in network (NiN), which convolves whole neural networks patch-wise over inputs; the GoogLeNet, which makes use of networks with parallel\n", 19 | "concatenations; residual networks (ResNet), which are currently the most popular go-to architecture; and densely connected networks (DenseNet), which are expensive to compute but have set some recent benchmarks." 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "```eval_rst\n", 27 | "\n", 28 | ".. 
toctree::\n", 29 | " :maxdepth: 2\n", 30 | "\n", 31 | " alexnet\n", 32 | " vgg\n", 33 | " nin\n", 34 | " googlenet\n", 35 | " batch-norm\n", 36 | " resnet\n", 37 | " densenet\n", 38 | "```\n" 39 | ] 40 | } 41 | ], 42 | "metadata": { 43 | "accelerator": "GPU", 44 | "language_info": { 45 | "name": "python" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 2 50 | } -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Convolutional Neural Networks\n", 8 | "\n", 9 | "In several of our previous examples, we have already come up\n", 10 | "against image data, which consist of pixels arranged in a 2D grid.\n", 11 | "Depending on whether we are looking at a black and white or color image,\n", 12 | "we might have either one or multiple numerical values \n", 13 | "corresponding to each pixel location.\n", 14 | "Until now, we have dealt with this rich structure \n", 15 | "in the least satisfying possible way.\n", 16 | "We simply threw away this spatial structure \n", 17 | "by flattening each image into a 1D vector,\n", 18 | "and fed it into a fully-connected network.\n", 19 | "These networks are invariant to the order of their inputs.\n", 20 | "We will get qualitatively identical results\n", 21 | "out of a multilayer perceptron \n", 22 | "whether we preserve the original order of our features or \n", 23 | "if we permute the columns of our design matrix before learning the parameters.\n", 24 | "Ideally, we would find a way to leverage our prior knowledge \n", 25 | "that nearby pixels are more related to each other.\n", 26 | "\n", 27 | "In this chapter, we introduce convolutional neural networks (CNNs),\n", 28 | "a powerful family of neural networks \n", 29 | "that were designed for precisely this purpose.\n", 30 | "CNN-based network *architectures* \n", 31 | "now dominate the field of computer vision to such an extent \n", 32 | "that hardly anyone these days would develop \n", 33 | "a commercial application or enter a competition\n", 34 | "related to image recognition, object detection, \n", 35 | "or semantic segmentation, \n", 36 | "without basing their approach on them.\n", 37 | "\n", 38 | "Modern 'convnets', as they are often called, owe their design\n", 39 | "to inspirations from biology, group theory, \n", 40 | "and a healthy dose of experimental tinkering. \n", 41 | "In addition to their strong predictive performance,\n", 42 | "convolutional neural networks tend to be computationally efficient,\n", 43 | "both because they tend to require fewer parameters\n", 44 | "than dense architectures \n", 45 | "and also because convolutions are easy to parallelize across GPU cores.\n", 46 | "As a result, researchers have sought to apply convnets whenever possible,\n", 47 | "and increasingly they have emerged as credible competitors\n", 48 | "even on tasks with 1D sequence structure,\n", 49 | "such as audio, text, and time series analysis,\n", 50 | "where recurrent neural networks (introduced in the next chapter)\n", 51 | "are conventionally used. 
\n", 52 | "Some clever adaptations of CNNs have also brought them to bear \n", 53 | "on graph-structured data and in recommender systems.\n", 54 | "\n", 55 | "First, we will walk through the basic operations\n", 56 | "that comprise the backbone of all modern convolutional networks.\n", 57 | "These include the convolutional layers themselves,\n", 58 | "nitty-gritty details including padding and stride,\n", 59 | "the pooling layers used to aggregate information \n", 60 | "across adjacent spatial regions, \n", 61 | "the use of multiple *channels* (also called *filters*) at each layer,\n", 62 | "and a careful discussion of the structure of modern architectures.\n", 63 | "We will conclude the chapter with a full working example of LeNet, \n", 64 | "the first convolutional network successfully deployed, \n", 65 | "long before the rise of modern deep learning.\n", 66 | "In the next chapter we'll dive into full implementations\n", 67 | "of some of the recent popular neural networks \n", 68 | "whose designs are representative of most of the techniques\n", 69 | "commonly used to design modern convolutional neural networks." 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "```eval_rst\n", 77 | "\n", 78 | ".. toctree::\n", 79 | " :maxdepth: 2\n", 80 | "\n", 81 | " why-conv\n", 82 | " conv-layer\n", 83 | " padding-and-strides\n", 84 | " channels\n", 85 | " pooling\n", 86 | " lenet\n", 87 | "```\n" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "accelerator": "GPU", 93 | "language_info": { 94 | "name": "python" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } -------------------------------------------------------------------------------- /chapter_crashcourse/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# The Preliminaries: A Crashcourse\n", 8 | "\n", 9 | "To get started with deep learning, \n", 10 | "we will need to develop a few basic skills.\n", 11 | "All machine learning is concerned \n", 12 | "with extracting information from data.\n", 13 | "So we will begin by learning the practical skills \n", 14 | "for storing and manipulating data with Apache MXNet.\n", 15 | "Moreover machine learning typically requires \n", 16 | "working with large datasets, which we can think of as tables, \n", 17 | "where the rows correspond to examples\n", 18 | "and the columns correspond to attributes. \n", 19 | "Linear algebra gives us a powerful set of techniques \n", 20 | "for working with tabular data. \n", 21 | "We won't go too far into the weeds but rather focus on the basic\n", 22 | "of matrix operations and their implementation in Apache MXNet.\n", 23 | "Additionally, deep learning is all about optimization.\n", 24 | "We have a model with some parameters and \n", 25 | "we want to find those that fit our data the *best*.\n", 26 | "Determining which way to move each parameter at each step of an algorithm\n", 27 | "requires a little bit of calculus. \n", 28 | "Fortunately, Apache MXNet's autograd package covers this for us,\n", 29 | "and we will cover it next. 
\n", 30 | "Next, machine learning is concerned with making predictions:\n", 31 | "*what is the likely value of some unknown attribute, \n", 32 | "given the information that we observe?*\n", 33 | "To reason rigorously under uncertainty \n", 34 | "we will need to invoke the language of probability and statistics.\n", 35 | "To conclude the chapter, we will present \n", 36 | "your first basic classifier, *Naive Bayes*." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "```eval_rst\n", 44 | "\n", 45 | ".. toctree::\n", 46 | " :maxdepth: 2\n", 47 | "\n", 48 | " ndarray\n", 49 | " linear-algebra\n", 50 | " autograd\n", 51 | " probability\n", 52 | " naive-bayes\n", 53 | " lookup-api\n", 54 | "\n", 55 | "```\n" 56 | ] 57 | } 58 | ], 59 | "metadata": { 60 | "accelerator": "GPU", 61 | "language_info": { 62 | "name": "python" 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 2 67 | } -------------------------------------------------------------------------------- /chapter_crashcourse/lookup-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Documentation\n", 8 | "\n", 9 | "Due to constraints on the length of this book, we cannot possibly introduce every single MXNet function and class (and you probably would no want us to). The API documentation and additional tutorials and examples provide plenty of documentation beyond the book. In this section we provide you some guidance to exploring the MXNet API.\n", 10 | "\n", 11 | "## Finding all the functions and classes in the module\n", 12 | "\n", 13 | "In order to know which functions and classes can be called in a module, we invoke the `dir` function. For instance, we can query all properties in the `nd.random` module as follows:" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "attributes": { 21 | "classes": [], 22 | "id": "", 23 | "n": "1" 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "# Install dependencies before importing\n", 29 | "!pip install mxnet-cu100\n", 30 | "!pip install d2l\n", 31 | "\n", 32 | "from mxnet import nd\n", 33 | "print(dir(nd.random))" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Generally, we can ignore functions that start and end with `__` (special objects in Python) or functions that start with a single `_`(usually internal functions). Based on the remaining function/attribute names, we might hazard a guess that this module offers various methods for generating random numbers, including sampling from the uniform distribution (`uniform`), normal distribution (`normal`), and Poisson distribution (`poisson`).\n", 41 | "\n", 42 | "## Finding the usage of specific functions and classes\n", 43 | "\n", 44 | "For more specific instructions on how to use a given function or class, we can invoke the `help` function. As an example, let's explore the usage instructions for NDArray's `ones_like` function." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "help(nd.ones_like)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "From the documentation, we can see that the `ones_like` function creates a new array with the same shape as the supplied NDArray and all elements set to `1`. 
Whenever possible, you should run a quick test to confirm your interpretation:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "x = nd.array([[0, 0, 0], [2, 2, 2]])\n", 70 | "y = x.ones_like()\n", 71 | "y" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "In the Jupyter notebook, we can use `?` to display the document in another window. For example, `nd.random.uniform?` will create content that is almost identical to `help(nd.random.uniform)`, displaying it in a new browser window. In addition, if we use two question marks, e.g. `nd.random.uniform??`, the code implementing the function will also be displayed.\n", 79 | "\n", 80 | "## API Documentation\n", 81 | "\n", 82 | "For further details on the API details check the MXNet website at [http://mxnet.apache.org/](http://mxnet.apache.org/). You can find the details under the appropriate headings (also for programming languages other than Python).\n", 83 | "\n", 84 | "## Exercise\n", 85 | "\n", 86 | "Look up `ones_like` and `autograd` in the API documentation.\n", 87 | "\n", 88 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2322)\n", 89 | "\n", 90 | "\"\"" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "accelerator": "GPU", 96 | "language_info": { 97 | "name": "python" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 2 102 | } -------------------------------------------------------------------------------- /chapter_deep-learning-computation/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deep Learning Computation\n", 8 | "\n", 9 | "The previous chapter introduced the principles and implementation for a simple deep learning model, including multi-layer perceptrons. In this chapter we will cover various key components of deep learning computation, such as model construction, parameter access and initialization, custom layers, and reading, storing, and using GPUs. Throughout this chapter, you will gain important insights into model implementation and computation details, which gives readers a solid foundation for implementing more complex models in the following chapters." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "```eval_rst\n", 17 | "\n", 18 | ".. toctree::\n", 19 | " :maxdepth: 2\n", 20 | "\n", 21 | " model-construction\n", 22 | " parameters\n", 23 | " deferred-init\n", 24 | " custom-layer\n", 25 | " read-write\n", 26 | " use-gpu\n", 27 | "\n", 28 | "```\n" 29 | ] 30 | } 31 | ], 32 | "metadata": { 33 | "accelerator": "GPU", 34 | "language_info": { 35 | "name": "python" 36 | } 37 | }, 38 | "nbformat": 4, 39 | "nbformat_minor": 2 40 | } -------------------------------------------------------------------------------- /chapter_deep-learning-computation/read-write.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# File I/O\n", 8 | "\n", 9 | "So far we discussed how to process data, how to build, train and test deep learning models. However, at some point we are likely happy with what we obtained and we want to save the results for later use and distribution. 
Likewise, when running a long training process it is best practice to save intermediate results (checkpointing) to ensure that we don't lose several days worth of computation when tripping over the power cord of our server. At the same time, we might want to load a pretrained model (e.g. we might have word embeddings for English and use it for our fancy spam classifier). For all of these cases we need to load and store both individual weight vectors and entire models. This section addresses both issues.\n", 10 | "\n", 11 | "## NDArray\n", 12 | "\n", 13 | "In its simplest form, we can directly use the `save` and `load` functions to store and read NDArrays separately. This works just as expected." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# Install dependencies before importing\n", 23 | "!pip install mxnet-cu100\n", 24 | "!pip install d2l\n", 25 | "\n", 26 | "from mxnet import nd\n", 27 | "from mxnet.gluon import nn\n", 28 | "\n", 29 | "x = nd.arange(4)\n", 30 | "nd.save('x-file', x)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "Then, we read the data from the stored file back into memory." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "x2 = nd.load('x-file')\n", 47 | "x2" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "We can also store a list of NDArrays and read them back into memory." 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "attributes": { 62 | "classes": [], 63 | "id": "", 64 | "n": "2" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "y = nd.zeros(4)\n", 70 | "nd.save('x-files', [x, y])\n", 71 | "x2, y2 = nd.load('x-files')\n", 72 | "(x2, y2)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "We can even write and read a dictionary that maps from a string to an NDArray. This is convenient, for instance when we want to read or write all the weights in a model." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": { 86 | "attributes": { 87 | "classes": [], 88 | "id": "", 89 | "n": "4" 90 | } 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "mydict = {'x': x, 'y': y}\n", 95 | "nd.save('mydict', mydict)\n", 96 | "mydict2 = nd.load('mydict')\n", 97 | "mydict2" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Gluon Model Parameters\n", 105 | "\n", 106 | "Saving individual weight vectors (or other NDArray tensors) is useful but it gets very tedious if we want to save (and later load) an entire model. After all, we might have hundreds of parameter groups sprinkled throughout. Writing a script that collects all the terms and matches them to an architecture is quite some work. For this reason Gluon provides built-in functionality to load and save entire networks rather than just single weight vectors. An important detail to note is that this saves model *parameters* and not the entire model. I.e. if we have a 3 layer MLP we need to specify the *architecture* separately. 
The reason for this is that the models themselves can contain arbitrary code, hence they cannot be serialized quite so easily (there is a way to do this for compiled models - please refer to the [MXNet documentation](http://www.mxnet.io) for the technical details on it). The result is that in order to reinstate a model we need to generate the architecture in code and then load the parameters from disk. The [deferred initialization](deferred-init.md) is quite advantageous here since we can simply define a model without the need to put actual values in place. Let's start with our favorite MLP." 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": { 113 | "attributes": { 114 | "classes": [], 115 | "id": "", 116 | "n": "6" 117 | } 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "class MLP(nn.Block):\n", 122 | " def __init__(self, **kwargs):\n", 123 | " super(MLP, self).__init__(**kwargs)\n", 124 | " self.hidden = nn.Dense(256, activation='relu')\n", 125 | " self.output = nn.Dense(10)\n", 126 | "\n", 127 | " def forward(self, x):\n", 128 | " return self.output(self.hidden(x))\n", 129 | "\n", 130 | "net = MLP()\n", 131 | "net.initialize()\n", 132 | "x = nd.random.uniform(shape=(2, 20))\n", 133 | "y = net(x)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "Next, we store the parameters of the model as a file with the name 'mlp.params'." 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "net.save_parameters('mlp.params')" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "To check whether we are able to recover the model we instantiate a clone of the original MLP model. Unlike the random initialization of model parameters, here we read the parameters stored in the file directly." 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 8, 162 | "metadata": { 163 | "attributes": { 164 | "classes": [], 165 | "id": "", 166 | "n": "8" 167 | } 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "clone = MLP()\n", 172 | "clone.load_parameters('mlp.params')" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "Since both instances have the same model parameters, the computation result of the same input `x` should be the same. Let's verify this." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "yclone = clone(x)\n", 189 | "yclone == y" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Summary\n", 197 | "\n", 198 | "* The `save` and `load` functions can be used to perform File I/O for NDArray objects.\n", 199 | "* The `load_parameters` and `save_parameters` functions allow us to save entire sets of parameters for a network in Gluon.\n", 200 | "* Saving the architecture has to be done in code rather than in parameters.\n", 201 | "\n", 202 | "## Exercises\n", 203 | "\n", 204 | "1. Even if there is no need to deploy trained models to a different device, what are the practical benefits of storing model parameters?\n", 205 | "1. Assume that we want to reuse only parts of a network to be incorporated into a network of a *different* architecture. 
How would you go about using, say, the first two layers from a previous network in a new network?\n", 206 | "1. How would you go about saving network architecture and parameters? What restrictions would you impose on the architecture?\n", 207 | "\n", 208 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2329)\n", 209 | "\n", 210 | "\"\"" 211 | ] 212 | } 213 | ], 214 | "metadata": { 215 | "accelerator": "GPU", 216 | "language_info": { 217 | "name": "python" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 2 222 | } -------------------------------------------------------------------------------- /chapter_linear-networks/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Linear Neural Networks\n", 8 | "\n", 9 | "Before we get into the details of deep neural networks, we need to cover the basics of neural network training. In this chapter, we will cover the entire training process, including defining simple neural network architectures, handling data, specifying a loss function, and training the model. In order to make things easier to grasp, we begin with the simplest concepts. Fortunately, classic statistical learning techniques such as linear and logistic regression can be cast as *shallow* neural networks. Starting from these classic algorithms, we'll introduce you to the basics, providing the basis for more complex techniques such as softmax regression (introduced at the end of this chapter) and multilayer perceptrons (introduced in the next chapter)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "```eval_rst\n", 17 | "\n", 18 | ".. toctree::\n", 19 | " :maxdepth: 2\n", 20 | "\n", 21 | " linear-regression\n", 22 | " linear-regression-scratch\n", 23 | " linear-regression-gluon\n", 24 | " softmax-regression\n", 25 | " fashion-mnist\n", 26 | " softmax-regression-scratch\n", 27 | " softmax-regression-gluon\n", 28 | "```\n" 29 | ] 30 | } 31 | ], 32 | "metadata": { 33 | "accelerator": "GPU", 34 | "language_info": { 35 | "name": "python" 36 | } 37 | }, 38 | "nbformat": 4, 39 | "nbformat_minor": 2 40 | } -------------------------------------------------------------------------------- /chapter_linear-networks/softmax-regression-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Concise Implementation of Softmax Regression\n", 8 | "\n", 9 | "Just as Gluon made it much easier to implement [linear regression](linear-regression-gluon.md), we'll find it similarly (or possibly more) convenient for implementing classification models. \n", 10 | "Again, we begin with our import ritual." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "attributes": { 18 | "classes": [], 19 | "id": "", 20 | "n": "1" 21 | } 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "# Install dependencies before importing\n", 26 | "!pip install mxnet-cu100\n", 27 | "!pip install d2l\n", 28 | "\n", 29 | "import sys\n", 30 | "sys.path.insert(0, '..')\n", 31 | "\n", 32 | "%matplotlib inline\n", 33 | "import d2l\n", 34 | "from mxnet import gluon, init\n", 35 | "from mxnet.gluon import loss as gloss, nn" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Let's stick with the Fashion-MNIST dataset and keep the batch size at $256$ as in the last section." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "attributes": { 50 | "classes": [], 51 | "id": "", 52 | "n": "2" 53 | } 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "batch_size = 256\n", 58 | "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Initialize Model Parameters\n", 66 | "\n", 67 | "As [mentioned previously](softmax-regression.md), the output layer of softmax regression is a fully connected (`Dense`) layer. Therefore, to implement our model, we just need to add one `Dense` layer with 10 outputs to our `Sequential`. Again, here, the `Sequential` isn't really necessary, but we might as well form the habit since it will be ubiquitous when implementing deep models. Again, we initialize the weights at random with zero mean and standard deviation 0.01." 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": { 74 | "attributes": { 75 | "classes": [], 76 | "id": "", 77 | "n": "3" 78 | } 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "net = nn.Sequential()\n", 83 | "net.add(nn.Dense(10))\n", 84 | "net.initialize(init.Normal(sigma=0.01))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## The Softmax\n", 92 | "\n", 93 | "In the previous example, we calculated our model's output and then ran this output through the cross-entropy loss. At its heart it uses `-nd.pick(y_hat, y).log()`. Mathematically, that's a perfectly reasonable thing to do. However, computationally, things can get hairy when dealing with exponentiation due to numerical stability issues, a matter we've already discussed a few times (e.g. in when covering [Naive Bayes](../chapter_crashcourse/naive-bayes.md) and in the problem set of the previous chapter). Recall that the softmax function calculates $\\hat y_j = \\frac{e^{z_j}}{\\sum_{i=1}^{n} e^{z_i}}$, where $\\hat y_j$ is the j-th element of ``yhat`` and $z_j$ is the j-th element of the input ``y_linear`` variable, as computed by the softmax.\n", 94 | "\n", 95 | "If some of the $z_i$ are very large (i.e. very positive), \n", 96 | "$e^{z_i}$ might be larger than the largest number \n", 97 | "we can have for certain types of ``float`` (i.e. overflow). \n", 98 | "This would make the denominator (and/or numerator) ``inf`` and we get zero, \n", 99 | "or ``inf``, or ``nan`` for $\\hat y_j$. \n", 100 | "In any case, we won't get a well-defined return value for ``cross_entropy``. This is the reason we subtract $\\text{max}(z_i)$ \n", 101 | "from all $z_i$ first in ``softmax`` function. 
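As a quick illustration of why this matters, here is a minimal sketch (ours, not part of the book's saved `d2l` code) that imports MXNet's `nd` module just for the demonstration and compares a naive softmax with a max-shifted one on a large logit:

```python
from mxnet import nd

z = nd.array([[50, 60, 1000]])  # one very large logit

# Naive softmax: exp(1000) overflows to inf, so the result contains nan
naive = z.exp() / z.exp().sum(axis=1, keepdims=True)

# Subtract max(z_i) first, as described above, then apply the same formula
shifted = z - z.max(axis=1, keepdims=True)
stable = shifted.exp() / shifted.exp().sum(axis=1, keepdims=True)

print(naive)   # [[0. 0. nan]]
print(stable)  # [[0. 0. 1.]], the probabilities softmax should return
```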
\n", 102 | "You can verify that this shifting in $z_i$ \n", 103 | "will not change the return value of ``softmax``.\n", 104 | "\n", 105 | "After the above subtraction/ normalization step, \n", 106 | "it is possible that $z_j$ is very negative. \n", 107 | "Thus, $e^{z_j}$ will be very close to zero \n", 108 | "and might be rounded to zero due to finite precision (i.e underflow), \n", 109 | "which makes $\\hat y_j$ zero and we get ``-inf`` for $\\text{log}(\\hat y_j)$. \n", 110 | "A few steps down the road in backpropagation, \n", 111 | "we start to get horrific not-a-number (``nan``) results printed to screen.\n", 112 | "\n", 113 | "Our salvation is that even though we're computing these exponential functions, we ultimately plan to take their log in the cross-entropy functions. \n", 114 | "It turns out that by combining these two operators \n", 115 | "``softmax`` and ``cross_entropy`` together, \n", 116 | "we can escape the numerical stability issues \n", 117 | "that might otherwise plague us during backpropagation. \n", 118 | "As shown in the equation below, we avoided calculating $e^{z_j}$ \n", 119 | "but directly used $z_j$ due to $\\log(\\exp(\\cdot))$.\n", 120 | "\n", 121 | "$$\n", 122 | "\\begin{aligned}\n", 123 | "\\log{(\\hat y_j)} & = \\log\\left( \\frac{e^{z_j}}{\\sum_{i=1}^{n} e^{z_i}}\\right) \\\\\n", 124 | "& = \\log{(e^{z_j})}-\\text{log}{\\left( \\sum_{i=1}^{n} e^{z_i} \\right)} \\\\\n", 125 | "& = z_j -\\log{\\left( \\sum_{i=1}^{n} e^{z_i} \\right)}\n", 126 | "\\end{aligned}\n", 127 | "$$\n", 128 | "\n", 129 | "We'll want to keep the conventional softmax function handy \n", 130 | "in case we ever want to evaluate the probabilities output by our model. \n", 131 | "But instead of passing softmax probabilities into our new loss function, \n", 132 | "we'll just pass $\\hat{y}$ and compute the softmax and its log \n", 133 | "all at once inside the softmax_cross_entropy loss function, \n", 134 | "which does smart things like the log-sum-exp trick ([see on Wikipedia](https://en.wikipedia.org/wiki/LogSumExp))." 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 4, 140 | "metadata": { 141 | "attributes": { 142 | "classes": [], 143 | "id": "", 144 | "n": "4" 145 | } 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "loss = gloss.SoftmaxCrossEntropyLoss()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## Optimization Algorithm\n", 157 | "\n", 158 | "We use the mini-batch random gradient descent \n", 159 | "with a learning rate of $0.1$ as the optimization algorithm. \n", 160 | "Note that this is the same choice as for linear regression \n", 161 | "and it illustrates the general applicability of the optimizers." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": { 168 | "attributes": { 169 | "classes": [], 170 | "id": "", 171 | "n": "5" 172 | } 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## Training\n", 184 | "\n", 185 | "Next, we use the training functions defined in the last section to train a model." 
186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 6, 191 | "metadata": { 192 | "attributes": { 193 | "classes": [], 194 | "id": "", 195 | "n": "6" 196 | } 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "num_epochs = 5\n", 201 | "d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,\n", 202 | " None, trainer)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "Just as before, this algorithm converges to a solution \n", 210 | "that achieves an accuracy of 83.7%, \n", 211 | "albeit this time with a lot fewer lines of code than before. \n", 212 | "Note that in many cases, Gluon takes specific precautions \n", 213 | "in addition to the most well-known tricks for ensuring numerical stability. \n", 214 | "This saves us from many common pitfalls that might befall us \n", 215 | "if we were to code all of our models from scratch.\n", 216 | "\n", 217 | "## Exercises\n", 218 | "\n", 219 | "1. Try adjusting the hyper-parameters, such as batch size, epoch, and learning rate, to see what the results are.\n", 220 | "1. Why might the test accuracy decrease again after a while? How could we fix this?\n", 221 | "\n", 222 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2337)\n", 223 | "\n", 224 | "\"\"" 225 | ] 226 | } 227 | ], 228 | "metadata": { 229 | "accelerator": "GPU", 230 | "language_info": { 231 | "name": "python" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 2 236 | } -------------------------------------------------------------------------------- /chapter_multilayer-perceptrons/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Multilayer Perceptrons\n", 8 | "\n", 9 | "In this chapter, we will introduce your first truly *deep* networks. \n", 10 | "The simplest deep networks are called multilayer perceptrons, \n", 11 | "and they consist of many layers of neurons \n", 12 | "each fully connected to those in the layer below \n", 13 | "(from which they receive input) \n", 14 | "and those above (which they, in turn, influence).\n", 15 | "When we train high-capacity models we run the risk of overfitting. \n", 16 | "Thus, we will need to provide your first rigorous introduction\n", 17 | "to the notions of overfitting, underfitting, and capacity control. \n", 18 | "To help you combat these problems, \n", 19 | "we will introduce regularization techniques such as dropout and weight decay.\n", 20 | "We will also discuss issues relating to numerical stability and parameter initialization that are key to successfully training deep networks. \n", 21 | "Throughout, we focus on applying models to real data,\n", 22 | "aiming to give the reader a firm grasp not just of the concepts \n", 23 | "but also of the practice of using deep networks. \n", 24 | "We punt matters relating to the computational performance, \n", 25 | "scalability and efficiency of our models to subsequent chapters." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "```eval_rst\n", 33 | "\n", 34 | ".. 
toctree::\n", 35 | " :maxdepth: 2\n", 36 | "\n", 37 | " mlp\n", 38 | " mlp-scratch\n", 39 | " mlp-gluon\n", 40 | " underfit-overfit\n", 41 | " weight-decay\n", 42 | " dropout\n", 43 | " backprop\n", 44 | " numerical-stability-and-init\n", 45 | " environment\n", 46 | " kaggle-house-price\n", 47 | "```\n" 48 | ] 49 | } 50 | ], 51 | "metadata": { 52 | "accelerator": "GPU", 53 | "language_info": { 54 | "name": "python" 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 2 59 | } -------------------------------------------------------------------------------- /chapter_multilayer-perceptrons/mlp-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Concise Implementation of Multilayer Perceptron\n", 8 | "\n", 9 | "Now that we learned how multilayer perceptrons (MLPs) work in theory, let's implement them. We begin, as always, by importing modules." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# Install dependencies before importing\n", 19 | "!pip install mxnet-cu100\n", 20 | "!pip install d2l\n", 21 | "\n", 22 | "import sys\n", 23 | "sys.path.insert(0, '..')\n", 24 | "\n", 25 | "import d2l\n", 26 | "from mxnet import gluon, init\n", 27 | "from mxnet.gluon import loss as gloss, nn" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## The Model\n", 35 | "\n", 36 | "The only difference from our softmax regression implementation\n", 37 | "is that we add two `Dense` (fully-connected) layers instead of one. \n", 38 | "The first is our hidden layer, which has *256* hidden units \n", 39 | "and uses the ReLU activation function." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 5, 45 | "metadata": { 46 | "attributes": { 47 | "classes": [], 48 | "id": "", 49 | "n": "5" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "net = nn.Sequential()\n", 55 | "net.add(nn.Dense(256, activation='relu'))\n", 56 | "net.add(nn.Dense(10))\n", 57 | "net.initialize(init.Normal(sigma=0.01))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Note that as above we can invoke `net.add()` multiple times in succession,\n", 65 | "but we can also invoke it a single time, passing in \n", 66 | "multiple layers to be added the network. \n", 67 | "Thus, we could have equivalently written \n", 68 | "`net.add(nn.Dense(256, activation='relu'), nn.Dense(10))`. \n", 69 | "Again, note that as always, Gluon automatically \n", 70 | "infers the missing input dimensions to each layer. \n", 71 | "\n", 72 | "Training the model follows the exact same steps as in our softmax regression implementation." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": { 79 | "attributes": { 80 | "classes": [], 81 | "id": "", 82 | "n": "6" 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "batch_size = 256\n", 88 | "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)\n", 89 | "\n", 90 | "loss = gloss.SoftmaxCrossEntropyLoss()\n", 91 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})\n", 92 | "num_epochs = 10\n", 93 | "d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,\n", 94 | " None, trainer)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## Exercises\n", 102 | "\n", 103 | "1. Try adding a few more hidden layers to see how the result changes.\n", 104 | "1. Try out different activation functions. Which ones work best?\n", 105 | "1. Try out different initializations of the weights.\n", 106 | "\n", 107 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2340)\n", 108 | "\n", 109 | "\"\"" 110 | ] 111 | } 112 | ], 113 | "metadata": { 114 | "accelerator": "GPU", 115 | "language_info": { 116 | "name": "python" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 2 121 | } -------------------------------------------------------------------------------- /chapter_multilayer-perceptrons/mlp-scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Implementation of Multilayer Perceptron from Scratch\n", 8 | "\n", 9 | "Now that we know how multilayer perceptrons (MLPs) work in theory, \n", 10 | "let's implement them. First, we import the required packages." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 9, 16 | "metadata": { 17 | "attributes": { 18 | "classes": [], 19 | "id": "", 20 | "n": "9" 21 | } 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "# Install dependencies before importing\n", 26 | "!pip install mxnet-cu100\n", 27 | "!pip install d2l\n", 28 | "\n", 29 | "import sys\n", 30 | "sys.path.insert(0, '..')\n", 31 | "\n", 32 | "%matplotlib inline\n", 33 | "import d2l\n", 34 | "from mxnet import nd\n", 35 | "from mxnet.gluon import loss as gloss" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "To compare against the results \n", 43 | "we previously achieved with vanilla softmax regression, \n", 44 | "we continue to use the Fashion-MNIST image classification dataset." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": { 51 | "attributes": { 52 | "classes": [], 53 | "id": "", 54 | "n": "2" 55 | } 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "batch_size = 256\n", 60 | "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Initialize Model Parameters\n", 68 | "\n", 69 | "Recall that this dataset contains 10 classes and that \n", 70 | "each image consists of a $28 \\times 28 = 784$ grid of pixel values. \n", 71 | "Since we'll be discarding the spatial structure (for now),\n", 72 | "we can just think of this as a classification dataset \n", 73 | "with $784$ input features and $10$ classes. 
\n", 74 | "In particular we will implement our MLP \n", 75 | "with one hidden layer and $256$ hidden units.\n", 76 | "Note that we can regard both of these choices as *hyperparameters*\n", 77 | "that could be set based on performance on validation data. \n", 78 | "Typically, we'll choose layer widths as powers of $2$ \n", 79 | "to make everything align nicely in memory.\n", 80 | "\n", 81 | "Again, we will allocate several NDArrays to represent our parameters.\n", 82 | "Note that we now have one weight matrix and one bias vector *per layer*.\n", 83 | "As always, we must call `attach_grad` to allocate memory for the gradients with respect to these parameters." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 3, 89 | "metadata": { 90 | "attributes": { 91 | "classes": [], 92 | "id": "", 93 | "n": "3" 94 | } 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "num_inputs, num_outputs, num_hiddens = 784, 10, 256\n", 99 | "\n", 100 | "W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens))\n", 101 | "b1 = nd.zeros(num_hiddens)\n", 102 | "W2 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_outputs))\n", 103 | "b2 = nd.zeros(num_outputs)\n", 104 | "params = [W1, b1, W2, b2]\n", 105 | "\n", 106 | "for param in params:\n", 107 | " param.attach_grad()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Activation Function\n", 115 | "\n", 116 | "To make sure we know how everything works, \n", 117 | "we will use the `maximum` function to implement ReLU ourselves,\n", 118 | "instead of invoking `nd.relu` directly." 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 4, 124 | "metadata": { 125 | "attributes": { 126 | "classes": [], 127 | "id": "", 128 | "n": "4" 129 | } 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "def relu(X):\n", 134 | " return nd.maximum(X, 0)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## The model\n", 142 | "\n", 143 | "As in softmax regression, we will `reshape` each 2D image \n", 144 | "into a flat vector of length `num_inputs`. \n", 145 | "Finally, we cam implement our model with just a few lines of code." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 5, 151 | "metadata": { 152 | "attributes": { 153 | "classes": [], 154 | "id": "", 155 | "n": "5" 156 | } 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "def net(X):\n", 161 | " X = X.reshape((-1, num_inputs))\n", 162 | " H = relu(nd.dot(X, W1) + b1)\n", 163 | " return nd.dot(H, W2) + b2" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## The Loss Function\n", 171 | "\n", 172 | "For better numerical stability and because we already know \n", 173 | "how to implement [softmax regression completely from scratch](../chapter_linear-networks/softmax-regression-scratch), \n", 174 | "we will use Gluon's integrated function \n", 175 | "for calculating the softmax and cross-entropy loss. \n", 176 | "Recall that we discussed some of these intricacies \n", 177 | "in the [previous section](mlp.md). \n", 178 | "We encourage the interested reader to examing the source code\n", 179 | "for `mxnet.gluon.loss.nnSoftmaxCrossEntropyLoss` for more details." 
180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 6, 185 | "metadata": { 186 | "attributes": { 187 | "classes": [], 188 | "id": "", 189 | "n": "6" 190 | } 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "loss = gloss.SoftmaxCrossEntropyLoss()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "## Training\n", 202 | "\n", 203 | "Steps for training the MLP are no different than for softmax regression. \n", 204 | "In the `d2l` package, we directly call the `train_ch3` function, whose implementation was introduced [here](softmax-regression-scratch.md). \n", 205 | "We set the number of epochs to $10$ and the learning rate to $0.5$." 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 7, 211 | "metadata": { 212 | "attributes": { 213 | "classes": [], 214 | "id": "", 215 | "n": "7" 216 | } 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "num_epochs, lr = 10, 0.5\n", 221 | "d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,\n", 222 | " params, lr)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "To see how well we did, let's apply the model to some test data. \n", 230 | "If you're interested, compare the result to corresponding [linear model](softmax-regression-scratch.md)." 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "for X, y in test_iter:\n", 240 | " break\n", 241 | "\n", 242 | "true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())\n", 243 | "pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())\n", 244 | "titles = [truelabel + '\\n' + predlabel\n", 245 | " for truelabel, predlabel in zip(true_labels, pred_labels)]\n", 246 | "\n", 247 | "d2l.show_fashion_mnist(X[0:9], titles[0:9])" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "This looks a bit better than our previous result, a good sign that we're on the right path.\n", 255 | "\n", 256 | "## Summary\n", 257 | "\n", 258 | "We saw that implementing a simple MLP is easy, even when done manually. \n", 259 | "That said, with a large number of layers, this can get messy \n", 260 | "(e.g. naming and keeping track of the model parameters, etc).\n", 261 | "\n", 262 | "## Exercises\n", 263 | "\n", 264 | "1. Change the value of the hyper-parameter `num_hiddens` in order to see how this hyperparameter influences your results.\n", 265 | "1. Try adding a new hidden layer to see how it affects the results.\n", 266 | "1. How does changing the learning rate change the result.\n", 267 | "1. 
What is the best result you can get by optimizing over all the parameters (learning rate, iterations, number of hidden layers, number of hidden units per layer)?\n", 268 | "\n", 269 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2339)\n", 270 | "\n", 271 | "\"\"" 272 | ] 273 | } 274 | ], 275 | "metadata": { 276 | "accelerator": "GPU", 277 | "language_info": { 278 | "name": "python" 279 | } 280 | }, 281 | "nbformat": 4, 282 | "nbformat_minor": 2 283 | } -------------------------------------------------------------------------------- /chapter_natural-language-processing/fasttext.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Subword Embedding (fastText)\n", 8 | "\n", 9 | "English words usually have internal structures and formation methods. For example, we can deduce the relationship between \"dog\", \"dogs\", and \"dogcatcher\" by their spelling. All these words have the same root, \"dog\", but they use different suffixes to change the meaning of the word. Moreover, this association can be extended to other words. For example, the relationship between \"dog\" and \"dogs\" is just like the relationship between \"cat\" and \"cats\". The relationship between \"boy\" and \"boyfriend\" is just like the relationship between \"girl\" and \"girlfriend\". This characteristic is not unique to English. In French and Spanish, a lot of verbs can have more than 40 different forms depending on the context. In Finnish, a noun may have more than 15 forms. In fact, morphology, which is an important branch of linguistics, studies the internal structure and formation of words.\n", 10 | "\n", 11 | "In word2vec, we did not directly use morphology information. In both the skip-gram model and continuous bag-of-words model, we use different vectors to represent words with different forms. For example, \"dog\" and \"dogs\" are represented by two different vectors, while the relationship between these two vectors is not directly represented in the model. In view of this, fastText proposes the method of subword embedding, thereby attempting to introduce morphological information in the skip-gram model in word2vec[1].\n", 12 | "\n", 13 | "In fastText, each central word is represented as a collection of subwords. Below we use the word \"where\" as an example to understand how subwords are formed. First, we add the special characters “<” and “>” at the beginning and end of the word to distinguish the subwords used as prefixes and suffixes. Then, we treat the word as a sequence of characters to extract the $n$-grams. For example, when $n=3$, we can get all subwords with a length of 3:\n", 14 | "\n", 15 | "$$\\textrm{\"<wh\", \"whe\", \"her\", \"ere\", \"re>\"},$$\n", 16 | "\n", 17 | "and the special subword $\\textrm{\"<where>\"}$.\n", 18 | "\n", 19 | "In fastText, for a word $w$, we record the union of all its subwords with length of 3 to 6 and special subwords as $\\mathcal{G}_w$. Thus, the dictionary is the union of the collection of subwords of all words. Assume the vector of the subword $g$ in the dictionary is $\\mathbf{z}_g$. Then, the central word vector $\\mathbf{u}_w$ for the word $w$ in the skip-gram model can be expressed as\n", 20 | "\n", 21 | "$$\\mathbf{u}_w = \\sum_{g\\in\\mathcal{G}_w} \\mathbf{z}_g.$$\n", 22 | "\n", 23 | "The rest of the fastText process is consistent with the skip-gram model, so it is not repeated here. 
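To make the subword construction concrete, the following is a small illustrative sketch (the helper name `get_subwords` is ours and is not a function of fastText or the `d2l` package) that extracts all subwords of lengths 3 to 6 plus the special whole-word token:

```python
def get_subwords(word, min_n=3, max_n=6):
    token = '<' + word + '>'
    # The special subword for the whole word, e.g. "<where>"
    subwords = {token}
    # All character n-grams of the padded token for n = min_n, ..., max_n
    for n in range(min_n, max_n + 1):
        for i in range(len(token) - n + 1):
            subwords.add(token[i:i + n])
    return subwords

# With min_n=max_n=3 this returns the five 3-grams of "where" plus "<where>"
print(sorted(get_subwords('where', 3, 3)))
print(len(get_subwords('where')))  # the full length-3 to length-6 set is larger
```

Even for a single short word, the full subword set is noticeably larger than one token, which is exactly the dictionary growth discussed next.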
As we can see, compared with the skip-gram model, the dictionary in fastText is larger, resulting in more model parameters. Also, the vector of one word requires the summation of all subword vectors, which results in higher computation complexity. However, we can obtain better vectors for more uncommon complex words, even words not existing in the dictionary, by looking at other words with similar structures.\n", 24 | "\n", 25 | "\n", 26 | "## Summary\n", 27 | "\n", 28 | "* FastText proposes a subword embedding method. Based on the skip-gram model in word2vec, it represents the central word vector as the sum of the subword vectors of the word.\n", 29 | "* Subword embedding utilizes the principles of morphology, which usually improves the quality of representations of uncommon words.\n", 30 | "\n", 31 | "\n", 32 | "## Exercises\n", 33 | "\n", 34 | "* When there are too many subwords (for example, 6 words in English result in about $3\\times 10^8$ combinations), what problems arise? Can you think of any methods to solve them? Hint: Refer to the end of section 3.2 of the fastText paper[1].\n", 35 | "* How can you design a subword embedding model based on the continuous bag-of-words model?\n", 36 | "\n", 37 | "\n", 38 | "\n", 39 | "\n", 40 | "## Reference\n", 41 | "\n", 42 | "[1] Bojanowski, P., Grave, E., Joulin, A., & Mikolov, T. (2016). Enriching word vectors with subword information. arXiv preprint arXiv:1607.04606.\n", 43 | "\n", 44 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2388)\n", 45 | "\n", 46 | "\"\"" 47 | ] 48 | } 49 | ], 50 | "metadata": { 51 | "accelerator": "GPU", 52 | "language_info": { 53 | "name": "python" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 2 58 | } -------------------------------------------------------------------------------- /chapter_natural-language-processing/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Natural Language Processing\n", 8 | "\n", 9 | "Natural language processing is concerned with interactions between computers and humans that use natural language. In practice, it is very common for us to use this technique to process and analyze large amounts of natural language data, like the language models from the \"Recurrent Neural Networks\" section.\n", 10 | "\n", 11 | "In this chapter, we will discuss how to use vectors to represent words and train the word vectors on a corpus. We will also use word vectors pre-trained on a larger corpus to find synonyms and analogies. Then, in the text classification task, we will use word vectors to analyze the emotion of a text and explain the important ideas of timing data classification based on RNNs and the convolutional neural networks. In addition, many of the outputs of natural language processing tasks are not fixed, such as sentences of arbitrary length. We will introduce the encoder-decoder model, beam search, and attention mechanisms to address problems of this type and apply them to machine translation." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "```eval_rst\n", 19 | "\n", 20 | ".. 
toctree::\n", 21 | " :maxdepth: 2\n", 22 | "\n", 23 | " word2vec\n", 24 | " approx-training\n", 25 | " word2vec-gluon\n", 26 | " fasttext\n", 27 | " glove\n", 28 | " similarity-analogy\n", 29 | " sentiment-analysis-rnn\n", 30 | " sentiment-analysis-cnn\n", 31 | "```\n" 32 | ] 33 | } 34 | ], 35 | "metadata": { 36 | "accelerator": "GPU", 37 | "language_info": { 38 | "name": "python" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 2 43 | } -------------------------------------------------------------------------------- /chapter_optimization/adadelta.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Adadelta\n", 8 | "\n", 9 | "In addition to RMSProp, Adadelta is another common optimization algorithm that helps improve the chances of finding useful solutions at later stages of iteration, which is difficult to do when using the Adagrad algorithm for the same purpose[1]. The interesting thing is that there is no learning rate hyperparameter in the Adadelta algorithm.\n", 10 | "\n", 11 | "## The Algorithm\n", 12 | "\n", 13 | "Like RMSProp, the Adadelta algorithm uses the variable $\\boldsymbol{s}_t$, which is an EWMA on the squares of elements in mini-batch stochastic gradient $\\boldsymbol{g}_t$. At time step 0, all the elements are initialized to 0.\n", 14 | "Given the hyperparameter $0 \\leq \\rho < 1$ (counterpart of $\\gamma$ in RMSProp), at time step $t>0$, compute using the same method as RMSProp:\n", 15 | "\n", 16 | "$$\\boldsymbol{s}_t \\leftarrow \\rho \\boldsymbol{s}_{t-1} + (1 - \\rho) \\boldsymbol{g}_t \\odot \\boldsymbol{g}_t. $$\n", 17 | "\n", 18 | "Unlike RMSProp, Adadelta maintains an additional state variable, $\\Delta\\boldsymbol{x}_t$ the elements of which are also initialized to 0 at time step 0. We use $\\Delta\\boldsymbol{x}_{t-1}$ to compute the variation of the independent variable:\n", 19 | "\n", 20 | "$$ \\boldsymbol{g}_t' \\leftarrow \\sqrt{\\frac{\\Delta\\boldsymbol{x}_{t-1} + \\epsilon}{\\boldsymbol{s}_t + \\epsilon}} \\odot \\boldsymbol{g}_t, $$\n", 21 | "\n", 22 | "Here, $\\epsilon$ is a constant added to maintain the numerical stability, such as $10^{-5}$. Next, we update the independent variable:\n", 23 | "\n", 24 | "$$\\boldsymbol{x}_t \\leftarrow \\boldsymbol{x}_{t-1} - \\boldsymbol{g}'_t. $$\n", 25 | "\n", 26 | "Finally, we use $\\Delta\\boldsymbol{x}$ to record the EWMA on the squares of elements in $\\boldsymbol{g}'$, which is the variation of the independent variable.\n", 27 | "\n", 28 | "$$\\Delta\\boldsymbol{x}_t \\leftarrow \\rho \\Delta\\boldsymbol{x}_{t-1} + (1 - \\rho) \\boldsymbol{g}'_t \\odot \\boldsymbol{g}'_t. $$\n", 29 | "\n", 30 | "As we can see, if the impact of $\\epsilon$ is not considered here, Adadelta differs from RMSProp in its replacement of the hyperparameter $\\eta$ with $\\sqrt{\\Delta\\boldsymbol{x}_{t-1}}$.\n", 31 | "\n", 32 | "\n", 33 | "## Implementation from Scratch\n", 34 | "\n", 35 | "Adadelta needs to maintain two state variables for each independent variable, $\\boldsymbol{s}_t$ and $\\Delta\\boldsymbol{x}_t$. We use the formula from the algorithm to implement Adadelta." 
36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 11, 41 | "metadata": { 42 | "attributes": { 43 | "classes": [], 44 | "id": "", 45 | "n": "11" 46 | } 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# Install dependencies before importing\n", 51 | "!pip install mxnet-cu100\n", 52 | "!pip install d2l\n", 53 | "\n", 54 | "import sys\n", 55 | "sys.path.insert(0, '..')\n", 56 | "\n", 57 | "%matplotlib inline\n", 58 | "import d2l\n", 59 | "from mxnet import nd\n", 60 | "\n", 61 | "features, labels = d2l.get_data_ch7()\n", 62 | "\n", 63 | "def init_adadelta_states():\n", 64 | " s_w, s_b = nd.zeros((features.shape[1], 1)), nd.zeros(1)\n", 65 | " delta_w, delta_b = nd.zeros((features.shape[1], 1)), nd.zeros(1)\n", 66 | " return ((s_w, delta_w), (s_b, delta_b))\n", 67 | "\n", 68 | "def adadelta(params, states, hyperparams):\n", 69 | " rho, eps = hyperparams['rho'], 1e-5\n", 70 | " for p, (s, delta) in zip(params, states):\n", 71 | " s[:] = rho * s + (1 - rho) * p.grad.square()\n", 72 | " g = ((delta + eps).sqrt() / (s + eps).sqrt()) * p.grad\n", 73 | " p[:] -= g\n", 74 | " delta[:] = rho * delta + (1 - rho) * g * g" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Then, we train the model with the hyperparameter $\\rho=0.9$." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 12, 87 | "metadata": { 88 | "attributes": { 89 | "classes": [], 90 | "id": "", 91 | "n": "12" 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "d2l.train_ch9(adadelta, init_adadelta_states(), {'rho': 0.9}, features,\n", 97 | " labels)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Concise Implementation\n", 105 | "\n", 106 | "From the `Trainer` instance for the algorithm named \"adadelta\", we can implement Adadelta in Gluon. Its hyperparameters can be specified by `rho`." 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 9, 112 | "metadata": { 113 | "attributes": { 114 | "classes": [], 115 | "id": "", 116 | "n": "9" 117 | } 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "d2l.train_gluon_ch9('adadelta', {'rho': 0.9}, features, labels)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Summary\n", 129 | "\n", 130 | "* Adadelta has no learning rate hyperparameter, it uses an EWMA on the squares of elements in the variation of the independent variable to replace the learning rate.\n", 131 | "\n", 132 | "## Exercises\n", 133 | "\n", 134 | "* Adjust the value of $\\rho$ and observe the experimental results.\n", 135 | "\n", 136 | "## Reference\n", 137 | "\n", 138 | "[1] Zeiler, M. D. (2012). ADADELTA: an adaptive learning rate method. arXiv preprint arXiv:1212.5701.\n", 139 | "\n", 140 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2377)\n", 141 | "\n", 142 | "\"\"" 143 | ] 144 | } 145 | ], 146 | "metadata": { 147 | "accelerator": "GPU", 148 | "language_info": { 149 | "name": "python" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 2 154 | } -------------------------------------------------------------------------------- /chapter_optimization/adam.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Adam\n", 8 | "\n", 9 | "Created on the basis of RMSProp, Adam also uses EWMA on the mini-batch stochastic gradient[1]. 
Here, we are going to introduce this algorithm.\n", 10 | "\n", 11 | "## The Algorithm\n", 12 | "\n", 13 | "Adam uses the momentum variable $\\boldsymbol{v}_t$ and variable $\\boldsymbol{s}_t$, which is an EWMA on the squares of elements in the mini-batch stochastic gradient from RMSProp, and initializes each element of the variables to 0 at time step 0. Given the hyperparameter $0 \\leq \\beta_1 < 1$ (the author of the algorithm suggests a value of 0.9), the momentum variable $\\boldsymbol{v}_t$ at time step $t$ is the EWMA of the mini-batch stochastic gradient $\\boldsymbol{g}_t$:\n", 14 | "\n", 15 | "$$\\boldsymbol{v}_t \\leftarrow \\beta_1 \\boldsymbol{v}_{t-1} + (1 - \\beta_1) \\boldsymbol{g}_t. $$\n", 16 | "\n", 17 | "Just as in RMSProp, given the hyperparameter $0 \\leq \\beta_2 < 1$ (the author of the algorithm suggests a value of 0.999),\n", 18 | "After taken the squares of elements in the mini-batch stochastic gradient, find $\\boldsymbol{g}_t \\odot \\boldsymbol{g}_t$ and perform EWMA on it to obtain $\\boldsymbol{s}_t$:\n", 19 | "\n", 20 | "$$\\boldsymbol{s}_t \\leftarrow \\beta_2 \\boldsymbol{s}_{t-1} + (1 - \\beta_2) \\boldsymbol{g}_t \\odot \\boldsymbol{g}_t. $$\n", 21 | "\n", 22 | "Since we initialized elements in $\\boldsymbol{v}_0$ and $\\boldsymbol{s}_0$ to 0,\n", 23 | "we get $\\boldsymbol{v}_t = (1-\\beta_1) \\sum_{i=1}^t \\beta_1^{t-i} \\boldsymbol{g}_i$ at time step $t$. Sum the mini-batch stochastic gradient weights from each previous time step to get $(1-\\beta_1) \\sum_{i=1}^t \\beta_1^{t-i} = 1 - \\beta_1^t$. Notice that when $t$ is small, the sum of the mini-batch stochastic gradient weights from each previous time step will be small. For example, when $\\beta_1 = 0.9$, $\\boldsymbol{v}_1 = 0.1\\boldsymbol{g}_1$. To eliminate this effect, for any time step $t$, we can divide $\\boldsymbol{v}_t$ by $1 - \\beta_1^t$, so that the sum of the mini-batch stochastic gradient weights from each previous time step is 1. This is also called bias correction. In the Adam algorithm, we perform bias corrections for variables $\\boldsymbol{v}_t$ and $\\boldsymbol{s}_t$:\n", 24 | "\n", 25 | "$$\\hat{\\boldsymbol{v}}_t \\leftarrow \\frac{\\boldsymbol{v}_t}{1 - \\beta_1^t}, $$\n", 26 | "\n", 27 | "$$\\hat{\\boldsymbol{s}}_t \\leftarrow \\frac{\\boldsymbol{s}_t}{1 - \\beta_2^t}. $$\n", 28 | "\n", 29 | "\n", 30 | "Next, the Adam algorithm will use the bias-corrected variables $\\hat{\\boldsymbol{v}}_t$ and $\\hat{\\boldsymbol{s}}_t$ from above to re-adjust the learning rate of each element in the model parameters using element operations.\n", 31 | "\n", 32 | "$$\\boldsymbol{g}_t' \\leftarrow \\frac{\\eta \\hat{\\boldsymbol{v}}_t}{\\sqrt{\\hat{\\boldsymbol{s}}_t} + \\epsilon},$$\n", 33 | "\n", 34 | "Here, $\\eta$ is the learning rate while $\\epsilon$ is a constant added to maintain numerical stability, such as $10^{-8}$. Just as for Adagrad, RMSProp, and Adadelta, each element in the independent variable of the objective function has its own learning rate. Finally, use $\\boldsymbol{g}_t'$ to iterate the independent variable:\n", 35 | "\n", 36 | "$$\\boldsymbol{x}_t \\leftarrow \\boldsymbol{x}_{t-1} - \\boldsymbol{g}_t'. $$\n", 37 | "\n", 38 | "## Implementation from Scratch\n", 39 | "\n", 40 | "We use the formula from the algorithm to implement Adam. Here, time step $t$ uses `hyperparams` to input parameters to the `adam` function." 
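As a quick numeric aside before the full implementation in the next cell (plain Python, invented for illustration): with a constant gradient the uncorrected EWMA undershoots by exactly the factor $1-\beta_1^t$, and dividing by that factor recovers the true value, which is what the bias correction above does.

```python
beta1, g = 0.9, 1.0                  # EWMA coefficient and a constant "gradient"
v = 0.0
for t in range(1, 6):
    v = beta1 * v + (1 - beta1) * g  # uncorrected momentum estimate
    print(t, round(v, 4), round(v / (1 - beta1 ** t), 4))
# v starts far too small (0.1, 0.19, ...), while the corrected value is 1.0 at every step
```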
41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": { 47 | "attributes": { 48 | "classes": [], 49 | "id": "", 50 | "n": "2" 51 | } 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "# Install dependencies before importing\n", 56 | "!pip install mxnet-cu100\n", 57 | "!pip install d2l\n", 58 | "\n", 59 | "import sys\n", 60 | "sys.path.insert(0, '..')\n", 61 | "\n", 62 | "%matplotlib inline\n", 63 | "import d2l\n", 64 | "from mxnet import nd\n", 65 | "\n", 66 | "features, labels = d2l.get_data_ch7()\n", 67 | "\n", 68 | "def init_adam_states():\n", 69 | " v_w, v_b = nd.zeros((features.shape[1], 1)), nd.zeros(1)\n", 70 | " s_w, s_b = nd.zeros((features.shape[1], 1)), nd.zeros(1)\n", 71 | " return ((v_w, s_w), (v_b, s_b))\n", 72 | "\n", 73 | "def adam(params, states, hyperparams):\n", 74 | " beta1, beta2, eps = 0.9, 0.999, 1e-6\n", 75 | " for p, (v, s) in zip(params, states):\n", 76 | " v[:] = beta1 * v + (1 - beta1) * p.grad\n", 77 | " s[:] = beta2 * s + (1 - beta2) * p.grad.square()\n", 78 | " v_bias_corr = v / (1 - beta1 ** hyperparams['t'])\n", 79 | " s_bias_corr = s / (1 - beta2 ** hyperparams['t'])\n", 80 | " p[:] -= hyperparams['lr'] * v_bias_corr / (s_bias_corr.sqrt() + eps)\n", 81 | " hyperparams['t'] += 1" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "Use Adam to train the model with a learning rate of $0.01$." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": { 95 | "attributes": { 96 | "classes": [], 97 | "id": "", 98 | "n": "5" 99 | } 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "d2l.train_ch9(adam, init_adam_states(), {'lr': 0.01, 't': 1}, features,\n", 104 | " labels)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Concise Implementation\n", 112 | "\n", 113 | "From the `Trainer` instance of the algorithm named \"adam\", we can implement Adam with Gluon." 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 11, 119 | "metadata": { 120 | "attributes": { 121 | "classes": [], 122 | "id": "", 123 | "n": "11" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "d2l.train_gluon_ch9('adam', {'learning_rate': 0.01}, features, labels)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Summary\n", 136 | "\n", 137 | "* Created on the basis of RMSProp, Adam also uses EWMA on the mini-batch stochastic gradient\n", 138 | "* Adam uses bias correction.\n", 139 | "\n", 140 | "## Exercises\n", 141 | "\n", 142 | "* Adjust the learning rate and observe and analyze the experimental results.\n", 143 | "* Some people say that Adam is a combination of RMSProp and momentum. Why do you think they say this?\n", 144 | "\n", 145 | "\n", 146 | "\n", 147 | "\n", 148 | "## Reference\n", 149 | "\n", 150 | "[1] Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. 
arXiv preprint arXiv:1412.6980.\n", 151 | "\n", 152 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2378)\n", 153 | "\n", 154 | "\"\"" 155 | ] 156 | } 157 | ], 158 | "metadata": { 159 | "accelerator": "GPU", 160 | "language_info": { 161 | "name": "python" 162 | } 163 | }, 164 | "nbformat": 4, 165 | "nbformat_minor": 2 166 | } -------------------------------------------------------------------------------- /chapter_optimization/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Optimization Algorithms\n", 8 | "\n", 9 | "If you read the book in sequence up to this point you already used a number of advanced optimization algorithms to train deep learning models. They were the tools that allowed us to continue updating model parameters and to minimize the value of the loss function, as evaluated on the training set. Indeed, anyone content with treating optimization as a black box device to minimize objective functions in a simple setting might well content oneself with the knowledge that there exists an array of incantations of such a procedure (with names such as 'Adam', 'NAG', or 'SGD'). \n", 10 | "\n", 11 | "To do well, however, some deeper knowledge is required. \n", 12 | "Optimization algorithms are important for deep learning. On one hand, training a complex deep learning model can take hours, days, or even weeks. The performance of the optimization algorithm directly affects the model's training efficiency. On the other hand, understanding the principles of different optimization algorithms and the role of their parameters will enable us to tune the hyperparameters in a targeted manner to improve the performance of deep learning models. \n", 13 | "\n", 14 | "In this chapter, we explore common deep learning optimization algorithms in depth. Almost all optimization problems arising in deep learning are *nonconvex*. Nonetheless, the design and analysis of algorithms in the context of convex problems has proven to be very instructive. It is for that reason that this section includes a primer on convex optimization and the proof for a very simple stochastic gradient descent algorithm on a convex objective function." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "```eval_rst\n", 22 | "\n", 23 | ".. toctree::\n", 24 | " :maxdepth: 2\n", 25 | "\n", 26 | " optimization-intro\n", 27 | " gd-sgd\n", 28 | " minibatch-sgd\n", 29 | " momentum\n", 30 | " adagrad\n", 31 | " rmsprop\n", 32 | " adadelta\n", 33 | " adam\n", 34 | "```\n" 35 | ] 36 | } 37 | ], 38 | "metadata": { 39 | "accelerator": "GPU", 40 | "language_info": { 41 | "name": "python" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 2 46 | } -------------------------------------------------------------------------------- /chapter_optimization/rmsprop.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RMSProp\n", 8 | "\n", 9 | "In the experiment in the [\"Adagrad\"](adagrad.md) section, the learning rate of each element in the independent variable of the objective function declines (or remains unchanged) during iteration because the variable $\\boldsymbol{s}_t$ in the denominator is increased by the square by element operation of the mini-batch stochastic gradient, adjusting the learning rate. 
Therefore, when the learning rate declines very fast during early iteration, yet the current solution is still not desirable, Adagrad might have difficulty finding a useful solution because the learning rate will be too small at later stages of iteration. To tackle this problem, the RMSProp algorithm made a small modification to Adagrad[1].\n", 10 | "\n", 11 | "## The Algorithm\n", 12 | "\n", 13 | "We introduced EWMA (exponentially weighted moving average) in the [\"Momentum\"](momentum.md) section. Unlike in Adagrad, the state variable $\\boldsymbol{s}_t$ is the sum of the square by element all the mini-batch stochastic gradients $\\boldsymbol{g}_t$ up to the time step $t$, RMSProp uses the EWMA on the square by element results of these gradients. Specifically, given the hyperparameter $0 \\leq \\gamma < 1$, RMSProp is computed at time step $t>0$.\n", 14 | "\n", 15 | "$$\\boldsymbol{s}_t \\leftarrow \\gamma \\boldsymbol{s}_{t-1} + (1 - \\gamma) \\boldsymbol{g}_t \\odot \\boldsymbol{g}_t. $$\n", 16 | "\n", 17 | "Like Adagrad, RMSProp re-adjusts the learning rate of each element in the independent variable of the objective function with element operations and then updates the independent variable.\n", 18 | "\n", 19 | "$$\\boldsymbol{x}_t \\leftarrow \\boldsymbol{x}_{t-1} - \\frac{\\eta}{\\sqrt{\\boldsymbol{s}_t + \\epsilon}} \\odot \\boldsymbol{g}_t, $$\n", 20 | "\n", 21 | "Here, $\\eta$ is the learning rate while $\\epsilon$ is a constant added to maintain numerical stability, such as $10^{-6}$. Because the state variable of RMSProp is an EWMA of the squared term $\\boldsymbol{g}_t \\odot \\boldsymbol{g}_t$, it can be seen as the weighted average of the mini-batch stochastic gradient's squared terms from the last $1/(1-\\gamma)$ time steps. Therefore, the learning rate of each element in the independent variable will not always decline (or remain unchanged) during iteration.\n", 22 | "\n", 23 | "By convention, we will use the objective function $f(\\boldsymbol{x})=0.1x_1^2+2x_2^2$ to observe the iterative trajectory of the independent variable in RMSProp. Recall that in the [\"Adagrad\"](adagrad.md) section, when we used Adagrad with a learning rate of 0.4, the independent variable moved less in later stages of iteration. However, at the same learning rate, RMSProp can approach the optimal solution faster." 
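As a small aside (invented for illustration, not in the original notebook), the informal claim that the state averages roughly the last $1/(1-\gamma)$ squared gradients can be checked directly: for $\gamma=0.9$ the ten most recent terms carry about 65% of the total EWMA weight, so the window should be read as approximate rather than sharp.

```python
gamma = 0.9
window = int(1 / (1 - gamma))                                    # 10 time steps
recent_weight = sum((1 - gamma) * gamma ** i for i in range(window))
print(window, recent_weight)                                     # 10 0.6513...
```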
24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": { 30 | "attributes": { 31 | "classes": [], 32 | "id": "", 33 | "n": "3" 34 | } 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "# Install dependencies before importing\n", 39 | "!pip install mxnet-cu100\n", 40 | "!pip install d2l\n", 41 | "\n", 42 | "import sys\n", 43 | "sys.path.insert(0, '..')\n", 44 | "\n", 45 | "%matplotlib inline\n", 46 | "import d2l\n", 47 | "import math\n", 48 | "from mxnet import nd\n", 49 | "\n", 50 | "def rmsprop_2d(x1, x2, s1, s2):\n", 51 | " g1, g2, eps = 0.2 * x1, 4 * x2, 1e-6\n", 52 | " s1 = gamma * s1 + (1 - gamma) * g1 ** 2\n", 53 | " s2 = gamma * s2 + (1 - gamma) * g2 ** 2\n", 54 | " x1 -= eta / math.sqrt(s1 + eps) * g1\n", 55 | " x2 -= eta / math.sqrt(s2 + eps) * g2\n", 56 | " return x1, x2, s1, s2\n", 57 | "\n", 58 | "def f_2d(x1, x2):\n", 59 | " return 0.1 * x1 ** 2 + 2 * x2 ** 2\n", 60 | "\n", 61 | "eta, gamma = 0.4, 0.9\n", 62 | "d2l.show_trace_2d(f_2d, d2l.train_2d(rmsprop_2d))" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Implementation from Scratch\n", 70 | "\n", 71 | "Next, we implement RMSProp with the formula in the algorithm." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 22, 77 | "metadata": { 78 | "attributes": { 79 | "classes": [], 80 | "id": "", 81 | "n": "22" 82 | } 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "\n", 87 | "features, labels = d2l.get_data_ch7()\n", 88 | "def init_rmsprop_states():\n", 89 | " s_w = nd.zeros((features.shape[1], 1))\n", 90 | " s_b = nd.zeros(1)\n", 91 | " return (s_w, s_b)\n", 92 | "\n", 93 | "def rmsprop(params, states, hyperparams):\n", 94 | " gamma, eps = hyperparams['gamma'], 1e-6\n", 95 | " for p, s in zip(params, states):\n", 96 | " s[:] = gamma * s + (1 - gamma) * p.grad.square()\n", 97 | " p[:] -= hyperparams['lr'] * p.grad / (s + eps).sqrt()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "We set the initial learning rate to 0.01 and the hyperparameter $\\gamma$ to 0.9. Now, the variable $\\boldsymbol{s}_t$ can be treated as the weighted average of the square term $\\boldsymbol{g}_t \\odot \\boldsymbol{g}_t$ from the last $1/(1-0.9) = 10$ time steps." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 24, 110 | "metadata": { 111 | "attributes": { 112 | "classes": [], 113 | "id": "", 114 | "n": "24" 115 | } 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "d2l.train_ch9(rmsprop, init_rmsprop_states(), {'lr': 0.01, 'gamma': 0.9},\n", 120 | " features, labels)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Concise Implementation\n", 128 | "\n", 129 | "From the `Trainer` instance of the algorithm named \"rmsprop\", we can implement the RMSProp algorithm with Gluon to train models. Note that the hyperparameter $\\gamma$ is assigned by `gamma1`." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 29, 135 | "metadata": { 136 | "attributes": { 137 | "classes": [], 138 | "id": "", 139 | "n": "29" 140 | } 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "d2l.train_gluon_ch9('rmsprop', {'learning_rate': 0.01, 'gamma1': 0.9},\n", 145 | " features, labels)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Summary\n", 153 | "\n", 154 | "* The difference between RMSProp and Adagrad is that RMSProp uses an EWMA on the squares of elements in the mini-batch stochastic gradient to adjust the learning rate.\n", 155 | "\n", 156 | "## Exercises\n", 157 | "\n", 158 | "* What happens to the experimental results if we set the value of $\\gamma$ to 1? Why?\n", 159 | "* Try using other combinations of initial learning rates and $\\gamma$ hyperparameters and observe and analyze the experimental results.\n", 160 | "\n", 161 | "\n", 162 | "\n", 163 | "\n", 164 | "## Reference\n", 165 | "\n", 166 | "[1] Tieleman, T., & Hinton, G. (2012). Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude. COURSERA: Neural networks for machine learning, 4(2), 26-31.\n", 167 | "\n", 168 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2376)\n", 169 | "\n", 170 | "\"\"" 171 | ] 172 | } 173 | ], 174 | "metadata": { 175 | "accelerator": "GPU", 176 | "language_info": { 177 | "name": "python" 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 2 182 | } -------------------------------------------------------------------------------- /chapter_recurrent-neural-networks/deep-rnn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Deep Recurrent Neural Networks\n", 8 | "\n", 9 | "Up to now, we only discussed recurrent neural networks with a single unidirectional hidden layer. In it the specific functional form of how latent variables and observations interact was rather arbitrary. This isn't a big problem as long as we have enough flexibility to model different types of interactions. With a single layer, however, this can be quite challenging. In the case of the perceptron we fixed this problem by adding more layers. Within RNNs this is a bit more tricky, since we first need to decide how and where to add extra nonlinearity. Our discussion below focuses primarily on LSTMs but it applies to other sequence models, too.\n", 10 | "\n", 11 | "* We could add extra nonlinearity to the gating mechansims. That is, instead of using a single perceptron we could use multiple layers. This leaves the *mechanism* of the LSTM unchanged. Instead it makes it more sophisticated. This would make sense if we were led to believe that the LSTM mechanism describes some form of universal truth of how latent variable autoregressive models work. \n", 12 | "* We could stack multiple layers of LSTMs on top of each other. This results in a mechanism that is more flexible, due to the combination of several simple layers. In particular, data might be relevant at different levels of the stack. For instance, we might want to keep high-level data about financial market conditions (bear or bull market) available at a high level, whereas at a lower level we only record shorter-term temporal dynamics. 
\n", 13 | "\n", 14 | "Beyond all this abstract discussion it is probably easiest to understand the family of models we are interested in by reviewing the diagram below. It describes a deep recurrent neural network with $L$ hidden layers. Each hidden state is continuously passed to the next time step of the current layer and the next layer of the current time step.\n", 15 | "\n", 16 | "\"\n", 17 | "\n", 18 | "## Functional Dependencies\n", 19 | "\n", 20 | "At time step $t$ we assume that we have a minibatch $\\mathbf{X}_t \\in \\mathbb{R}^{n \\times d}$ (number of examples: $n$, number of inputs: $d$). The hidden state of hidden layer $\\ell$ ($\\ell=1,\\ldots,T$) is $\\mathbf{H}_t^{(\\ell)} \\in \\mathbb{R}^{n \\times h}$ (number of hidden units: $h$), the output layer variable is $\\mathbf{O}_t \\in \\mathbb{R}^{n \\times q}$ (number of outputs: $q$) and a hidden layer activation function $f_l$ for layer $l$. We compute the hidden state of layer $1$ as before, using $\\mathbf{X}_t$ as input. For all subsequent layers the hidden state of the previous layer is used in its place.\n", 21 | "\n", 22 | "$$\\begin{aligned}\n", 23 | "\\mathbf{H}_t^{(1)} & = f_1\\left(\\mathbf{X}_t, \\mathbf{H}_{t-1}^{(1)}\\right) \\\\\n", 24 | "\\mathbf{H}_t^{(l)} & = f_l\\left(\\mathbf{H}_t^{(l-1)}, \\mathbf{H}_{t-1}^{(l)}\\right)\n", 25 | "\\end{aligned}$$\n", 26 | "\n", 27 | "Finally, the output of the output layer is only based on the hidden state of hidden layer $L$. We use the output function $g$ to address this:\n", 28 | "\n", 29 | "$$\\mathbf{O}_t = g \\left(\\mathbf{H}_t^{(L)}\\right)$$\n", 30 | "\n", 31 | "Just as with multilayer perceptrons, the number of hidden layers $L$ and number of hidden units $h$ are hyper parameters. In particular, we can pick a regular RNN, a GRU or an LSTM to implement the model.\n", 32 | "\n", 33 | "## Concise Implementation\n", 34 | "\n", 35 | "Fortunately many of the logistical details required to implement multiple layers of an RNN are readily available in Gluon. To keep things simple we only illustrate the implementation using such built-in functionality. The code is very similar to the one we used previously for LSTMs. In fact, the only difference is that we specify the number of layers explicitly rather than picking the default of a single layer. Let's begin by importing the appropriate modules and data." 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 17, 41 | "metadata": { 42 | "attributes": { 43 | "classes": [], 44 | "id": "", 45 | "n": "17" 46 | } 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# Install dependencies before importing\n", 51 | "!pip install mxnet-cu100\n", 52 | "!pip install d2l\n", 53 | "\n", 54 | "import sys\n", 55 | "sys.path.insert(0, '..')\n", 56 | "\n", 57 | "import d2l\n", 58 | "from mxnet import nd\n", 59 | "from mxnet.gluon import rnn\n", 60 | "\n", 61 | "corpus_indices, vocab = d2l.load_data_time_machine()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "The architectural decisions (parameters, etc.) are very similar to those of previous sections. We pick the same number of inputs and outputs as we have distinct tokens, i.e. `vocab_size`. The number of hidden units is still 256 and we retain a learning rate of 100. The only difference is that we now select a nontrivial number of layers `num_layers = 2`. Since the model is somewhat slower to train we use 3000 iterations." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 22, 74 | "metadata": { 75 | "attributes": { 76 | "classes": [], 77 | "id": "", 78 | "n": "22" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "num_inputs, num_hiddens, num_layers, num_outputs = len(vocab), 256, 2, len(vocab)\n", 84 | "ctx = d2l.try_gpu()\n", 85 | "num_epochs, num_steps, batch_size, lr, clipping_theta = 500, 35, 32, 5, 1\n", 86 | "prefixes = ['traveller', 'time traveller']" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "## Training\n", 94 | "\n", 95 | "The actual invocation logic is identical to before and we re-use `train_and_predict_rnn_gluon`. The only difference is that we now instantiate two layers with LSTMs. This rather more complex architecture and the large number of epochs slow down training considerably." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 8, 101 | "metadata": { 102 | "attributes": { 103 | "classes": [], 104 | "id": "", 105 | "n": "8" 106 | } 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "lstm_layer = rnn.LSTM(hidden_size = num_hiddens, num_layers=num_layers)\n", 111 | "model = d2l.RNNModel(lstm_layer, len(vocab))\n", 112 | "d2l.train_and_predict_rnn_gluon(model, num_hiddens, corpus_indices, vocab, \n", 113 | " ctx, num_epochs, num_steps, lr, \n", 114 | " clipping_theta, batch_size, prefixes)\n" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Summary\n", 122 | "\n", 123 | "* In deep recurrent neural networks, hidden state information is passed to the next time step of the current layer and the next layer of the current time step.\n", 124 | "* There exist many different flavors of deep RNNs, such as LSTMs, GRUs or regular RNNs. Conveniently these models are all available as parts of the `rnn` module in Gluon. \n", 125 | "* Initialization of the models requires care. Overall, deep RNNs require considerable amount of work (learning rate, clipping, etc) to ensure proper convergence. \n", 126 | "\n", 127 | "## Exercises\n", 128 | "\n", 129 | "1. Try to implement a two-layer RNN from scratch using the [\"single layer implementation\"](rnn-scratch.md) we discussed in an earlier section. \n", 130 | "2. Replace the LSTM by a GRU and compare the accuracy.\n", 131 | "3. Increase the training data to include multiple books. How low can you go on the perplexity scale?\n", 132 | "4. Would you want to combine sources of different authors when modeling text? Why is this a good idea? What could go wrong?\n", 133 | "\n", 134 | "## Scan the QR Code to [Discuss](https://discuss.mxnet.io/t/2369)\n", 135 | "\n", 136 | "\"\"" 137 | ] 138 | } 139 | ], 140 | "metadata": { 141 | "accelerator": "GPU", 142 | "language_info": { 143 | "name": "python" 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 2 148 | } -------------------------------------------------------------------------------- /chapter_recurrent-neural-networks/encoder-decoder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Encoder-Decoder Architecture\n", 8 | "\n", 9 | "The encoder-decoder architecture is a neural network design pattern. In this architecture, the network is partitioned into two parts, the encoder and the decoder. The encoder's role is encoding the inputs into state, which often contains several tensors. 
Then the state is passed into the decoder to generate the outputs. In machine translation, the encoder transforms a source sentence, e.g. \"Hello world.\", into state, e.g. a vector, that captures its semantic information. The decoder then uses this state to generate the translated target sentence, e.g. \"Bonjour le monde.\". \n", 10 | "\n", 11 | "\"The\n", 12 | "\n", 13 | "In this section, we will show an interface to implement this encoder-decoder architecture." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "attributes": { 21 | "classes": [], 22 | "id": "", 23 | "n": "1" 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "# Install dependencies before importing\n", 29 | "!pip install mxnet-cu100\n", 30 | "!pip install d2l\n", 31 | "\n", 32 | "from mxnet.gluon import nn" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Encoder\n", 40 | "\n", 41 | "The encoder is a normal neural network that takes inputs, e.g. a source sentence, to return outputs." 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": { 48 | "attributes": { 49 | "classes": [], 50 | "id": "", 51 | "n": "2" 52 | } 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "class Encoder(nn.Block):\n", 57 | " def __init__(self, **kwargs):\n", 58 | " super(Encoder, self).__init__(**kwargs)\n", 59 | "\n", 60 | " def forward(self, X):\n", 61 | " raise NotImplementedError" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Decoder\n", 69 | "\n", 70 | "The decoder has an additional method `init_state` to parse the outputs of the encoder with possible additional information, e.g. the valid lengths of inputs, to return the state it needs. In the forward method, the decoder takes both inputs, e.g. a target sentence, and the state. It returns outputs, with potentially modified state if the encoder contains RNN layers." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": { 77 | "attributes": { 78 | "classes": [], 79 | "id": "", 80 | "n": "3" 81 | } 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "class Decoder(nn.Block):\n", 86 | " def __init__(self, **kwargs):\n", 87 | " super(Decoder, self).__init__(**kwargs)\n", 88 | "\n", 89 | " def init_state(self, enc_outputs, *args):\n", 90 | " raise NotImplementedError\n", 91 | "\n", 92 | " def forward(self, X, state):\n", 93 | " raise NotImplementedError" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Model\n", 101 | "\n", 102 | "The encoder-decoder model contains both an encoder an decoder. We implement its forward method for training. It takes both encoder inputs and decoder inputs, with optional additional information. During computation, it first compute encoder outputs to initialize the decoder state, and then returns the decoder outputs." 
103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 4, 108 | "metadata": { 109 | "attributes": { 110 | "classes": [], 111 | "id": "", 112 | "n": "4" 113 | } 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "class EncoderDecoder(nn.Block):\n", 118 | " def __init__(self, encoder, decoder, **kwargs):\n", 119 | " super(EncoderDecoder, self).__init__(**kwargs)\n", 120 | " self.encoder = encoder\n", 121 | " self.decoder = decoder\n", 122 | "\n", 123 | " def forward(self, enc_X, dec_X, *args):\n", 124 | " enc_outputs = self.encoder(enc_X)\n", 125 | " dec_state = self.decoder.init_state(enc_outputs, *args)\n", 126 | " return self.decoder(dec_X, dec_state)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "## Summary" 134 | ] 135 | } 136 | ], 137 | "metadata": { 138 | "accelerator": "GPU", 139 | "language_info": { 140 | "name": "python" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 2 145 | } -------------------------------------------------------------------------------- /chapter_recurrent-neural-networks/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Recurrent Neural Networks\n", 8 | "\n", 9 | "So far we encountered two types of data: generic vectors and\n", 10 | "images. For the latter we designed specialized layers to take\n", 11 | "advantage of the regularity properties in them. In other words, if we\n", 12 | "were to permute the pixels in an image, it would be much more\n", 13 | "difficult to reason about its content of something that would look\n", 14 | "much like the background of a test pattern in the times of Analog TV.\n", 15 | "\n", 16 | "Most importantly, so far we tacitly assumed that our data is generated\n", 17 | "iid, i.e. independently and identically distributed, all drawn from some\n", 18 | "distribution. Unfortunately, this isn't true for most data. For\n", 19 | "instance, the words in this paragraph are written in sequence, and it\n", 20 | "would be quite difficult to decipher its meaning if they were\n", 21 | "permuted randomly. Likewise, image frames in a video, the audio signal\n", 22 | "in a conversation, or the browsing behavior on a website, all follow\n", 23 | "sequential order. It is thus only reasonable to assume that\n", 24 | "specialized models for such data will do better at describing it and\n", 25 | "at solving estimation problems.\n", 26 | "\n", 27 | "Another issue arises from the fact that we might not only receive a\n", 28 | "sequence as an input but rather might be expected to continue the\n", 29 | "sequence. For instance, the task could be to continue the series 2,\n", 30 | "4, 6, 8, 10, ... This is quite common in time series analysis, to\n", 31 | "predict the stock market, the fever curve of a patient or the\n", 32 | "acceleration needed for a race car. Again we want to have models that\n", 33 | "can handle such data.\n", 34 | "\n", 35 | "In short, while convolutional neural networks can efficiently process\n", 36 | "spatial information, recurrent neural networks are designed to better\n", 37 | "handle sequential information. These networks introduces state\n", 38 | "variables to store past information and, together with the current\n", 39 | "input, determine the current output.\n", 40 | "\n", 41 | "Many of the examples for using recurrent networks are based on text\n", 42 | "data. 
Hence, we will emphasize language models in this chapter. After\n", 43 | "a more formal review of sequence data we discuss basic concepts of a\n", 44 | "language model and use this discussion as the inspiration for the\n", 45 | "design of recurrent neural networks. Next, we describe the gradient\n", 46 | "calculation method in recurrent neural networks to explore problems\n", 47 | "that may be encountered in recurrent neural network training. For some\n", 48 | "of these problems, we can use gated recurrent neural networks, such as\n", 49 | "LSTMs and GRUs, described later in this chapter." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "```eval_rst\n", 57 | "\n", 58 | ".. toctree::\n", 59 | " :maxdepth: 2\n", 60 | "\n", 61 | " sequence\n", 62 | " lang-model\n", 63 | " rnn\n", 64 | " lang-model-dataset\n", 65 | " rnn-scratch\n", 66 | " rnn-gluon\n", 67 | " bptt\n", 68 | " gru\n", 69 | " lstm\n", 70 | " deep-rnn\n", 71 | " bi-rnn\n", 72 | " machine-translation\n", 73 | " encoder-decoder\n", 74 | " seq2seq\n", 75 | "```\n" 76 | ] 77 | } 78 | ], 79 | "metadata": { 80 | "accelerator": "GPU", 81 | "language_info": { 82 | "name": "python" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } -------------------------------------------------------------------------------- /contrib/appendix/use_sagemaker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Run on Amazon SageMaker\n", 8 | "\n", 9 | "This tutorial will guide you through Amazon SageMaker: a service that allows you to be up and running with MXNet in 5 minutes and to do Machine Learning at large scale in the quickest and easiest way possible.\n", 10 | "\n", 11 | "Deep Learning projects usually consist of a set of problem tasks: for instance you may have to create training datasets, train and evaluate your model, tune its hyperparameters and finally deploy the model to a production ready cluster. This workflow can be quite cumbersome and time consuming. For this reason AWS provides Amazon SageMaker a fully managed machine learning service that accelerates the overall Deep Learning workflow.\n", 12 | "\n", 13 | "This chapter will give a high level overview about Amazon SageMaker, in-depth tutorials can be found on the [Sagemaker website](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html).\n", 14 | "\n", 15 | "![Sagemaker](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/sagemaker.png)\n", 16 | "\n", 17 | "SageMaker offers Jupyter notebooks and supports MXNet out-of-the box. You can run your notebooks on CPU instances and as such profit from the free tier. However, more powerful CPU instances or GPU instances are charged by time.\n", 18 | "Within this notebook you can [fetch, explore and prepare training data](https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-notebooks-instances.html)." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "```\n", 26 | "import mxnet as mx\n", 27 | "import sagemaker\n", 28 | "mx.test_utils.get_cifar10() # Downloads Cifar-10 dataset to ./data\n", 29 | "sagemaker_session = sagemaker.Session()\n", 30 | "inputs = sagemaker_session.upload_data(path='data/cifar',\n", 31 | " key_prefix='data/cifar10')\n", 32 | "```\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Once the data is ready, you can easily launch training via the SageMaker SDK. So there is no need to manually configure and log into EC2 instances. You can either bring your own model or use SageMaker's [built-in algorithms](https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html) that are tailored to specific use cases such as computer vision, NLP etc. SageMaker encapsulates the process of training into the class ```Estimator``` and we can now start the training on the local notebook instance:" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "```\n", 47 | "from sagemaker.mxnet import MXNet as MXNetEstimator\n", 48 | "estimator = MXNetEstimator(entry_point='train.py', \n", 49 | " role=sagemaker.get_execution_role(),\n", 50 | " train_instance_count=1, \n", 51 | " train_instance_type='local',\n", 52 | " hyperparameters={'batch_size': 1024, \n", 53 | " 'epochs': 30})\n", 54 | "estimator.fit(inputs)\n", 55 | "```\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "If you require a more powerful platform for training, then you only need to change the ```train_instance_type```. Once you call ```fit```, SageMaker will automatically create the required EC2 instances, train your model within a Docker container and then immediately shutdown these instances. ```Fit()``` requires an entry point (here ```train.py```) that describes the model and training loop. This script needs to provide certain functions, that will be automatically executed by SageMaker. More information about the entry point script can be found [here](https://docs.aws.amazon.com/sagemaker/latest/dg/mxnet-training-inference-code-template.html).\n", 63 | "When the model is ready for deployment you can use [SageMaker's hosting services](https://docs.aws.amazon.com/sagemaker/latest/dg/how-it-works-hosting.html) that create an HTTPS endpoint where model inference is provided." 
64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "```\n", 71 | "predictor = estimator.deploy(initial_instance_count=1,\n", 72 | " instance_type='ml.m4.xlarge')\n", 73 | "```\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "The following links show more advanced uses cases in SageMaker:\n", 81 | " - [Distributed training on multiple machines](https://medium.com/apache-mxnet/94-accuracy-on-cifar-10-in-10-minutes-with-amazon-sagemaker-754e441d01d7) \n", 82 | " - [Hyperparameter Tuning Jobs](https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-ex.html)\n", 83 | " - [Optimize a model with SageMaker Neo](https://docs.aws.amazon.com/sagemaker/latest/dg/neo.html)\n", 84 | " - [Build Groundtruth Datasets](https://docs.aws.amazon.com/sagemaker/latest/dg/sms-getting-started.html)\n", 85 | " - [Getting started with SageMaker](https://medium.com/apache-mxnet/getting-started-with-sagemaker-ebe1277484c9)\n", 86 | "\n", 87 | "## Acquire the Code for this Book and activate MXNet GPU environment\n", 88 | "\n", 89 | "Next, download the code for this book and and unzip it. Go to you notebook instance and create a new cell with the following content:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "```%%bash\n", 97 | "%%bash\n", 98 | "wget https://www.diveintodeeplearning.org/d2l-en-1.0.zip \n", 99 | "unzip d2l-en-1.0.zip \n", 100 | "```\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "When you open Jupyter, you will find the downloaded and unzipped chapters. Now you can pick any available notebook. \n", 108 | "![Sagemaker Notebook](https://raw.githubusercontent.com/d2l-ai/notebooks/master/img/jupyter_sagemaker.png)\n", 109 | "\n", 110 | "## Summary\n", 111 | "\n", 112 | "* You can use cloud computing services to obtain more powerful computing resources and use them to run the deep learning code in this document.\n", 113 | "\n", 114 | "## Problem\n", 115 | "\n", 116 | "* The cloud offers convenience, but it does not come cheap. Research the prices of cloud services and find ways to reduce overhead.\n", 117 | "\n", 118 | "## Discuss on our Forum\n", 119 | "\n", 120 | "[Link to the discuss thread.](https://discuss.mxnet.io/t/2399)." 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "accelerator": "GPU", 126 | "language_info": { 127 | "name": "python" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 2 132 | } -------------------------------------------------------------------------------- /contrib/chapter_crashcourse/chapter-one-problem-set.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Problem Set \n", 8 | "\n", 9 | "\"For the things we have to learn before we can do them, we learn by doing them.\" - Aristotle\n", 10 | "\n", 11 | "There's nothing quite like working with a new tool to really understand it, so we have put together some exercises through this book to give you a chance to put into practice what you learned in the previous lesson(s). \n", 12 | "\n", 13 | "## Problems using NDarray [(Official Documentation)](https://mxnet.incubator.apache.org/api/python/ndarray/ndarray.html) \n", 14 | "\n", 15 | "\n", 16 | "Problem 1: Initialize an ndarray of dimension 1x256 on the GPU without overwriting its memory. 
Then, find the index corresponding to the maximum value in the array (argmax)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "```python\n", 24 | "# Problem 1 Work Area\n", 25 | "```\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Problems from Linear Algebra\n", 33 | "\n", 34 | "Problem 2: Create a 4x4 matrix of random values (where values are uniformly random on the iterval [0,1]. Then create an 4x4 identity matrix (an identity of size n is the n × n square matrix with ones on the main diagonal and zeros elsewhere). Multiply the two together and verify that you get the original matrix back." 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "```python\n", 42 | "# Problem 2 Work Area\n", 43 | "```\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "Problem 3: Create a 3x3x20 tensor such that at every x,y coordinate, moving through the z coordinate lists the [Fibonacci sequence](https://en.wikipedia.org/wiki/Fibonacci_number). So, at a z position of 0, the 3x3 matrix will be all 1s. At z-position 1, the 3x3 matrix will be all 1s. At z-position 2, the 3x3 matrix will be all 2s, at z-position 3, the 3x3 matrix will be all 3s and so forth.\n", 51 | "\n", 52 | "Hint: Create the first 2 matrices by hand and then use element-wise operations in a loop to construct the rest of the tensor." 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "```python\n", 60 | "# Problem 3 Work Area\n", 61 | "```\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Problem 4: What is the sum of the vector you created? What is the mean?" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "```python\n", 76 | "# Problem 4 Work Area\n", 77 | "```\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Problem 5: Create a vector [0,1], and another vector [1,0], and use mxnet to calculate the angle between them. Remember that the dot product of two vectors is equal to the cosine of the angle between the vectors, and that the arccos function is the inverse of cosine." 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "```python\n", 92 | "# Problem 5 Work Area\n", 93 | "```\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Problems from Probability\n", 101 | "\n", 102 | "Problem 6: In the classic game of Risk, the attacker can roll a maximum of three dice, while the defender can roll a maximum of two dice. Simulate the attacking and defending dice using `sample_multinomial` to try to estimate the odds that an attacker will win against a defender when both are rolling the maximum number of dice." 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "```python\n", 110 | "# Problem 6 Work Area\n", 111 | "```\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## Problems from Automatic differentiation with ``autograd`` \n", 119 | "\n", 120 | "Problem 7: The formula for a parabola is y=ax^2+bx+c. If a=5 and b = 13, what is the slope of y when x=0. How about when x=7?" 
121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "```python\n", 128 | "# Problem 7 Work Area\n", 129 | "```\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "Problem 8: Graph the parabola described in Problem 6 and inspect the slope of y when x = 0 and x = 7. Does it match up with your answer from Problem 6?" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "```python\n", 144 | "# Problem 8 Work Area\n", 145 | "```\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Next\n", 153 | "[Chapter 2: Linear regression from scratch](../chapter02_supervised-learning/linear-regression-scratch.ipynb)\n", 154 | "\n", 155 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 156 | ] 157 | } 158 | ], 159 | "metadata": { 160 | "accelerator": "GPU", 161 | "language_info": { 162 | "name": "python" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 2 167 | } -------------------------------------------------------------------------------- /data/fr-en-small.txt: -------------------------------------------------------------------------------- 1 | elle est vieille . she is old . 2 | elle est tranquille . she is quiet . 3 | elle a tort . she is wrong . 4 | elle est canadienne . she is canadian . 5 | elle est japonaise . she is japanese . 6 | ils sont russes . they are russian . 7 | ils se disputent . they are arguing . 8 | ils regardent . they are watching . 9 | ils sont acteurs . they are actors . 10 | elles sont crevees . they are exhausted . 11 | il est mon genre ! he is my type ! 12 | il a des ennuis . he is in trouble . 13 | c est mon frere . he is my brother . 14 | c est mon oncle . he is my uncle . 15 | il a environ mon age . he is about my age . 16 | elles sont toutes deux bonnes . they are both good . 17 | elle est bonne nageuse . she is a good swimmer . 18 | c est une personne adorable . he is a lovable person . 19 | il fait du velo . he is riding a bicycle . 20 | ils sont de grands amis . they are great friends . 
21 | -------------------------------------------------------------------------------- /data/jaychou_lyrics.txt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/data/jaychou_lyrics.txt.zip -------------------------------------------------------------------------------- /data/kaggle_cifar10/test_tiny.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/data/kaggle_cifar10/test_tiny.zip -------------------------------------------------------------------------------- /data/kaggle_cifar10/trainLabels.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/data/kaggle_cifar10/trainLabels.csv.zip -------------------------------------------------------------------------------- /data/kaggle_cifar10/train_tiny.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/data/kaggle_cifar10/train_tiny.zip -------------------------------------------------------------------------------- /data/kaggle_dog/train_valid_test_tiny.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/data/kaggle_dog/train_valid_test_tiny.zip -------------------------------------------------------------------------------- /data/ptb.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/data/ptb.zip -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: gluon 2 | dependencies: 3 | - python=3.6 4 | - jupyter=1.0.0 5 | - matplotlib=2.2.2 6 | - pandas=0.23.4 7 | - pip: 8 | - mxnet==1.4.0 9 | -------------------------------------------------------------------------------- /img/404.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/404.jpg -------------------------------------------------------------------------------- /img/DenseNetDense.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/DenseNetDense.png -------------------------------------------------------------------------------- /img/Neuron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/Neuron.png -------------------------------------------------------------------------------- /img/ResNetBlock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ResNetBlock.png -------------------------------------------------------------------------------- /img/ResNetFull.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ResNetFull.png -------------------------------------------------------------------------------- /img/ResNetManyFlavor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ResNetManyFlavor.png -------------------------------------------------------------------------------- /img/alexnet-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/alexnet-all.png -------------------------------------------------------------------------------- /img/anchor-label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/anchor-label.png -------------------------------------------------------------------------------- /img/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/attention.png -------------------------------------------------------------------------------- /img/autumn_oak.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/autumn_oak.jpg -------------------------------------------------------------------------------- /img/aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/aws.png -------------------------------------------------------------------------------- /img/beam_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/beam_search.png -------------------------------------------------------------------------------- /img/birnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/birnn.png -------------------------------------------------------------------------------- /img/blocks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/blocks.png -------------------------------------------------------------------------------- /img/book-org.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/book-org.png -------------------------------------------------------------------------------- /img/capacity_vs_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/capacity_vs_error.png -------------------------------------------------------------------------------- /img/cat-cartoon1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cat-cartoon1.png -------------------------------------------------------------------------------- /img/cat-cartoon2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cat-cartoon2.png -------------------------------------------------------------------------------- /img/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cat1.jpg -------------------------------------------------------------------------------- /img/cat1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cat1.png -------------------------------------------------------------------------------- /img/cat2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cat2.jpg -------------------------------------------------------------------------------- /img/cat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cat3.jpg -------------------------------------------------------------------------------- /img/catdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/catdog.jpg -------------------------------------------------------------------------------- /img/cbow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cbow.png -------------------------------------------------------------------------------- /img/cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cifar10.png -------------------------------------------------------------------------------- /img/comp-comm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/comp-comm.png -------------------------------------------------------------------------------- /img/connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/connect.png -------------------------------------------------------------------------------- /img/contrib01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/contrib01.png -------------------------------------------------------------------------------- /img/contrib02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/contrib02.png -------------------------------------------------------------------------------- /img/contrib03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/contrib03.png -------------------------------------------------------------------------------- /img/contrib04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/contrib04.png -------------------------------------------------------------------------------- /img/contrib05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/contrib05.png -------------------------------------------------------------------------------- /img/contrib06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/contrib06.png -------------------------------------------------------------------------------- /img/contribute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/contribute.png -------------------------------------------------------------------------------- /img/conv1d-2d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/conv1d-2d.png -------------------------------------------------------------------------------- /img/conv1d-channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/conv1d-channel.png -------------------------------------------------------------------------------- /img/conv1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/conv1d.png -------------------------------------------------------------------------------- /img/conv_1x1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/conv_1x1.png -------------------------------------------------------------------------------- /img/conv_multi_in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/conv_multi_in.png -------------------------------------------------------------------------------- /img/conv_pad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/conv_pad.png -------------------------------------------------------------------------------- /img/conv_stride.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/conv_stride.png -------------------------------------------------------------------------------- /img/convert.sh: -------------------------------------------------------------------------------- 1 | ls *.pdf | while read f; do pdf2svg $f ${f%.pdf}.svg; done 2 | -------------------------------------------------------------------------------- /img/copyto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/copyto.png -------------------------------------------------------------------------------- /img/correlation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/correlation.png -------------------------------------------------------------------------------- /img/cuda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/cuda.png -------------------------------------------------------------------------------- /img/data-collection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/data-collection.png -------------------------------------------------------------------------------- /img/data-parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/data-parallel.png -------------------------------------------------------------------------------- /img/death_cap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/death_cap.jpg -------------------------------------------------------------------------------- /img/deep-rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/deep-rnn.png -------------------------------------------------------------------------------- /img/deeplearning_amazon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/deeplearning_amazon.png -------------------------------------------------------------------------------- /img/dense-rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dense-rnn.png -------------------------------------------------------------------------------- /img/densenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/densenet.png -------------------------------------------------------------------------------- /img/disk.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/disk.png -------------------------------------------------------------------------------- /img/dog-cartoon1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dog-cartoon1.png -------------------------------------------------------------------------------- /img/dog-cartoon2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dog-cartoon2.jpg -------------------------------------------------------------------------------- /img/dog-cartoon2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dog-cartoon2.png -------------------------------------------------------------------------------- /img/dog1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dog1.jpg -------------------------------------------------------------------------------- /img/dog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dog2.jpg -------------------------------------------------------------------------------- /img/dog_hotdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dog_hotdog.jpg -------------------------------------------------------------------------------- /img/dogdogcat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dogdogcat.png -------------------------------------------------------------------------------- /img/dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dropout.png -------------------------------------------------------------------------------- /img/dropout2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/dropout2.png -------------------------------------------------------------------------------- /img/ec2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ec2.png -------------------------------------------------------------------------------- /img/encoder-decoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/encoder-decoder.png -------------------------------------------------------------------------------- /img/fast-rcnn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/fast-rcnn.png -------------------------------------------------------------------------------- /img/faster-rcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/faster-rcnn.png -------------------------------------------------------------------------------- /img/fcn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/fcn.png -------------------------------------------------------------------------------- /img/filters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/filters.png -------------------------------------------------------------------------------- /img/finetune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/finetune.png -------------------------------------------------------------------------------- /img/flopsvsprice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/flopsvsprice.png -------------------------------------------------------------------------------- /img/forward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/forward.png -------------------------------------------------------------------------------- /img/ftse100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ftse100.png -------------------------------------------------------------------------------- /img/functionclasses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/functionclasses.png -------------------------------------------------------------------------------- /img/git-clone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/git-clone.png -------------------------------------------------------------------------------- /img/git-createpr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/git-createpr.png -------------------------------------------------------------------------------- /img/git-fork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/git-fork.png -------------------------------------------------------------------------------- /img/git-forked.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/git-forked.png -------------------------------------------------------------------------------- /img/git-newpr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/git-newpr.png -------------------------------------------------------------------------------- /img/gru_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/gru_1.png -------------------------------------------------------------------------------- /img/gru_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/gru_2.png -------------------------------------------------------------------------------- /img/gru_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/gru_3.png -------------------------------------------------------------------------------- /img/gtx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/gtx.png -------------------------------------------------------------------------------- /img/hi-softmax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/hi-softmax.png -------------------------------------------------------------------------------- /img/hmm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/hmm.png -------------------------------------------------------------------------------- /img/house_pricing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/house_pricing.png -------------------------------------------------------------------------------- /img/inception-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/inception-full.png -------------------------------------------------------------------------------- /img/inception.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/inception.png -------------------------------------------------------------------------------- /img/intersect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/intersect.png -------------------------------------------------------------------------------- /img/iou.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/iou.png -------------------------------------------------------------------------------- /img/jupyter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter.png -------------------------------------------------------------------------------- /img/jupyter00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter00.png -------------------------------------------------------------------------------- /img/jupyter01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter01.png -------------------------------------------------------------------------------- /img/jupyter02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter02.png -------------------------------------------------------------------------------- /img/jupyter03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter03.png -------------------------------------------------------------------------------- /img/jupyter04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter04.png -------------------------------------------------------------------------------- /img/jupyter05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter05.png -------------------------------------------------------------------------------- /img/jupyter06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/jupyter06.png -------------------------------------------------------------------------------- /img/kaggle-dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/kaggle-dog.png -------------------------------------------------------------------------------- /img/kaggle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/kaggle.png -------------------------------------------------------------------------------- /img/kaggle_cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/kaggle_cifar10.png -------------------------------------------------------------------------------- /img/kaggle_submit2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/kaggle_submit2.png -------------------------------------------------------------------------------- /img/keypair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/keypair.png -------------------------------------------------------------------------------- /img/koebel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/koebel.jpg -------------------------------------------------------------------------------- /img/launching.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/launching.png -------------------------------------------------------------------------------- /img/lenet-vert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/lenet-vert.png -------------------------------------------------------------------------------- /img/lenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/lenet.png -------------------------------------------------------------------------------- /img/limits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/limits.png -------------------------------------------------------------------------------- /img/linearregression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/linearregression.png -------------------------------------------------------------------------------- /img/linreg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/linreg.png -------------------------------------------------------------------------------- /img/lstm_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/lstm_0.png -------------------------------------------------------------------------------- /img/lstm_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/lstm_1.png -------------------------------------------------------------------------------- /img/lstm_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/lstm_2.png -------------------------------------------------------------------------------- /img/lstm_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/lstm_3.png -------------------------------------------------------------------------------- /img/mask-rcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/mask-rcnn.png -------------------------------------------------------------------------------- /img/ml-loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ml-loop.png -------------------------------------------------------------------------------- /img/mlp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/mlp.png -------------------------------------------------------------------------------- /img/multi-head-attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/multi-head-attention.png -------------------------------------------------------------------------------- /img/mxnet-website.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/mxnet-website.png -------------------------------------------------------------------------------- /img/neural-style-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/neural-style-1.png -------------------------------------------------------------------------------- /img/neural-style-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/neural-style-2.png -------------------------------------------------------------------------------- /img/neural-style.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/neural-style.png -------------------------------------------------------------------------------- /img/nin-compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/nin-compare.png -------------------------------------------------------------------------------- /img/nin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/nin.png -------------------------------------------------------------------------------- /img/ones_like.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ones_like.png -------------------------------------------------------------------------------- /img/os.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/os.png -------------------------------------------------------------------------------- /img/p2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/p2x.png -------------------------------------------------------------------------------- /img/pikachu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/pikachu.jpg -------------------------------------------------------------------------------- /img/pooling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/pooling.png -------------------------------------------------------------------------------- /img/popvssoda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/popvssoda.png -------------------------------------------------------------------------------- /img/r-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/r-cnn.png -------------------------------------------------------------------------------- /img/rainier.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/rainier.jpg -------------------------------------------------------------------------------- /img/ratbrain.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ratbrain.jpg -------------------------------------------------------------------------------- /img/residual-block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/residual-block.png -------------------------------------------------------------------------------- /img/residual-rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/residual-rnn.png -------------------------------------------------------------------------------- /img/rl-environment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/rl-environment.png -------------------------------------------------------------------------------- /img/rnn-bptt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/rnn-bptt.png -------------------------------------------------------------------------------- /img/rnn-train.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/rnn-train.png -------------------------------------------------------------------------------- /img/rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/rnn.png -------------------------------------------------------------------------------- /img/roi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/roi.png -------------------------------------------------------------------------------- /img/s2s_prob1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/s2s_prob1.png -------------------------------------------------------------------------------- /img/s2s_prob2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/s2s_prob2.png -------------------------------------------------------------------------------- /img/segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/segmentation.png -------------------------------------------------------------------------------- /img/self-attention-predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/self-attention-predict.png -------------------------------------------------------------------------------- /img/self-attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/self-attention.png -------------------------------------------------------------------------------- /img/seq2seq-attention-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/seq2seq-attention-details.png -------------------------------------------------------------------------------- /img/seq2seq-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/seq2seq-details.png -------------------------------------------------------------------------------- /img/seq2seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/seq2seq.png -------------------------------------------------------------------------------- /img/seq2seq_attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/seq2seq_attention.png -------------------------------------------------------------------------------- /img/seq2seq_predict.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/seq2seq_predict.png -------------------------------------------------------------------------------- /img/sharks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/sharks.png -------------------------------------------------------------------------------- /img/singlelayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/singlelayer.png -------------------------------------------------------------------------------- /img/singleneuron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/singleneuron.png -------------------------------------------------------------------------------- /img/skip-gram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/skip-gram.png -------------------------------------------------------------------------------- /img/softmaxreg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/softmaxreg.png -------------------------------------------------------------------------------- /img/speech.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/speech.png -------------------------------------------------------------------------------- /img/ssd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ssd.png -------------------------------------------------------------------------------- /img/ssh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/ssh.png -------------------------------------------------------------------------------- /img/stackedanimals.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/stackedanimals.jpg -------------------------------------------------------------------------------- /img/stopterminate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/stopterminate.png -------------------------------------------------------------------------------- /img/style-transfer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/style-transfer.png -------------------------------------------------------------------------------- /img/supervised-learning.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/supervised-learning.png -------------------------------------------------------------------------------- /img/taxonomy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/taxonomy.jpg -------------------------------------------------------------------------------- /img/textcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/textcnn.png -------------------------------------------------------------------------------- /img/timemachine-5gram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/timemachine-5gram.png -------------------------------------------------------------------------------- /img/transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/transformer.png -------------------------------------------------------------------------------- /img/truncated-bptt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/truncated-bptt.png -------------------------------------------------------------------------------- /img/vgg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/vgg.png -------------------------------------------------------------------------------- /img/wake-word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/wake-word.png -------------------------------------------------------------------------------- /img/waldo-mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/waldo-mask.jpg -------------------------------------------------------------------------------- /img/waldo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/waldo.jpg -------------------------------------------------------------------------------- /img/wattvsprice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/wattvsprice.png -------------------------------------------------------------------------------- /img/where-wally-walker-books.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/where-wally-walker-books.jpg -------------------------------------------------------------------------------- /img/whitecat10.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitecat10.jpg -------------------------------------------------------------------------------- /img/whitecat160.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitecat160.jpg -------------------------------------------------------------------------------- /img/whitecat20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitecat20.jpg -------------------------------------------------------------------------------- /img/whitecat320.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitecat320.jpg -------------------------------------------------------------------------------- /img/whitecat40.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitecat40.jpg -------------------------------------------------------------------------------- /img/whitecat80.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitecat80.jpg -------------------------------------------------------------------------------- /img/whitedog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitedog.jpg -------------------------------------------------------------------------------- /img/whitedog10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitedog10.jpg -------------------------------------------------------------------------------- /img/whitedog160.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitedog160.jpg -------------------------------------------------------------------------------- /img/whitedog20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitedog20.jpg -------------------------------------------------------------------------------- /img/whitedog320.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitedog320.jpg -------------------------------------------------------------------------------- /img/whitedog40.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitedog40.jpg -------------------------------------------------------------------------------- /img/whitedog80.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/notebooks/cec6fdc9e9aa0c8fd9b2f7b2cd1b79d4a6c139d6/img/whitedog80.jpg --------------------------------------------------------------------------------