├── .gitattributes ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── build ├── 404.md ├── Makefile ├── _static │ ├── gluon.css │ ├── gluon.png │ ├── gluon_s2.png │ ├── gluon_white.png │ └── google_analytics.js ├── build.sh ├── build.yml ├── conf.py ├── index.rst └── md2ipynb.py ├── chapter01_crashcourse ├── autograd.ipynb ├── chapter-one-problem-set.ipynb ├── introduction.ipynb ├── linear-algebra.ipynb ├── ndarray.ipynb ├── preface.ipynb └── probability.ipynb ├── chapter02_supervised-learning ├── environment.ipynb ├── linear-regression-gluon.ipynb ├── linear-regression-scratch.ipynb ├── logistic-regression-gluon.ipynb ├── perceptron.ipynb ├── regularization-gluon.ipynb ├── regularization-scratch.ipynb ├── softmax-regression-gluon.ipynb └── softmax-regression-scratch.ipynb ├── chapter03_deep-neural-networks ├── custom-layer.ipynb ├── kaggle-gluon-kfold.ipynb ├── mlp-dropout-gluon.ipynb ├── mlp-dropout-scratch.ipynb ├── mlp-gluon.ipynb ├── mlp-scratch.ipynb ├── plumbing.ipynb └── serialization.ipynb ├── chapter04_convolutional-neural-networks ├── cnn-batch-norm-gluon.ipynb ├── cnn-batch-norm-scratch.ipynb ├── cnn-gluon.ipynb ├── cnn-scratch.ipynb ├── deep-cnns-alexnet.ipynb └── very-deep-nets-vgg.ipynb ├── chapter05_recurrent-neural-networks ├── gru-scratch.ipynb ├── lstm-scratch.ipynb ├── rnns-gluon.ipynb └── simple-rnn.ipynb ├── chapter06_optimization ├── adadelta-gluon.ipynb ├── adadelta-scratch.ipynb ├── adagrad-gluon.ipynb ├── adagrad-scratch.ipynb ├── adam-gluon.ipynb ├── adam-scratch.ipynb ├── gd-sgd-gluon.ipynb ├── gd-sgd-scratch.ipynb ├── momentum-gluon.ipynb ├── momentum-scratch.ipynb ├── optimization-intro.ipynb ├── rmsprop-gluon.ipynb └── rmsprop-scratch.ipynb ├── chapter07_distributed-learning ├── hybridize.ipynb ├── multiple-gpus-gluon.ipynb ├── multiple-gpus-scratch.ipynb └── training-with-multiple-machines.ipynb ├── chapter08_computer-vision ├── fine-tuning.ipynb ├── object-detection.ipynb └── visual-question-answer.ipynb ├── chapter09_natural-language-processing └── tree-lstm.ipynb ├── chapter11_recommender-systems ├── intro-recommender-systems.ipynb └── introduction-to-recommender-systems.ipynb ├── chapter12_time-series ├── intro-forecasting-2-gluon.ipynb ├── intro-forecasting-gluon.ipynb ├── issm-scratch.ipynb └── lds-scratch.ipynb ├── chapter13_unsupervised-learning ├── .gitignore └── vae-gluon.ipynb ├── chapter14_generative-adversarial-networks ├── conditional.ipynb ├── dcgan.ipynb ├── gan-intro.ipynb └── pixel2pixel.ipynb ├── chapter16_tensor_methods └── tensor_basics.ipynb ├── chapter17_deep-reinforcement-learning ├── DDQN.ipynb └── DQN.ipynb ├── chapter18_variational-methods-and-uncertainty ├── bayes-by-backprop-gluon.ipynb ├── bayes-by-backprop-rnn.ipynb └── bayes-by-backprop.ipynb ├── chapter19_graph-neural-networks └── Graph-Neural-Networks.ipynb ├── cheatsheets ├── kaggle-gluon-kfold.ipynb └── pytorch_gluon.md ├── data ├── adult │ ├── a1a.test │ └── a1a.train ├── kaggle │ ├── house_pred_test.csv │ └── house_pred_train.csv └── nlp │ ├── ptb.test.txt │ ├── ptb.train.txt │ ├── ptb.valid.txt │ ├── timemachine.txt │ └── tinyshakespeare.txt ├── docs ├── C01-install.md ├── C01-install.rst ├── C02-contribute.md ├── C02-contribute.rst └── publish.sh ├── environment.yml ├── img ├── Assault-clipped.png ├── Assault.png ├── Assualt_DDQN.png ├── Assualt_DDQN_Clipped.png ├── Pixel2pixel-Unet.png ├── bbb_nn_bayes.png ├── bbb_nn_classic.png ├── berliner.jpg ├── cat-cartoon1.png ├── cat-cartoon2.png ├── cat1.jpg ├── cat2.jpg ├── catdog.jpg ├── cgan.png ├── comic-hot-dog.png 
├── data-collection.png ├── dcgan.png ├── death_cap.jpg ├── deeplearning_amazon.png ├── dist_kv.svg ├── dog-cartoon1.png ├── dog-cartoon2.jpg ├── dog1.jpg ├── dog2.jpg ├── dog_hotdog.jpg ├── dogdogcat.png ├── doughnut.jpg ├── dropout.png ├── fake_bedrooms.png ├── filters.png ├── fine-tune.png ├── gd-move.png ├── growth-2-20-girls.png ├── gtx-580-gpu.jpeg ├── house_pricing.png ├── imagenet.jpeg ├── kaggle.png ├── kaggle_submit.png ├── kaggle_submit2.png ├── leg_hotdog.jpg ├── legendre.jpeg ├── linear-regression.png ├── ml-loop.png ├── momentum-move.png ├── multi-gpu.svg ├── multi-machines.svg ├── multilayer-perceptron.png ├── mxnet_google.png ├── onelayer.graffle │ ├── data.plist │ ├── image4.pdf │ ├── image5.pdf │ └── image6.pdf ├── onelayer.png ├── operator-context.png ├── overfitting-low-data.png ├── pikachu.jpg ├── pizza.png ├── real_hotdog.jpg ├── recommended-prime-tv.png ├── recurrent-batching.png ├── recurrent-lm.png ├── recurrent-motivation.png ├── regularization-overfitting.png ├── regularization.graffle ├── regularization.png ├── rl-environment.png ├── road-cliff.jpg ├── simple-gan.png ├── simple-net-linear.png ├── simple-rnn.png ├── simple-softmax-net.png ├── sodapopcoke.png ├── speech.jpg ├── ssd.svg ├── supervised-learning.png ├── taxonomy.jpg ├── tensor_cartoon.jpg ├── tensor_contraction.png ├── tensor_fibers.png ├── training_model.png ├── wake-word.png ├── whitecat160.jpg ├── whitecat20.jpg ├── whitecat320.jpg ├── whitecat40.jpg ├── whitecat80.jpg ├── whitedog160.jpg ├── whitedog20.jpg ├── whitedog320.jpg ├── whitedog40.jpg └── whitedog80.jpg ├── media └── polly.mp3 └── proto-P02-C02.6-loss.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | slides/ICML[[:space:]]2017.key filter=lfs diff=lfs merge=lfs -text 2 | slides/ICML[[:space:]]2017.pdf filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # some data files 2 | data/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | _build 106 | **/.DS_Store 107 | 108 | # mxnet 109 | *.rec 110 | *.params 111 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
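For example, applied to a Python source file, the boilerplate above would appear as the comment block below (an illustrative rendering only; the bracketed fields are deliberately left as placeholders, as the appendix instructs):

# Copyright [yyyy] [name of copyright owner]
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.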
202 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all: html
 2 | 
 3 | build/%.ipynb: %.md
 4 | 	@mkdir -p $(@D)
 5 | 	cd $(@D); python ../md2ipynb.py ../../$< ../../$@
 6 | 
 7 | build/%.ipynb: %.ipynb
 8 | 	@mkdir -p $(@D)
 9 | 	@cp $< $@
10 | 
11 | build/%.md: %.md
12 | 	@mkdir -p $(@D)
13 | 	@cp $< $@
14 | 
15 | 
16 | # markdown files that should not be converted to notebooks
17 | PURE_MK = $(wildcard chapter00_preface/*.md */index.md)
18 | # markdown files that will be converted to .ipynb
19 | MK_NOTEBOOKS = $(filter-out $(PURE_MK), $(wildcard chapter*/*.md))
20 | # jupyter notebooks
21 | IPYNBS = $(wildcard chapter*/*.ipynb)
22 | 
23 | 
24 | OBJ = $(patsubst %.md, build/%.md, $(PURE_MK)) \
25 | 	$(patsubst %.md, build/%.ipynb, $(MK_NOTEBOOKS)) \
26 | 	$(patsubst %.ipynb, build/%.ipynb, $(IPYNBS))
27 | 
28 | ORIGIN_DEPS = $(wildcard img/* data/* media/*) environment.yml README.md
29 | DEPS = $(patsubst %, build/%, $(ORIGIN_DEPS))
30 | 
31 | PKG = build/_build/html/gluon_tutorials.tar.gz build/_build/html/gluon_tutorials.zip
32 | 
33 | pkg: $(PKG)
34 | 
35 | build/_build/html/gluon_tutorials.zip: $(OBJ) $(DEPS)
36 | 	cd build; zip -r $(patsubst build/%, %, $@ $(DEPS)) chapter*
37 | 
38 | build/_build/html/gluon_tutorials.tar.gz: $(OBJ) $(DEPS)
39 | 	cd build; tar -zcvf $(patsubst build/%, %, $@ $(DEPS)) chapter*
40 | 
41 | build/%: %
42 | 	@mkdir -p $(@D)
43 | 	@cp -r $< $@
44 | 
45 | html: $(DEPS) $(OBJ)
46 | 	make -C build html
47 | 
48 | SVG=$(wildcard img/*.svg)
49 | 
50 | build/_build/latex/%.png: img/%.svg
51 | 	convert $< $@
52 | 
53 | pdf: $(DEPS) $(OBJ) $(patsubst img/%.svg, build/_build/latex/%.png, $(SVG))
54 | 	make -C build latex
55 | 	sed -i s/\.svg/\.png/ build/_build/latex/gluon_tutorials.tex
56 | 	cd build/_build/latex; make
57 | 
58 | clean:
59 | 	rm -rf build/chapter* $(DEPS) $(PKG)
--------------------------------------------------------------------------------
/build/404.md:
--------------------------------------------------------------------------------
1 | # Page not found
2 | 
--------------------------------------------------------------------------------
/build/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | PAPER         =
 8 | BUILDDIR      = _build
 9 | 
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 | 
15 | # Internal variables.
16 | PAPEROPT_a4     = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
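# For example, with the defaults above, `make html PAPER=a4` expands to the
# following sphinx-build invocation (an illustrative expansion, not a new
# rule; extra flags can be passed through SPHINXOPTS the same way):
#
#   sphinx-build -b html -d _build/doctrees -D latex_paper_size=a4 . _build/html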
 21 | 
 22 | .PHONY: help
 23 | help:
 24 | 	@echo "Please use \`make <target>' where <target> is one of"
 25 | 	@echo "  html       to make standalone HTML files"
 26 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 27 | 	@echo "  singlehtml to make a single large HTML file"
 28 | 	@echo "  pickle     to make pickle files"
 29 | 	@echo "  json       to make JSON files"
 30 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 31 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 32 | 	@echo "  applehelp  to make an Apple Help Book"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  epub3      to make an epub3"
 36 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 37 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 38 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 39 | 	@echo "  text       to make text files"
 40 | 	@echo "  man        to make manual pages"
 41 | 	@echo "  texinfo    to make Texinfo files"
 42 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 43 | 	@echo "  gettext    to make PO message catalogs"
 44 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 45 | 	@echo "  xml        to make Docutils-native XML files"
 46 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 47 | 	@echo "  linkcheck  to check all external links for integrity"
 48 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 49 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 50 | 	@echo "  dummy      to check syntax errors of document sources"
 51 | 
 52 | .PHONY: clean
 53 | clean:
 54 | 	rm -rf $(BUILDDIR)/*
 55 | 
 56 | .PHONY: html
 57 | html:
 58 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 61 | 
 62 | .PHONY: dirhtml
 63 | dirhtml:
 64 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 65 | 	@echo
 66 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 67 | 
 68 | .PHONY: singlehtml
 69 | singlehtml:
 70 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 71 | 	@echo
 72 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 73 | 
 74 | .PHONY: pickle
 75 | pickle:
 76 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 77 | 	@echo
 78 | 	@echo "Build finished; now you can process the pickle files."
 79 | 
 80 | .PHONY: json
 81 | json:
 82 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 83 | 	@echo
 84 | 	@echo "Build finished; now you can process the JSON files."
 85 | 
 86 | .PHONY: htmlhelp
 87 | htmlhelp:
 88 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 89 | 	@echo
 90 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 91 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 92 | 
 93 | .PHONY: qthelp
 94 | qthelp:
 95 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 96 | 	@echo
 97 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 98 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 99 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TheStraightDope.qhcp"
100 | 	@echo "To view the help file:"
101 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TheStraightDope.qhc"
102 | 
103 | .PHONY: applehelp
104 | applehelp:
105 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
106 | 	@echo
107 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
108 | 	@echo "N.B. 
You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/TheStraightDope" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/TheStraightDope" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 
201 | 
202 | .PHONY: doctest
203 | doctest:
204 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
205 | 	@echo "Testing of doctests in the sources finished, look at the " \
206 | 	      "results in $(BUILDDIR)/doctest/output.txt."
207 | 
208 | .PHONY: coverage
209 | coverage:
210 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
211 | 	@echo "Testing of coverage in the sources finished, look at the " \
212 | 	      "results in $(BUILDDIR)/coverage/python.txt."
213 | 
214 | .PHONY: xml
215 | xml:
216 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
217 | 	@echo
218 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
219 | 
220 | .PHONY: pseudoxml
221 | pseudoxml:
222 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
223 | 	@echo
224 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
225 | 
226 | .PHONY: dummy
227 | dummy:
228 | 	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
229 | 	@echo
230 | 	@echo "Build finished. Dummy builder generates no files."
231 | 
--------------------------------------------------------------------------------
/build/_static/gluon.css:
--------------------------------------------------------------------------------
 1 | code, .rst-content tt, .rst-content code {
 2 |     font-size: 85%;
 3 | }
 4 | 
 5 | .rst-content img {
 6 |     display: block;
 7 |     margin-left: auto;
 8 |     margin-right: auto;
 9 | }
10 | 
--------------------------------------------------------------------------------
/build/_static/gluon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/build/_static/gluon.png
--------------------------------------------------------------------------------
/build/_static/gluon_s2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/build/_static/gluon_s2.png
--------------------------------------------------------------------------------
/build/_static/gluon_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/build/_static/gluon_white.png
--------------------------------------------------------------------------------
/build/_static/google_analytics.js:
--------------------------------------------------------------------------------
1 | (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
2 | (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
3 | m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
4 | })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
5 | 
6 | ga('create', 'UA-96378503-3', 'auto');
7 | ga('send', 'pageview');
8 | 
--------------------------------------------------------------------------------
/build/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Build all notebooks and publish the resulting docs to gluon.mxnet.io.
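# Usage sketch (assumed, not stated in the script itself): run from the
# repository root with the conda and aws CLIs available, e.g.
#
#   bash build/build.sh
#
# `set -x` below echoes each command as it runs; `set -e` aborts on the
# first failure.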
4 | set -x 5 | set -e 6 | 7 | # prepare the env 8 | conda env update -f build/build.yml -n build_sd_tutorials 9 | conda activate build_sd_tutorials 10 | conda list --export 11 | 12 | make html 13 | 14 | # rm -rf build/data 15 | # make pkg 16 | 17 | # make pdf 18 | # cp build/_build/latex/gluon_tutorials.pdf build/_build/html/ 19 | 20 | aws s3 sync --delete build/_build/html/ s3://gluon.mxnet.io/ --acl public-read 21 | -------------------------------------------------------------------------------- /build/build.yml: -------------------------------------------------------------------------------- 1 | name: build_gluon_tutorials 2 | dependencies: 3 | - python 4 | - libgfortran 5 | - jupyter 6 | - sphinx 7 | - sphinx_rtd_theme 8 | - matplotlib 9 | - pandas 10 | - notebook=5.0.0 11 | - pip: 12 | - pyopenssl>= 17.3.0 13 | - nbsphinx 14 | - recommonmark 15 | - https://github.com/mli/notedown/tarball/master 16 | - mxnet-cu90>=0.11.1b20171003 17 | -------------------------------------------------------------------------------- /build/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # The Straight Dope documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 18 10:40:45 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | from recommonmark.parser import CommonMarkParser 19 | from recommonmark.transform import AutoStructify 20 | 21 | 22 | # If extensions (or modules to document with autodoc) are in another directory, 23 | # add these directories to sys.path here. If the directory is relative to the 24 | # documentation root, use os.path.abspath to make it absolute, like shown here. 25 | #sys.path.insert(0, os.path.abspath('.')) 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | #needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.doctest', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.todo', 40 | 'sphinx.ext.mathjax', 41 | 'sphinx.ext.ifconfig', 42 | 'sphinx.ext.viewcode', 43 | 'nbsphinx', 44 | 'IPython.sphinxext.ipython_console_highlighting', 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | source_parsers = {'.md': CommonMarkParser} 51 | 52 | # The suffix(es) of source filenames. 53 | # You can specify multiple suffix as a list of string: 54 | # source_suffix = ['.rst', '.md'] 55 | source_suffix = ['.rst', '.ipynb', '.md'] 56 | 57 | # The encoding of source files. 58 | #source_encoding = 'utf-8-sig' 59 | 60 | # The master toctree document. 61 | master_doc = 'index' 62 | 63 | # General information about the project. 
64 | project = 'The Straight Dope' 65 | copyright = '2017, Contributors' 66 | author = "MXNet Community" 67 | 68 | 69 | # The version info for the project you're documenting, acts as replacement for 70 | # |version| and |release|, also used in various other places throughout the 71 | # built documents. 72 | # 73 | # The short X.Y version. 74 | version = '0.1' 75 | # The full version, including alpha/beta/rc tags. 76 | release = '0.1' 77 | 78 | # The language for content autogenerated by Sphinx. Refer to documentation 79 | # for a list of supported languages. 80 | # 81 | # This is also used if you do content translation via gettext catalogs. 82 | # Usually you set "language" from the command line for these cases. 83 | language = None 84 | 85 | # There are two options for replacing |today|: either, you set today to some 86 | # non-false value, then it is used: 87 | #today = '' 88 | # Else, today_fmt is used as the format for a strftime call. 89 | #today_fmt = '%B %d, %Y' 90 | 91 | # List of patterns, relative to source directory, that match files and 92 | # directories to ignore when looking for source files. 93 | # This patterns also effect to html_static_path and html_extra_path 94 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] 95 | 96 | # The reST default role (used for this markup: `text`) to use for all 97 | # documents. 98 | #default_role = None 99 | 100 | # If true, '()' will be appended to :func: etc. cross-reference text. 101 | #add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | #add_module_names = True 106 | 107 | # If true, sectionauthor and moduleauthor directives will be shown in the 108 | # output. They are ignored by default. 109 | #show_authors = False 110 | 111 | # The name of the Pygments (syntax highlighting) style to use. 112 | pygments_style = 'sphinx' 113 | 114 | # A list of ignored prefixes for module index sorting. 115 | #modindex_common_prefix = [] 116 | 117 | # If true, keep warnings as "system message" paragraphs in the built documents. 118 | #keep_warnings = False 119 | 120 | # If true, `todo` and `todoList` produce output, else they produce nothing. 121 | todo_include_todos = True 122 | 123 | 124 | # -- Options for HTML output ---------------------------------------------- 125 | 126 | # The theme to use for HTML and HTML Help pages. See the documentation for 127 | # a list of builtin themes. 128 | html_theme = 'sphinx_rtd_theme' 129 | 130 | # Theme options are theme-specific and customize the look and feel of a theme 131 | # further. For a list of options available for each theme, see the 132 | # documentation. 133 | #html_theme_options = {} 134 | 135 | # Add any paths that contain custom themes here, relative to this directory. 136 | #html_theme_path = [] 137 | 138 | # The name for this set of Sphinx documents. 139 | # " v documentation" by default. 140 | #html_title = 'The Straight Dope v0.1' 141 | 142 | # A shorter title for the navigation bar. Default is the same as html_title. 143 | #html_short_title = None 144 | 145 | # The name of an image file (relative to this directory) to place at the top 146 | # of the sidebar. 147 | html_logo = '_static/gluon_white.png' 148 | 149 | # The name of an image file (relative to this directory) to use as a favicon of 150 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 151 | # pixels large. 
152 | html_favicon = '_static/gluon_s2.png' 153 | 154 | # Add any paths that contain custom static files (such as style sheets) here, 155 | # relative to this directory. They are copied after the builtin static files, 156 | # so a file named "default.css" will overwrite the builtin "default.css". 157 | html_static_path = ['_static'] 158 | 159 | # Add any extra paths that contain custom files (such as robots.txt or 160 | # .htaccess) here, relative to this directory. These files are copied 161 | # directly to the root of the documentation. 162 | #html_extra_path = [] 163 | 164 | # If not None, a 'Last updated on:' timestamp is inserted at every page 165 | # bottom, using the given strftime format. 166 | # The empty string is equivalent to '%b %d, %Y'. 167 | #html_last_updated_fmt = None 168 | 169 | # If true, SmartyPants will be used to convert quotes and dashes to 170 | # typographically correct entities. 171 | #html_use_smartypants = True 172 | 173 | # Custom sidebar templates, maps document names to template names. 174 | #html_sidebars = {} 175 | 176 | # Additional templates that should be rendered to pages, maps page names to 177 | # template names. 178 | #html_additional_pages = {} 179 | 180 | # If false, no module index is generated. 181 | #html_domain_indices = True 182 | 183 | # If false, no index is generated. 184 | #html_use_index = True 185 | 186 | # If true, the index is split into individual pages for each letter. 187 | #html_split_index = False 188 | 189 | # If true, links to the reST sources are added to the pages. 190 | #html_show_sourcelink = True 191 | 192 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 193 | #html_show_sphinx = True 194 | 195 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 196 | #html_show_copyright = True 197 | 198 | # If true, an OpenSearch description file will be output, and all pages will 199 | # contain a tag referring to it. The value of this option must be the 200 | # base URL from which the finished HTML is served. 201 | #html_use_opensearch = '' 202 | 203 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 204 | #html_file_suffix = None 205 | 206 | # Language to be used for generating the HTML full-text search index. 207 | # Sphinx supports the following languages: 208 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 209 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 210 | # html_search_language = 'zh' 211 | 212 | # A dictionary with options for the search language support, empty by default. 213 | # 'ja' uses this config value. 214 | # 'zh' user can custom change `jieba` dictionary path. 215 | 216 | # The name of a javascript file (relative to the configuration directory) that 217 | # implements a search results scorer. If empty, the default will be used. 218 | #html_search_scorer = 'scorer.js' 219 | 220 | # Output file base name for HTML help builder. 221 | htmlhelp_basename = 'TheStraightDopedoc' 222 | 223 | # -- Options for LaTeX output --------------------------------------------- 224 | 225 | latex_elements = { 226 | # 'papersize' : 'a4paper', 227 | 'utf8extra' : '', 228 | 'inputenc' : '', 229 | 'babel' : r'''\usepackage[english]{babel}''', 230 | 'preamble' : r''' 231 | ''', 232 | # The paper size ('letterpaper' or 'a4paper'). 233 | #'papersize': 'letterpaper', 234 | 235 | # The font size ('10pt', '11pt' or '12pt'). 236 | 'pointsize': '11pt', 237 | 238 | # Additional stuff for the LaTeX preamble. 
239 | #'preamble': '', 240 | 241 | # Latex figure (float) alignment 242 | #'figure_align': 'htbp', 243 | } 244 | 245 | # Grouping the document tree into LaTeX files. List of tuples 246 | # (source start file, target name, title, 247 | # author, documentclass [howto, manual, or own class]). 248 | latex_documents = [ 249 | (master_doc, 'gluon_tutorials.tex', 'Deep Learning - The Straight Dope', 250 | author, 'manual'), 251 | ] 252 | 253 | # The name of an image file (relative to this directory) to place at the top of 254 | # the title page. 255 | latex_logo = '_static/gluon.png' 256 | 257 | # latex_engine = 'xelatex' 258 | # For "manual" documents, if this is true, then toplevel headings are parts, 259 | # not chapters. 260 | #latex_use_parts = False 261 | 262 | # If true, show page references after internal links. 263 | #latex_show_pagerefs = False 264 | 265 | # If true, show URL addresses after external links. 266 | #latex_show_urls = False 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #latex_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | latex_domain_indices = False 273 | 274 | 275 | # -- Options for manual page output --------------------------------------- 276 | 277 | # One entry per manual page. List of tuples 278 | # (source start file, name, description, authors, manual section). 279 | man_pages = [ 280 | (master_doc, 'thestraightdope', 'The Straight Dope Documentation', 281 | [author], 1) 282 | ] 283 | 284 | # If true, show URL addresses after external links. 285 | #man_show_urls = False 286 | 287 | 288 | # -- Options for Texinfo output ------------------------------------------- 289 | 290 | # Grouping the document tree into Texinfo files. List of tuples 291 | # (source start file, target name, title, author, 292 | # dir menu entry, description, category) 293 | texinfo_documents = [ 294 | (master_doc, 'TheStraightDope', 'The Straight Dope Documentation', 295 | author, 'TheStraightDope', 'One line description of project.', 296 | 'Miscellaneous'), 297 | ] 298 | 299 | # Documents to append as an appendix to all manuals. 300 | #texinfo_appendices = [] 301 | 302 | # If false, no module index is generated. 303 | #texinfo_domain_indices = True 304 | 305 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 306 | #texinfo_show_urls = 'footnote' 307 | 308 | # If true, do not generate a @detailmenu in the "Top" node's menu. 309 | #texinfo_no_detailmenu = False 310 | 311 | 312 | # Example configuration for intersphinx: refer to the Python standard library. 
313 | # intersphinx_mapping = {'https://docs.python.org/': None} 314 | 315 | intersphinx_mapping = { 316 | # 'python': 'https://docs.python.org/3.5', 317 | # 'matplotlib': 'https://matplotlib.org', 318 | # 'numpy': ('http://docs.scipy.org/doc/numpy/', None), 319 | # 'mxnet': ('http://mxnet.io', None) 320 | } 321 | 322 | # notebooks will be executed by sphnix_plugin 323 | nbsphinx_execute = 'never' 324 | 325 | def setup(app): 326 | app.add_transform(AutoStructify) 327 | app.add_config_value('recommonmark_config', { 328 | }, True) 329 | app.add_javascript('google_analytics.js') 330 | app.add_stylesheet('gluon.css') 331 | -------------------------------------------------------------------------------- /build/index.rst: -------------------------------------------------------------------------------- 1 | Deep Learning - The Straight Dope 2 | ================================== 3 | 4 | This repo contains an incremental sequence of notebooks designed to teach deep learning, `Apache MXNet (incubating) `_, and the gluon interface. Our goal is to leverage the strengths of Jupyter notebooks to present prose, graphics, equations, and code together in one place. If we're successful, the result will be a resource that could be simultaneously a book, course material, a prop for live tutorials, and a resource for plagiarising (with our blessing) useful code. To our knowledge there's no source out there that teaches either (1) the full breadth of concepts in modern deep learning or (2) interleaves an engaging textbook with runnable code. We'll find out by the end of this venture whether or not that void exists for a good reason. 5 | 6 | Another unique aspect of this book is its authorship process. We are developing this resource fully in the public view and are making it available for free in its entirety. While the book has a few primary authors to set the tone and shape the content, we welcome contributions from the community and hope to coauthor chapters and entire sections with experts and community members. Already we've received contributions spanning typo corrections through full working examples. 7 | 8 | 9 | How to contribute 10 | ================= 11 | 12 | To clone or contribute, visit `Deep Learning - The Straight Dope `_ on Github. 13 | 14 | Dependencies 15 | ============ 16 | 17 | To run these notebooks, a recent version of MXNet is required. The easiest way is to install the nightly build MXNet through ``pip``. E.g.:: 18 | 19 | $ pip install mxnet --pre --user 20 | 21 | More detailed instructions are available `here `_ 22 | 23 | 24 | Part 1: Deep Learning Fundamentals 25 | ================================== 26 | 27 | .. toctree:: 28 | :glob: 29 | :maxdepth: 1 30 | :caption: Crash course 31 | 32 | chapter01_crashcourse/preface 33 | chapter01_crashcourse/introduction 34 | chapter01_crashcourse/ndarray 35 | chapter01_crashcourse/linear-algebra 36 | chapter01_crashcourse/probability 37 | chapter01_crashcourse/autograd 38 | 39 | 40 | .. 
toctree::
 41 |    :glob:
 42 |    :maxdepth: 1
 43 |    :caption: Introduction to supervised learning
 44 | 
 45 |    chapter02_supervised-learning/linear-regression-scratch
 46 |    chapter02_supervised-learning/linear-regression-gluon
 47 |    chapter02_supervised-learning/logistic-regression-gluon
 48 |    chapter02_supervised-learning/softmax-regression-scratch
 49 |    chapter02_supervised-learning/softmax-regression-gluon
 50 |    chapter02_supervised-learning/regularization-scratch
 51 |    chapter02_supervised-learning/regularization-gluon
 52 |    chapter02_supervised-learning/perceptron
 53 |    chapter02_supervised-learning/environment
 54 | 
 55 | .. toctree::
 56 |    :glob:
 57 |    :maxdepth: 1
 58 |    :caption: Deep neural networks
 59 | 
 60 |    chapter03_deep-neural-networks/mlp-scratch
 61 |    chapter03_deep-neural-networks/mlp-gluon
 62 |    chapter03_deep-neural-networks/mlp-dropout-scratch
 63 |    chapter03_deep-neural-networks/mlp-dropout-gluon
 64 |    chapter03_deep-neural-networks/plumbing
 65 |    chapter03_deep-neural-networks/custom-layer
 66 |    chapter03_deep-neural-networks/serialization
 67 | 
 68 | .. toctree::
 69 |    :glob:
 70 |    :maxdepth: 1
 71 |    :caption: Convolutional neural networks
 72 | 
 73 |    chapter04_convolutional-neural-networks/cnn-scratch
 74 |    chapter04_convolutional-neural-networks/cnn-gluon
 75 |    chapter04_convolutional-neural-networks/deep-cnns-alexnet
 76 |    chapter04_convolutional-neural-networks/very-deep-nets-vgg
 77 |    chapter04_convolutional-neural-networks/cnn-batch-norm-scratch
 78 |    chapter04_convolutional-neural-networks/cnn-batch-norm-gluon
 79 | 
 80 | .. toctree::
 81 |    :glob:
 82 |    :maxdepth: 1
 83 |    :caption: Recurrent neural networks
 84 | 
 85 |    chapter05_recurrent-neural-networks/simple-rnn
 86 |    chapter05_recurrent-neural-networks/lstm-scratch
 87 |    chapter05_recurrent-neural-networks/gru-scratch
 88 |    chapter05_recurrent-neural-networks/rnns-gluon
 89 | 
 90 | .. toctree::
 91 |    :glob:
 92 |    :maxdepth: 1
 93 |    :caption: Optimization
 94 | 
 95 |    chapter06_optimization/optimization-intro
 96 |    chapter06_optimization/gd-sgd-scratch
 97 |    chapter06_optimization/gd-sgd-gluon
 98 |    chapter06_optimization/momentum-scratch
 99 |    chapter06_optimization/momentum-gluon
100 |    chapter06_optimization/adagrad-scratch
101 |    chapter06_optimization/adagrad-gluon
102 |    chapter06_optimization/rmsprop-scratch
103 |    chapter06_optimization/rmsprop-gluon
104 |    chapter06_optimization/adadelta-scratch
105 |    chapter06_optimization/adadelta-gluon
106 |    chapter06_optimization/adam-scratch
107 |    chapter06_optimization/adam-gluon
108 | 
109 | .. toctree::
110 |    :glob:
111 |    :maxdepth: 1
112 |    :caption: High-performance and distributed training
113 | 
114 |    chapter07_distributed-learning/hybridize
115 |    chapter07_distributed-learning/multiple-gpus-scratch
116 |    chapter07_distributed-learning/multiple-gpus-gluon
117 |    chapter07_distributed-learning/training-with-multiple-machines
118 | 
119 | 
120 | 
121 | Part 2: Applications
122 | ====================
123 | 
124 | .. toctree::
125 |    :glob:
126 |    :maxdepth: 1
127 |    :caption: Computer vision
128 | 
129 |    chapter08_computer-vision/object-detection
130 |    chapter08_computer-vision/fine-tuning
131 |    chapter08_computer-vision/visual-question-answer
132 | 
133 | .. toctree::
134 |    :glob:
135 |    :maxdepth: 1
136 |    :caption: Natural language processing
137 | 
138 |    chapter09_natural-language-processing/tree-lstm
139 | 
140 | .. toctree::
141 |    :glob:
142 |    :maxdepth: 1
143 |    :caption: Recommender systems
144 | 
145 |    chapter11_recommender-systems/intro-recommender-systems
146 | 
147 | .. toctree::
148 |    :glob:
149 |    :maxdepth: 1
150 |    :caption: Time series
151 | 
152 |    chapter12_time-series/lds-scratch
153 |    chapter12_time-series/issm-scratch
154 | 
155 | Part 3: Advanced Topics
156 | =======================
157 | 
158 | .. toctree::
159 |    :glob:
160 |    :maxdepth: 1
161 |    :caption: Unsupervised learning
162 | 
163 |    chapter13_unsupervised-learning/vae-gluon
164 | 
165 | .. toctree::
166 |    :glob:
167 |    :maxdepth: 1
168 |    :caption: Generative adversarial networks
169 | 
170 |    chapter14_generative-adversarial-networks/gan-intro
171 |    chapter14_generative-adversarial-networks/dcgan
172 |    chapter14_generative-adversarial-networks/pixel2pixel
173 | 
174 | 
175 | .. toctree::
176 |    :glob:
177 |    :maxdepth: 1
178 |    :caption: Variational methods
179 | 
180 |    chapter18_variational-methods-and-uncertainty/bayes-by-backprop.ipynb
181 |    chapter18_variational-methods-and-uncertainty/bayes-by-backprop-gluon.ipynb
182 | 
183 | 
184 | .. toctree::
185 |    :glob:
186 |    :maxdepth: 1
187 |    :caption: Cheat sheets
188 | 
189 |    cheatsheets/kaggle-gluon-kfold.ipynb
190 | 
191 | .. toctree::
192 |    :glob:
193 |    :maxdepth: 1
194 |    :caption: Developer documents
195 | 
196 |    docs/*
197 | 
--------------------------------------------------------------------------------
/build/md2ipynb.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import time
 4 | import notedown
 5 | import nbformat
 6 | 
 7 | assert len(sys.argv) == 3, 'usage: input.md output.ipynb'
 8 | 
 9 | # timeout for each notebook, in sec
10 | timeout = 20 * 60
11 | 
12 | # files listed here will be ignored during execution
13 | ignore_execution = []
14 | 
15 | input_fn = sys.argv[1]
16 | output_fn = sys.argv[2]
17 | 
18 | reader = notedown.MarkdownReader(match='strict')
19 | 
20 | do_eval = int(os.environ.get('EVAL', True))
21 | 
22 | # read
23 | with open(input_fn, 'r') as f:
24 |     notebook = reader.read(f)
25 | 
26 | if do_eval and not any([i in input_fn for i in ignore_execution]):
27 |     tic = time.time()
28 |     notedown.run(notebook, timeout)
29 |     print('=== Finished evaluation in %f sec'%(time.time()-tic))
30 | 
31 | # write
32 | # need to add language info for syntax highlighting
33 | notebook['metadata'].update({'language_info':{'name':'python'}})
34 | 
35 | with open(output_fn, 'w') as f:
36 |     f.write(nbformat.writes(notebook))
37 | 
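A quick usage sketch for md2ipynb.py (the file names here are hypothetical; the EVAL environment variable is read by the script above, so EVAL=0 converts without executing the notebook):

    $ python md2ipynb.py input.md output.ipynb          # convert and execute
    $ EVAL=0 python md2ipynb.py input.md output.ipynb   # convert only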
--------------------------------------------------------------------------------
/chapter01_crashcourse/autograd.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Automatic differentiation with ``autograd``\n",
  8 |     "\n",
  9 |     "\n",
 10 |     "In machine learning, we *train* models to get better and better as a function of experience. Usually, *getting better* means minimizing a *loss function*, i.e. a score that answers \"how *bad* is our model?\" With neural networks, we choose loss functions to be differentiable with respect to our parameters. Put simply, this means that for each of the model's parameters, we can determine how much *increasing* or *decreasing* it might affect the loss. While the calculations are straightforward, for complex models, working it out by hand can be a pain.\n",
 11 |     "\n",
 12 |     "_MXNet_'s autograd package expedites this work by automatically calculating derivatives. And while most other libraries require that we compile a symbolic graph to take automatic derivatives, ``mxnet.autograd``, like PyTorch, allows you to take derivatives while writing ordinary imperative code. Every time you make a pass through your model, ``autograd`` builds a graph on the fly, through which it can immediately backpropagate gradients.\n",
 13 |     "\n",
 14 |     "Let's go through it step by step. For this tutorial, we'll only need to import ``mxnet.ndarray`` and ``mxnet.autograd``."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import mxnet as mx\n",
 26 |     "from mxnet import nd, autograd\n",
 27 |     "mx.random.seed(1)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "## Attaching gradients\n",
 35 |     "\n",
 36 |     "As a toy example, let's say that we are interested in differentiating a function ``f = 2 * (x ** 2)`` with respect to the parameter ``x``. We can start by assigning an initial value to ``x``."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "metadata": {
 43 |     "collapsed": true
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "x = nd.array([[1, 2], [3, 4]])"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "Once we compute the gradient of ``f`` with respect to ``x``, we'll need a place to store it. In _MXNet_, we can tell an NDArray that we plan to store a gradient by invoking its ``attach_grad()`` method."
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 3,
 60 |    "metadata": {
 61 |     "collapsed": true
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "x.attach_grad()"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "Now we're going to define the function ``f`` and *MXNet* will generate a computation graph on the fly. It's as if *MXNet* turned on a recording device and captured the exact path by which each variable was generated.\n",
 73 |     "\n",
 74 |     "Note that building the computation graph requires a nontrivial amount of computation. So *MXNet* will only build the graph when explicitly told to do so. We can instruct *MXNet* to start recording by placing code inside a ``with autograd.record():`` block."
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 4,
 80 |    "metadata": {
 81 |     "collapsed": true
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "with autograd.record():\n",
 86 |     "    y = x * 2\n",
 87 |     "    z = y * x"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "Let's backprop by calling ``z.backward()``. When ``z`` has more than one entry, ``z.backward()`` is equivalent to ``mx.nd.sum(z).backward()``.\n",
 95 |     "\n"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 5,
101 |    "metadata": {
102 |     "collapsed": true
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "z.backward()"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "Now, let's see if this is the expected output. Remember that ``y = x * 2``, and ``z = x * y``, so ``z`` should be equal to ``2 * x * x``. After doing backprop with ``z.backward()``, we expect to get back the gradient dz/dx as follows: dy/dx = ``2``, dz/dx = ``4 * x``. So, if everything went according to plan, ``x.grad`` should consist of an NDArray with the values ``[[4, 8],[12, 16]]``.\n",
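    "\n",
    "As a quick sanity check, we can also compare ``x.grad`` against the analytic gradient $\\frac{dz}{dx} = 4x$ directly (a small illustrative snippet; it assumes only the ``x`` defined above):\n",
    "\n",
    "```python\n",
    "expected = 4 * x\n",
    "print(x.grad == expected)  # elementwise comparison; should be all ones\n",
    "```"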
114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "\n", 126 | "[[ 4. 8.]\n", 127 | " [ 12. 16.]]\n", 128 | "\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "print(x.grad)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## Head gradients and the chain rule\n", 141 | "\n", 142 | "*Caution: This part is tricky, but not necessary to understanding subsequent sections.*\n", 143 | "\n", 144 | "Sometimes when we call the backward method on an NDArray, e.g. ``y.backward()``, where ``y`` is a function of ``x`` we are just interested in the derivative of ``y`` with respect to ``x``. Mathematicians write this as $\\frac{dy(x)}{dx}$. At other times, we may be interested in the gradient of ``z`` with respect to ``x``, where ``z`` is a function of ``y``, which in turn, is a function of ``x``. That is, we are interested in $\\frac{d}{dx} z(y(x))$. Recall that by the chain rule $\\frac{d}{dx} z(y(x)) = \\frac{dz(y)}{dy} \\frac{dy(x)}{dx}$. So, when ``y`` is part of a larger function ``z``, and we want ``x.grad`` to store $\\frac{dz}{dx}$, we can pass in the *head gradient* $\\frac{dz}{dy}$ as an input to ``backward()``. The default argument is ``nd.ones_like(y)``. See [Wikipedia](https://en.wikipedia.org/wiki/Chain_rule) for more details." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 7, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "\n", 157 | "[[ 40. 8. ]\n", 158 | " [ 1.20000005 0.16 ]]\n", 159 | "\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "with autograd.record():\n", 165 | " y = x * 2\n", 166 | " z = y * x\n", 167 | "\n", 168 | "head_gradient = nd.array([[10, 1.], [.1, .01]])\n", 169 | "z.backward(head_gradient)\n", 170 | "print(x.grad)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Now that we know the basics, we can do some wild things with autograd, including building differentiable functions using Pythonic control flow." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 8, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "a = nd.random_normal(shape=3)\n", 189 | "a.attach_grad()\n", 190 | "\n", 191 | "with autograd.record():\n", 192 | " b = a * 2\n", 193 | " while (nd.norm(b) < 1000).asscalar():\n", 194 | " b = b * 2\n", 195 | "\n", 196 | " if (mx.nd.sum(b) > 0).asscalar():\n", 197 | " c = b\n", 198 | " else:\n", 199 | " c = 100 * b" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 9, 205 | "metadata": { 206 | "collapsed": true 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "head_gradient = nd.array([0.01, 1.0, .1])\n", 211 | "c.backward(head_gradient)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 10, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | "\n", 224 | "[ 2048. 204800. 
20480.]\n", 225 | "\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "print(a.grad)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Next\n", 238 | "[Chapter 1 Problem Set](../chapter01_crashcourse/chapter-one-problem-set)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "collapsed": true 253 | }, 254 | "outputs": [], 255 | "source": [] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.6.2" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 2 279 | } 280 | -------------------------------------------------------------------------------- /chapter01_crashcourse/preface.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Preface\n", 8 | "\n", 9 | "If you're a reasonable person, you might ask, \"what is *mxnet-the-straight-dope*?\" You might also ask, \"why does it have such an ostentatious name?\" Speaking to the former question, *mxnet-the-straight-dope* is an attempt to create a new kind of educational resource for deep learning. Our goal is to leverage the strengths of Jupyter notebooks to present prose, graphics, equations, and (importantly) code together in one place. If we're successful, the result will be a resource that could be simultaneously a book, course material, a prop for live tutorials, and a resource for plagiarising (with our blessing) useful code. To our knowledge, few available resources aim either (1) to teach the full breadth of concepts in modern machine learning or (2) to interleave an engaging textbook with runnable code. We'll find out by the end of this venture whether or not that void exists for a good reason.\n", 10 | "\n", 11 | "Regarding the name, we are cognizant that the machine learning community and the ecosystem in which we operate have lurched into an absurd place. In the early 2000s, comparatively few tasks in machine learning had been conquered, but we felt that we understood *how* and *why* those models worked (with some caveats). By contrast, today's machine learning systems are extremely powerful and *actually work* for a growing list of tasks, but huge open questions remain as to precisely *why* they are so effective. \n", 12 | "\n", 13 | "This new world offers enormous opportunity, but has also given rise to considerable buffoonery. Preprint archives like [the arXiv](http://arxiv.org) are flooded with clickbait, AI startups have sometimes received overly optimistic valuations, and the blogosphere is flooded with thought leadership pieces written by marketers bereft of any technical knowledge. Amid the chaos, easy money, and lax standards, we believe it's important not to take our models or the environment in which they are worshipped too seriously. 
Also, in order to explain, visualize, and code the full breadth of models that we aim to address, it's important that the authors do not get bored while writing. \n", 14 | "\n", 15 | "## Organization\n", 16 | "\n", 17 | "At present, we're aiming for the following format: aside from a few (optional) notebooks providing a crash course in the basic mathematical background, each subsequent notebook will both:\n", 18 | "\n", 19 | "1. Introduce a reasonable number (perhaps one) of new concepts\n", 20 | "2. Provide a single self-contained working example, using a real dataset\n", 21 | "\n", 22 | "This presents an organizational challenge. Some models might logically be grouped together in a single notebook. \n", 23 | "And some ideas might be best taught by executing several models in succession. \n", 24 | "On the other hand, there's a big advantage to adhering to a policy of *1 working example, 1 notebook*:\n", 25 | "This makes it as easy as possible for you to start your own research projects \n", 26 | "by plagiarising our code. Just copy a single notebook and start modifying it.\n", 27 | "\n", 28 | "We will interleave the runnable code with background material as needed. \n", 29 | "In general, we will often err on the side of making tools available before explaining them fully \n", 30 | "(and we will follow up by explaining the background later). \n", 31 | "For instance, we might use *stochastic gradient descent* \n", 32 | "before fully explaining why it is useful or why it works. \n", 33 | "This helps to give practitioners the necessary ammunition to solve problems quickly, \n", 34 | "at the expense of requiring the reader to trust us with some decisions, at least in the short term. \n", 35 | "Throughout, we'll be working with the MXNet library, \n", 36 | "which has the rare property of being flexible enough for research \n", 37 | "while being fast enough for production. \n", 38 | "Our more advanced chapters will mostly rely \n", 39 | "on MXNet's new high-level imperative interface ``gluon``. \n", 40 | "Note that this is not the same as ``mxnet.module``, \n", 41 | "an older, symbolic interface supported by MXNet. \n", 42 | "\n", 43 | "This book will teach deep learning concepts from scratch. \n", 44 | "Sometimes, we'll want to delve into fine details about the models \n", 45 | "that are hidden from the user by ``gluon``'s advanced features. \n", 46 | "This comes up especially in the basic tutorials, \n", 47 | "where we'll want you to understand everything that happens in a given layer. \n", 48 | "In these cases, we'll generally present two versions of the example: \n", 49 | "one where we implement everything from scratch, \n", 50 | "relying only on NDArray and automatic differentiation, \n", 51 | "and another where we show how to do things succinctly with ``gluon``. \n", 52 | "Once we've taught you how a layer works, \n", 53 | "we can just use the ``gluon`` version in subsequent tutorials.\n", 54 | "\n", 55 | "## Learning by doing\n", 56 | "\n", 57 | "Many textbooks teach a series of topics, each in exhaustive detail. For example, Chris Bishop's excellent textbook, [Pattern Recognition and Machine Learning](https://www.amazon.com/Pattern-Recognition-Learning-Information-Statistics/dp/0387310738), teaches each topic so thoroughly that getting to the chapter on linear regression requires a non-trivial amount of work. When I (Zack) was first learning machine learning, this actually limited the book's usefulness as an introductory text. 
When I rediscovered it a couple of years later, I loved it precisely for its thoroughness, and I hope you check it out after working through this material! But perhaps the traditional textbook approach is not the easiest way to get started in the first place. \n", 58 | "\n", 59 | "Instead, in this book, we'll teach most concepts just in time. For \n", 60 | "the fundamental preliminaries like linear algebra and probability, \n", 61 | "we'll provide a brief crash course from the outset, \n", 62 | "but we want you to taste the satisfaction of training your first model \n", 63 | "before worrying about exotic probability distributions. \n", 64 | "\n", 65 | "## Next steps\n", 66 | "\n", 67 | "If you're ready to get started, head over to [the introduction](../chapter01_crashcourse/introduction.ipynb) or go straight to [our basic primer on NDArray](./ndarray.ipynb), MXNet's workhorse data structure.\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 75 | ] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.4.3" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } 100 | -------------------------------------------------------------------------------- /chapter02_supervised-learning/regularization-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Overfitting and regularization (with ``gluon``)\n", 8 | "\n", 9 | "Now that we've built a [regularized logistic regression model from scratch](regularization-scratch.html), let's make this more efficient with ``gluon``. We recommend that you read that section for an explanation of why regularization is a good idea. 
As always, we begin by loading libraries and some data.\n", 10 | "\n", 11 | "[**REFINED DRAFT - RELEASE STAGE: CATFOOD**]" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "from __future__ import print_function\n", 23 | "import mxnet as mx\n", 24 | "from mxnet import autograd\n", 25 | "from mxnet import gluon\n", 26 | "import mxnet.ndarray as nd\n", 27 | "import numpy as np\n", 28 | "ctx = mx.cpu()\n", 29 | "\n", 30 | "# for plotting purposes\n", 31 | "%matplotlib inline\n", 32 | "import matplotlib\n", 33 | "import matplotlib.pyplot as plt" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## The MNIST Dataset" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "mnist = mx.test_utils.get_mnist()\n", 52 | "num_examples = 1000\n", 53 | "batch_size = 64\n", 54 | "train_data = mx.gluon.data.DataLoader(\n", 55 | " mx.gluon.data.ArrayDataset(mnist[\"train_data\"][:num_examples],\n", 56 | " mnist[\"train_label\"][:num_examples].astype(np.float32)), \n", 57 | " batch_size, shuffle=True)\n", 58 | "test_data = mx.gluon.data.DataLoader(\n", 59 | " mx.gluon.data.ArrayDataset(mnist[\"test_data\"][:num_examples],\n", 60 | " mnist[\"test_label\"][:num_examples].astype(np.float32)), \n", 61 | " batch_size, shuffle=False)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Multiclass Logistic Regression" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "net = gluon.nn.Sequential()\n", 80 | "with net.name_scope():\n", 81 | " net.add(gluon.nn.Dense(10))" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Parameter initialization\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Softmax Cross Entropy Loss" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "loss = gluon.loss.SoftmaxCrossEntropyLoss()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Optimizer\n", 125 | "\n", 126 | "By default ``gluon`` tries to keep the coefficients from diverging by using a *weight decay* penalty. So, to get the real overfitting experience, we need to switch it off. We do this by passing `'wd': 0.0` when we instantiate the trainer. 
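For intuition about what ``wd`` controls, here is a minimal sketch (not part of the original notebook) of how one could instead add the L2 penalty to the objective by hand. It reuses ``net``, ``nd``, and ``ctx`` from above; the value $\lambda = 0.001$ is purely illustrative, and the penalty must be computed inside the ``autograd.record()`` block for it to contribute to the gradients.

```python
# A hedged sketch: computing the L2 penalty manually instead of using `wd`.
# Assumes `net`, `nd`, and `ctx` as defined in the surrounding notebook.
def l2_penalty(params):
    # Sum of squared entries over all parameters in the network.
    penalty = nd.zeros(shape=(1,), ctx=ctx)
    for param in params.values():
        penalty = penalty + (param.data() ** 2).sum()
    return penalty

# Inside the training loop (within autograd.record()), one could then use:
#     total_loss = nd.mean(cross_entropy) + 0.001 / 2 * l2_penalty(net.collect_params())
#     total_loss.backward()
```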
" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01, 'wd': 0.0})" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## Evaluation Metric" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "def evaluate_accuracy(data_iterator, net, loss_fun):\n", 156 | " acc = mx.metric.Accuracy()\n", 157 | " loss_avg = 0.\n", 158 | " for i, (data, label) in enumerate(data_iterator):\n", 159 | " data = data.as_in_context(ctx).reshape((-1,784))\n", 160 | " label = label.as_in_context(ctx)\n", 161 | " output = net(data)\n", 162 | " loss = loss_fun(output, label) \n", 163 | " predictions = nd.argmax(output, axis=1)\n", 164 | " acc.update(preds=predictions, labels=label)\n", 165 | " loss_avg = loss_avg*i/(i+1) + nd.mean(loss).asscalar()/(i+1)\n", 166 | " return acc.get()[1], loss_avg\n", 167 | "\n", 168 | "def plot_learningcurves(loss_tr,loss_ts, acc_tr,acc_ts):\n", 169 | " xs = list(range(len(loss_tr)))\n", 170 | " \n", 171 | " f = plt.figure(figsize=(12,6))\n", 172 | " fg1 = f.add_subplot(121)\n", 173 | " fg2 = f.add_subplot(122)\n", 174 | " \n", 175 | " fg1.set_xlabel('epoch',fontsize=14)\n", 176 | " fg1.set_title('Comparing loss functions')\n", 177 | " fg1.semilogy(xs, loss_tr)\n", 178 | " fg1.semilogy(xs, loss_ts)\n", 179 | " fg1.grid(True,which=\"both\")\n", 180 | "\n", 181 | " fg1.legend(['training loss', 'testing loss'],fontsize=14)\n", 182 | " \n", 183 | " fg2.set_title('Comparing accuracy')\n", 184 | " fg1.set_xlabel('epoch',fontsize=14)\n", 185 | " fg2.plot(xs, acc_tr)\n", 186 | " fg2.plot(xs, acc_ts)\n", 187 | " fg2.grid(True,which=\"both\")\n", 188 | " fg2.legend(['training accuracy', 'testing accuracy'],fontsize=14)\n", 189 | " plt.show()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Execute training loop" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": true 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "epochs = 700\n", 208 | "moving_loss = 0.\n", 209 | "niter=0\n", 210 | "\n", 211 | "loss_seq_train = []\n", 212 | "loss_seq_test = []\n", 213 | "acc_seq_train = []\n", 214 | "acc_seq_test = []\n", 215 | "\n", 216 | "for e in range(epochs):\n", 217 | " for i, (data, label) in enumerate(train_data):\n", 218 | " data = data.as_in_context(ctx).reshape((-1,784))\n", 219 | " label = label.as_in_context(ctx)\n", 220 | " with autograd.record():\n", 221 | " output = net(data)\n", 222 | " cross_entropy = loss(output, label)\n", 223 | " cross_entropy.backward()\n", 224 | " trainer.step(data.shape[0])\n", 225 | " \n", 226 | " ##########################\n", 227 | " # Keep a moving average of the losses\n", 228 | " ##########################\n", 229 | " niter +=1\n", 230 | " moving_loss = .99 * moving_loss + .01 * nd.mean(cross_entropy).asscalar()\n", 231 | " est_loss = moving_loss/(1-0.99**niter)\n", 232 | " \n", 233 | " test_accuracy, test_loss = evaluate_accuracy(test_data, net, loss)\n", 234 | " train_accuracy, train_loss = evaluate_accuracy(train_data, net, loss)\n", 235 | " \n", 236 | " # save them for later\n", 237 | " loss_seq_train.append(train_loss)\n", 238 | " 
loss_seq_test.append(test_loss)\n", 239 | " acc_seq_train.append(train_accuracy)\n", 240 | " acc_seq_test.append(test_accuracy)\n", 241 | " \n", 242 | " \n", 243 | " if e % 20 == 0:\n", 244 | " print(\"Completed epoch %s. Train Loss: %s, Test Loss %s, Train_acc %s, Test_acc %s\" % \n", 245 | " (e+1, train_loss, test_loss, train_accuracy, test_accuracy)) \n", 246 | "\n", 247 | "## Plotting the learning curves\n", 248 | "plot_learningcurves(loss_seq_train,loss_seq_test,acc_seq_train,acc_seq_test)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "## Regularization\n", 256 | "\n", 257 | "Now let's see what this mysterious *weight decay* is all about. We begin with a bit of math. When we add an L2 penalty to the weights we are effectively adding $\\frac{\\lambda}{2} \\|w\\|^2$ to the loss. Hence, every time we compute the gradient it gets an additional $\\lambda w$ term that is added to $g_t$, since this is the very derivative of the L2 penalty. As a result we end up taking a descent step not in the direction $-\\eta g_t$ but rather in the direction $-\\eta (g_t + \\lambda w)$. This effectively shrinks $w$ at each step by $\\eta \\lambda w$, thus the name weight decay. To make this work in practice we just need to set the weight decay to something nonzero." 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "collapsed": true 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx, force_reinit=True)\n", 269 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01, 'wd': 0.001})\n", 270 | "\n", 271 | "moving_loss = 0.\n", 272 | "niter=0\n", 273 | "loss_seq_train = []\n", 274 | "loss_seq_test = []\n", 275 | "acc_seq_train = []\n", 276 | "acc_seq_test = []\n", 277 | "\n", 278 | "for e in range(epochs):\n", 279 | " for i, (data, label) in enumerate(train_data):\n", 280 | " data = data.as_in_context(ctx).reshape((-1,784))\n", 281 | " label = label.as_in_context(ctx)\n", 282 | " with autograd.record():\n", 283 | " output = net(data)\n", 284 | " cross_entropy = loss(output, label)\n", 285 | " cross_entropy.backward()\n", 286 | " trainer.step(data.shape[0])\n", 287 | " \n", 288 | " ##########################\n", 289 | " # Keep a moving average of the losses\n", 290 | " ##########################\n", 291 | " niter +=1\n", 292 | " moving_loss = .99 * moving_loss + .01 * nd.mean(cross_entropy).asscalar()\n", 293 | " est_loss = moving_loss/(1-0.99**niter)\n", 294 | " \n", 295 | " test_accuracy, test_loss = evaluate_accuracy(test_data, net,loss)\n", 296 | " train_accuracy, train_loss = evaluate_accuracy(train_data, net, loss)\n", 297 | " \n", 298 | " # save them for later\n", 299 | " loss_seq_train.append(train_loss)\n", 300 | " loss_seq_test.append(test_loss)\n", 301 | " acc_seq_train.append(train_accuracy)\n", 302 | " acc_seq_test.append(test_accuracy)\n", 303 | " \n", 304 | " if e % 20 == 0:\n", 305 | " print(\"Completed epoch %s. Train Loss: %s, Test Loss %s, Train_acc %s, Test_acc %s\" % \n", 306 | " (e+1, train_loss, test_loss, train_accuracy, test_accuracy)) \n", 307 | " \n", 308 | "## Plotting the learning curves\n", 309 | "plot_learningcurves(loss_seq_train,loss_seq_test,acc_seq_train,acc_seq_test)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "As we can see, the test accuracy improves a bit. 
Note that the amount by which it improves actually depends on the amount of weight decay. We recommend that you experiment with different amounts of weight decay. For instance, a larger weight decay (e.g. $0.01$) will lead to inferior performance, and one that's larger still ($0.1$) will lead to terrible results. This is one of the reasons why parameter tuning is so important for getting good experimental results in practice." 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "## Next\n", 324 | "[Learning environments](../chapter02_supervised-learning/environment.ipynb)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": { 330 | "collapsed": true 331 | }, 332 | "source": [ 333 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 334 | ] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.6.2" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 2 358 | } 359 | -------------------------------------------------------------------------------- /chapter03_deep-neural-networks/serialization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Serialization - saving, loading and checkpointing\n", 8 | "\n", 9 | "At this point we've already covered quite a lot of ground. \n", 10 | "We know how to manipulate data and labels.\n", 11 | "We know how to construct flexible models capable of expressing plausible hypotheses.\n", 12 | "We know how to fit those models to our dataset.\n", 13 | "We know which loss functions to use for classification and for regression,\n", 14 | "and we know how to minimize those losses with respect to our models' parameters. \n", 15 | "We even know how to write our own neural network layers in ``gluon``.\n", 16 | "\n", 17 | "But even with all this knowledge, we're not ready to build a real machine learning system.\n", 18 | "That's because we haven't yet covered how to save and load models. \n", 19 | "In reality, we often train a model on one device\n", 20 | "and then want to run it to make predictions on many devices simultaneously.\n", 21 | "In order for our models to persist beyond the execution of a single Python script, \n", 22 | "we need mechanisms to save and load NDArrays, ``gluon`` Parameters, and models themselves. " 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "from __future__ import print_function\n", 32 | "import mxnet as mx\n", 33 | "from mxnet import nd, autograd\n", 34 | "from mxnet import gluon\n", 35 | "ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Saving and loading NDArrays\n", 43 | "\n", 44 | "To start, let's show how you can save and load a list of NDArrays for future use. 
Note that while it's possible to use a general Python serialization package like ``pickle``, it's not optimized for use with NDArrays and will be unnecessarily slow. We prefer to use ``ndarray.save`` and ``ndarray.load``. " 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "X = nd.ones((100, 100))\n", 54 | "Y = nd.zeros((100, 100))\n", 55 | "import os\n", 56 | "\n", 57 | "dir_name = 'checkpoints'\n", 58 | "if not os.path.exists(dir_name):\n", 59 | " os.makedirs(dir_name)\n", 60 | "\n", 61 | "filename = os.path.join(dir_name, \"test1.params\")\n", 62 | "nd.save(filename, [X, Y])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "It's just as easy to load a saved NDArray." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "A, B = nd.load(filename)\n", 79 | "print(A)\n", 80 | "print(B)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "We can also save a dictionary where the keys are strings and the values are NDArrays." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "mydict = {\"X\": X, \"Y\": Y}\n", 97 | "filename = os.path.join(dir_name, \"test2.params\")\n", 98 | "nd.save(filename, mydict)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "C = nd.load(filename)\n", 108 | "print(C)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "## Saving and loading the parameters of ``gluon`` models\n", 116 | "\n", 117 | "Recall from [our first look at the plumbing behind ``gluon`` blocks](P03.5-C01-plumbing.ipynb) \n", 118 | "that ``gluon`` wraps the NDArrays corresponding to model parameters in ``Parameter`` objects. \n", 119 | "We'll often want to store and load an entire model's parameters without \n", 120 | "having to individually extract or load the NDArrays from the Parameters via ParameterDicts in each block.\n", 121 | "\n", 122 | "Fortunately, ``gluon`` blocks make our lives very easy by providing ``.save_parameters()`` and ``.load_parameters()`` methods. To see them at work, let's just spin up a simple MLP." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "num_hidden = 256\n", 132 | "num_outputs = 1\n", 133 | "net = gluon.nn.Sequential()\n", 134 | "with net.name_scope():\n", 135 | " net.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 136 | " net.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 137 | " net.add(gluon.nn.Dense(num_outputs))" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Now, let's initialize the parameters by attaching an initializer and actually passing in a datapoint to induce shape inference."
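One wrinkle worth spelling out: because we never told the ``Dense`` layers their input dimensions, ``gluon`` defers the actual allocation of the weights until the first batch flows through the network. A minimal sketch of this behavior follows (not part of the original notebook; it reuses ``num_hidden``, ``ctx``, and the imports from above):

```python
# Deferred initialization in action (a sketch). Until data flows through
# the network, the input dimension of each layer -- and hence the weight
# shape -- is unknown, and shows up as a 0 in the parameter summary.
net3 = gluon.nn.Sequential()
with net3.name_scope():
    net3.add(gluon.nn.Dense(num_hidden, activation="relu"))
net3.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=ctx)
print(net3.collect_params())      # weight shape still contains a 0
net3(nd.ones((1, 100), ctx=ctx))  # the first forward pass triggers the real init
print(net3.collect_params())      # shapes are now fully specified
```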
145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=ctx)\n", 154 | "net(nd.ones((1, 100), ctx=ctx))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "So this randomly initialized model maps a 100-dimensional vector of all ones to the number 362.53 (that's the number on my machine--your mileage may vary).\n", 162 | "Let's save the parameters, instantiate a new network, load them in and make sure that we get the same result." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "filename = os.path.join(dir_name, \"testnet.params\")\n", 172 | "net.save_parameters(filename)\n", 173 | "net2 = gluon.nn.Sequential()\n", 174 | "with net2.name_scope():\n", 175 | " net2.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 176 | " net2.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 177 | " net2.add(gluon.nn.Dense(num_outputs))\n", 178 | "net2.load_parameters(filename, ctx=ctx)\n", 179 | "net2(nd.ones((1, 100), ctx=ctx))" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "Great! Now we're ready to save our work. \n", 187 | "The practice of saving models is sometimes called *checkpointing*\n", 188 | "and it's especially important for a number of reasons.\n", 189 | "1. We can preserve and syndicate models that are trained once.\n", 190 | "2. Some models perform best (as determined on validation data) at some epoch in the middle of training. If we checkpoint the model after each epoch, we can later select the best epoch.\n", 191 | "3. We might want to ask questions about our trained model that we didn't think of when we first wrote the scripts for our experiments. Having the parameters lying around allows us to examine our past work without having to train from scratch.\n", 192 | "4. Sometimes people might want to run our models who don't know how to execute training themselves or can't access a suitable dataset for training. Checkpointing gives us a way to share our work with others." 
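To make point 2 concrete, here is a minimal per-epoch checkpointing sketch (illustrative, not from the original notebook). It assumes a training loop, a held-out dataset ``test_data``, and an accuracy helper like the ``evaluate_accuracy`` functions used elsewhere in this book; the filenames are made up for the example.

```python
# A sketch of checkpointing after every epoch and remembering the best one.
# `net`, `dir_name`, `epochs`, `test_data`, and `evaluate_accuracy` are
# assumed from the surrounding context.
best_acc = 0.0
for epoch in range(epochs):
    # ... run one epoch of training here ...
    val_acc = evaluate_accuracy(test_data, net)
    net.save_parameters(os.path.join(dir_name, "epoch-%d.params" % epoch))
    if val_acc > best_acc:
        best_acc = val_acc
        net.save_parameters(os.path.join(dir_name, "best.params"))

# Later, we can restore the winning epoch into a freshly built network:
# net.load_parameters(os.path.join(dir_name, "best.params"), ctx=ctx)
```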
193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Next\n", 209 | "[Convolutional neural networks from scratch](../chapter04_convolutional-neural-networks/cnn-scratch.ipynb)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "source": [ 218 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 219 | ] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 3", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.6.5" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 2 243 | } 244 | -------------------------------------------------------------------------------- /chapter04_convolutional-neural-networks/cnn-batch-norm-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Batch Normalization in `gluon`\n", 8 | "\n", 9 | "In the preceding section, [we implemented batch normalization ourselves](../chapter04_convolutional-neural-networks/cnn-batch-norm-scratch.ipynb) using NDArray and autograd.\n", 10 | "As with most commonly used neural network layers,\n", 11 | "Gluon has batch normalization predefined,\n", 12 | "so this section is going to be straightforward." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "ExecuteTime": { 20 | "end_time": "2017-10-18T03:21:49.174951Z", 21 | "start_time": "2017-10-18T03:21:48.205450Z" 22 | } 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "from __future__ import print_function\n", 27 | "import mxnet as mx\n", 28 | "from mxnet import nd, autograd\n", 29 | "from mxnet import gluon\n", 30 | "import numpy as np\n", 31 | "mx.random.seed(1)\n", 32 | "ctx = mx.cpu()" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## The MNIST dataset" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "ExecuteTime": { 47 | "end_time": "2017-10-18T03:21:50.220488Z", 48 | "start_time": "2017-10-18T03:21:49.176860Z" 49 | } 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "batch_size = 64\n", 54 | "num_inputs = 784\n", 55 | "num_outputs = 10\n", 56 | "def transform(data, label):\n", 57 | " return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)\n", 58 | "train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),\n", 59 | " batch_size, shuffle=True)\n", 60 | "test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),\n", 61 | " batch_size, shuffle=False)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Define a CNN with Batch Normalization\n", 69 | "\n", 70 | "To add batch normalization to a ``gluon`` model defined with Sequential,\n", 71 | "we only need to add a few lines. 
\n", 72 | "Specifically, we just insert `BatchNorm` layers before the applying the ReLU activations." 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "ExecuteTime": { 80 | "end_time": "2017-10-18T03:21:50.292271Z", 81 | "start_time": "2017-10-18T03:21:50.222527Z" 82 | } 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "num_fc = 512\n", 87 | "net = gluon.nn.Sequential()\n", 88 | "with net.name_scope():\n", 89 | " net.add(gluon.nn.Conv2D(channels=20, kernel_size=5))\n", 90 | " net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))\n", 91 | " net.add(gluon.nn.Activation(activation='relu'))\n", 92 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 93 | " \n", 94 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=5))\n", 95 | " net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))\n", 96 | " net.add(gluon.nn.Activation(activation='relu'))\n", 97 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 98 | " \n", 99 | " # The Flatten layer collapses all axis, except the first one, into one axis.\n", 100 | " net.add(gluon.nn.Flatten())\n", 101 | " \n", 102 | " net.add(gluon.nn.Dense(num_fc))\n", 103 | " net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))\n", 104 | " net.add(gluon.nn.Activation(activation='relu'))\n", 105 | " \n", 106 | " net.add(gluon.nn.Dense(num_outputs))" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## Parameter initialization\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2017-10-18T03:21:50.311368Z", 122 | "start_time": "2017-10-18T03:21:50.296296Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Softmax cross-entropy Loss" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "ExecuteTime": { 142 | "end_time": "2017-10-18T03:21:50.335025Z", 143 | "start_time": "2017-10-18T03:21:50.322603Z" 144 | } 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Optimizer" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "ExecuteTime": { 163 | "end_time": "2017-10-18T03:21:50.350590Z", 164 | "start_time": "2017-10-18T03:21:50.339939Z" 165 | } 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Write evaluation loop to calculate accuracy" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "ExecuteTime": { 184 | "start_time": "2017-10-18T03:21:48.241Z" 185 | } 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "def evaluate_accuracy(data_iterator, net):\n", 190 | " acc = mx.metric.Accuracy()\n", 191 | " for i, (data, label) in enumerate(data_iterator):\n", 192 | " data = data.as_in_context(ctx)\n", 193 | " label = label.as_in_context(ctx)\n", 194 | " output = net(data)\n", 195 | " predictions = nd.argmax(output, axis=1)\n", 196 | 
" acc.update(preds=predictions, labels=label)\n", 197 | " return acc.get()[1]" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "## Training Loop" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "ExecuteTime": { 212 | "start_time": "2017-10-18T03:21:48.244Z" 213 | } 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "epochs = 1\n", 218 | "smoothing_constant = .01\n", 219 | "\n", 220 | "for e in range(epochs):\n", 221 | " for i, (data, label) in enumerate(train_data):\n", 222 | " data = data.as_in_context(ctx)\n", 223 | " label = label.as_in_context(ctx)\n", 224 | " with autograd.record():\n", 225 | " output = net(data)\n", 226 | " loss = softmax_cross_entropy(output, label)\n", 227 | " loss.backward()\n", 228 | " trainer.step(data.shape[0])\n", 229 | " \n", 230 | " ##########################\n", 231 | " # Keep a moving average of the losses\n", 232 | " ##########################\n", 233 | " curr_loss = nd.mean(loss).asscalar()\n", 234 | " moving_loss = (curr_loss if ((i == 0) and (e == 0)) \n", 235 | " else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)\n", 236 | " \n", 237 | " test_accuracy = evaluate_accuracy(test_data, net)\n", 238 | " train_accuracy = evaluate_accuracy(train_data, net)\n", 239 | " print(\"Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\" % (e, moving_loss, train_accuracy, test_accuracy)) " 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "## Next\n", 247 | "[Introduction to recurrent neural networks](../chapter05_recurrent-neural-networks/simple-rnn.ipynb)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 255 | ] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.6.1" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 2 279 | } 280 | -------------------------------------------------------------------------------- /chapter04_convolutional-neural-networks/cnn-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Convolutional Neural Networks in ``gluon``\n", 8 | "\n", 9 | "Now let's see how succinctly we can express a convolutional neural network using ``gluon``. You might be relieved to find out that this too requires hardly any more code than logistic regression. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from __future__ import print_function\n", 21 | "import numpy as np\n", 22 | "import mxnet as mx\n", 23 | "from mxnet import nd, autograd, gluon\n", 24 | "mx.random.seed(1)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Set the context" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# ctx = mx.gpu()\n", 43 | "ctx = mx.cpu()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Grab the MNIST dataset" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "batch_size = 64\n", 62 | "num_inputs = 784\n", 63 | "num_outputs = 10\n", 64 | "def transform(data, label):\n", 65 | " return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)\n", 66 | "train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),\n", 67 | " batch_size, shuffle=True)\n", 68 | "test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),\n", 69 | " batch_size, shuffle=False)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Define a convolutional neural network\n", 77 | "\n", 78 | "Again, a few lines here is all we need in order to change the model. Let's add a couple of convolutional layers using ``gluon.nn``." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "num_fc = 512\n", 90 | "net = gluon.nn.Sequential()\n", 91 | "with net.name_scope():\n", 92 | " net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))\n", 93 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2)) \n", 94 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))\n", 95 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 96 | " # The Flatten layer collapses all axis, except the first one, into one axis.\n", 97 | " net.add(gluon.nn.Flatten())\n", 98 | " net.add(gluon.nn.Dense(num_fc, activation=\"relu\"))\n", 99 | " net.add(gluon.nn.Dense(num_outputs))" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Parameter initialization\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Softmax cross-entropy Loss" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "## Optimizer" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": true 150 | }, 
151 | "outputs": [], 152 | "source": [ 153 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "## Write evaluation loop to calculate accuracy" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "def evaluate_accuracy(data_iterator, net):\n", 172 | " acc = mx.metric.Accuracy()\n", 173 | " for i, (data, label) in enumerate(data_iterator):\n", 174 | " data = data.as_in_context(ctx)\n", 175 | " label = label.as_in_context(ctx)\n", 176 | " output = net(data)\n", 177 | " predictions = nd.argmax(output, axis=1)\n", 178 | " acc.update(preds=predictions, labels=label)\n", 179 | " return acc.get()[1]" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## Training Loop" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "epochs = 1\n", 196 | "smoothing_constant = .01\n", 197 | "\n", 198 | "for e in range(epochs):\n", 199 | " for i, (data, label) in enumerate(train_data):\n", 200 | " data = data.as_in_context(ctx)\n", 201 | " label = label.as_in_context(ctx)\n", 202 | " with autograd.record():\n", 203 | " output = net(data)\n", 204 | " loss = softmax_cross_entropy(output, label)\n", 205 | " loss.backward()\n", 206 | " trainer.step(data.shape[0])\n", 207 | " \n", 208 | " ##########################\n", 209 | " # Keep a moving average of the losses\n", 210 | " ##########################\n", 211 | " curr_loss = nd.mean(loss).asscalar()\n", 212 | " moving_loss = (curr_loss if ((i == 0) and (e == 0)) \n", 213 | " else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)\n", 214 | " \n", 215 | " test_accuracy = evaluate_accuracy(test_data, net)\n", 216 | " train_accuracy = evaluate_accuracy(train_data, net)\n", 217 | " print(\"Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\" % (e, moving_loss, train_accuracy, test_accuracy)) " 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "## Conclusion\n", 225 | "\n", 226 | "You might notice that by using ``gluon``, we get code that runs much faster whether on CPU or GPU. That's largely because ``gluon`` can call down to highly optimized layers that have been written in C++. 
" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Next\n", 234 | "[Deep convolutional networks (AlexNet)](../chapter04_convolutional-neural-networks/deep-cnns-alexnet.ipynb)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 242 | ] 243 | } 244 | ], 245 | "metadata": { 246 | "kernelspec": { 247 | "display_name": "Python 3", 248 | "language": "python", 249 | "name": "python3" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 3 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython3", 261 | "version": "3.6.2" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 2 266 | } 267 | -------------------------------------------------------------------------------- /chapter04_convolutional-neural-networks/very-deep-nets-vgg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Very deep networks with repeating elements\n", 8 | "\n", 9 | "As we already noticed in AlexNet, the number of layers in networks keeps on increasing. This means that it becomes extremely tedious to write code that piles on one layer after the other manually. Fortunately, programming languages have a wonderful fix for this: subroutines and loops. This way we can express networks as *code*. Just like we would use a for loop to count from 1 to 10, we'll use code to combine layers. The first network that had this structure was VGG. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## VGG\n", 17 | "\n", 18 | "We begin with the usual import ritual" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "ExecuteTime": { 26 | "end_time": "2017-10-18T06:00:51.744769Z", 27 | "start_time": "2017-10-18T06:00:51.019959Z" 28 | } 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from __future__ import print_function\n", 33 | "import mxnet as mx\n", 34 | "from mxnet import nd, autograd\n", 35 | "from mxnet import gluon\n", 36 | "import numpy as np\n", 37 | "mx.random.seed(1)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "ExecuteTime": { 45 | "end_time": "2017-10-18T06:00:51.749941Z", 46 | "start_time": "2017-10-18T06:00:51.746808Z" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "ctx = mx.gpu()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Load up a dataset\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "ExecuteTime": { 66 | "end_time": "2017-10-18T06:00:52.252105Z", 67 | "start_time": "2017-10-18T06:00:51.752991Z" 68 | } 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "batch_size = 64\n", 73 | "\n", 74 | "def transform(data, label):\n", 75 | " return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)\n", 76 | "\n", 77 | "train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),\n", 78 | " batch_size, shuffle=True)\n", 79 | "test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),\n", 80 | " batch_size, shuffle=False)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## The VGG architecture\n", 88 | "\n", 89 | "A key aspect of VGG was to use many convolutional blocks with relatively narrow kernels, followed by a max-pooling step and to repeat this block multiple times. What is pretty neat about the code below is that we use functions to *return* network blocks. These are then combined to larger networks (e.g. in `vgg_stack`) and this allows us to construct VGG from components. What is particularly useful here is that we can use it to reparameterize the architecture simply by changing a few lines rather than adding and removing many lines of network definitions. 
" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "ExecuteTime": { 97 | "end_time": "2017-10-18T06:00:52.283905Z", 98 | "start_time": "2017-10-18T06:00:52.254227Z" 99 | } 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "from mxnet.gluon import nn\n", 104 | "\n", 105 | "def vgg_block(num_convs, channels):\n", 106 | " out = nn.Sequential()\n", 107 | " for _ in range(num_convs):\n", 108 | " out.add(nn.Conv2D(channels=channels, kernel_size=3,\n", 109 | " padding=1, activation='relu'))\n", 110 | " out.add(nn.MaxPool2D(pool_size=2, strides=2))\n", 111 | " return out\n", 112 | "\n", 113 | "def vgg_stack(architecture):\n", 114 | " out = nn.Sequential()\n", 115 | " for (num_convs, channels) in architecture:\n", 116 | " out.add(vgg_block(num_convs, channels))\n", 117 | " return out\n", 118 | "\n", 119 | "num_outputs = 10\n", 120 | "architecture = ((1,64), (1,128), (2,256), (2,512))\n", 121 | "net = nn.Sequential()\n", 122 | "with net.name_scope():\n", 123 | " net.add(vgg_stack(architecture))\n", 124 | " net.add(nn.Flatten())\n", 125 | " net.add(nn.Dense(512, activation=\"relu\"))\n", 126 | " net.add(nn.Dropout(.5))\n", 127 | " net.add(nn.Dense(512, activation=\"relu\"))\n", 128 | " net.add(nn.Dropout(.5))\n", 129 | " net.add(nn.Dense(num_outputs))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Initialize parameters" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2017-10-18T06:00:53.879036Z", 145 | "start_time": "2017-10-18T06:00:52.285901Z" 146 | } 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## Optimizer" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "ExecuteTime": { 165 | "end_time": "2017-10-18T06:00:53.920533Z", 166 | "start_time": "2017-10-18T06:00:53.898827Z" 167 | } 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .05})" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Softmax cross-entropy loss" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "ExecuteTime": { 186 | "end_time": "2017-10-18T06:00:53.941011Z", 187 | "start_time": "2017-10-18T06:00:53.922904Z" 188 | } 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## Evaluation loop" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "ExecuteTime": { 207 | "end_time": "2017-10-18T06:00:53.962279Z", 208 | "start_time": "2017-10-18T06:00:53.943086Z" 209 | } 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "def evaluate_accuracy(data_iterator, net):\n", 214 | " acc = mx.metric.Accuracy()\n", 215 | " for d, l in data_iterator:\n", 216 | " data = d.as_in_context(ctx)\n", 217 | " label = l.as_in_context(ctx)\n", 218 | " output = net(data)\n", 219 | " predictions = nd.argmax(output, axis=1)\n", 220 | " acc.update(preds=predictions, labels=label)\n", 221 | " return 
acc.get()[1]" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "## Training loop" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "ExecuteTime": { 236 | "end_time": "2017-10-18T06:02:36.461653Z", 237 | "start_time": "2017-10-18T06:00:53.965101Z" 238 | } 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "###########################\n", 243 | "# Only one epoch so tests can run quickly, increase this variable to actually run\n", 244 | "###########################\n", 245 | "epochs = 1\n", 246 | "smoothing_constant = .01\n", 247 | "\n", 248 | "for e in range(epochs):\n", 249 | " for i, (d, l) in enumerate(train_data):\n", 250 | " data = d.as_in_context(ctx)\n", 251 | " label = l.as_in_context(ctx)\n", 252 | " with autograd.record():\n", 253 | " output = net(data)\n", 254 | " loss = softmax_cross_entropy(output, label)\n", 255 | " loss.backward()\n", 256 | " trainer.step(data.shape[0])\n", 257 | " \n", 258 | " ##########################\n", 259 | " # Keep a moving average of the losses\n", 260 | " ##########################\n", 261 | " curr_loss = nd.mean(loss).asscalar()\n", 262 | " moving_loss = (curr_loss if ((i == 0) and (e == 0)) \n", 263 | " else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)\n", 264 | " \n", 265 | " if i > 0 and i % 200 == 0:\n", 266 | " print('Batch %d. Loss: %f' % (i, moving_loss))\n", 267 | " \n", 268 | " test_accuracy = evaluate_accuracy(test_data, net)\n", 269 | " train_accuracy = evaluate_accuracy(train_data, net)\n", 270 | " print(\"Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\" % (e, moving_loss, train_accuracy, test_accuracy)) " 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "## Next\n", 278 | "[Batch normalization from scratch](../chapter04_convolutional-neural-networks/cnn-batch-norm-scratch.ipynb)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 286 | ] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "Python 3", 292 | "language": "python", 293 | "name": "python3" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.6.1" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 2 310 | } 311 | -------------------------------------------------------------------------------- /chapter07_distributed-learning/multiple-gpus-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training on multiple GPUs with `gluon`\n", 8 | "\n", 9 | "Gluon makes it easy to implement data parallel training.\n", 10 | "In this notebook, we'll implement data parallel training for a convolutional neural network.\n", 11 | "If you'd like a finer grained view of the concepts, \n", 12 | "you might want to first read the previous notebook,\n", 13 | "[multi gpu from scratch](./multiple-gpus-scratch.ipynb) with `gluon`.\n", 14 | "\n", 15 | "To get started, let's first define a simple convolutional neural network and loss 
function." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import mxnet as mx\n", 27 | "from mxnet import nd, gluon, autograd\n", 28 | "net = gluon.nn.Sequential(prefix='cnn_')\n", 29 | "with net.name_scope():\n", 30 | " net.add(gluon.nn.Conv2D(channels=20, kernel_size=3, activation='relu'))\n", 31 | " net.add(gluon.nn.MaxPool2D(pool_size=(2,2), strides=(2,2)))\n", 32 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))\n", 33 | " net.add(gluon.nn.MaxPool2D(pool_size=(2,2), strides=(2,2)))\n", 34 | " net.add(gluon.nn.Flatten())\n", 35 | " net.add(gluon.nn.Dense(128, activation=\"relu\"))\n", 36 | " net.add(gluon.nn.Dense(10))\n", 37 | " \n", 38 | "loss = gluon.loss.SoftmaxCrossEntropyLoss()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Initialize on multiple devices\n", 46 | "\n", 47 | "Gluon supports initialization of network parameters over multiple devices. We accomplish this by passing in an array of device contexts, instead of the single contexts we've used in earlier notebooks.\n", 48 | "When we pass in an array of contexts, the parameters are initialized \n", 49 | "to be identical across all of our devices." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "GPU_COUNT = 2 # increase if you have more\n", 61 | "ctx = [mx.gpu(i) for i in range(GPU_COUNT)]\n", 62 | "net.collect_params().initialize(ctx=ctx)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Given a batch of input data,\n", 70 | "we can split it into parts (equal to the number of contexts) \n", 71 | "by calling `gluon.utils.split_and_load(batch, ctx)`.\n", 72 | "The `split_and_load` function doesn't just split the data,\n", 73 | "it also loads each part onto the appropriate device context. \n", 74 | "\n", 75 | "So now when we call the forward pass on two separate parts,\n", 76 | "each one is computed on the appropriate corresponding device and using the version of the parameters stored there." 
77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "\n", 89 | "[[-0.01876061 -0.02165037 -0.01293943 0.03837404 -0.00821797 -0.00911531\n", 90 | " 0.00416799 -0.00729158 -0.00232711 -0.00155549]\n", 91 | " [ 0.00441474 -0.01953595 -0.00128483 0.02768224 0.01389615 -0.01320441\n", 92 | " -0.01166505 -0.00637776 0.0135425 -0.00611765]]\n", 93 | "\n", 94 | "\n", 95 | "[[ -6.78736670e-03 -8.86893831e-03 -1.04004676e-02 1.72976423e-02\n", 96 | " 2.26115398e-02 -6.36630831e-03 -1.54974898e-02 -1.22633884e-02\n", 97 | " 1.19591374e-02 -6.60043515e-05]\n", 98 | " [ -1.17358668e-02 -2.16879714e-02 1.71219767e-03 2.49827504e-02\n", 99 | " 1.16810966e-02 -9.52543691e-03 -1.03610428e-02 5.08510228e-03\n", 100 | " 7.06662657e-03 -9.25292261e-03]]\n", 101 | "\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "from mxnet.test_utils import get_mnist\n", 107 | "mnist = get_mnist()\n", 108 | "batch = mnist['train_data'][0:GPU_COUNT*2, :]\n", 109 | "data = gluon.utils.split_and_load(batch, ctx)\n", 110 | "print(net(data[0]))\n", 111 | "print(net(data[1]))" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "At any time, we can access the version of the parameters stored on each device. \n", 119 | "Recall from the first Chapter that our weights may not actually be initialized\n", 120 | "when we call `initialize` because the parameter shapes may not yet be known. \n", 121 | "In these cases, initialization is deferred pending shape inference. " 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": { 128 | "scrolled": true 129 | }, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "=== channel 0 of the first conv on gpu(0) ===\n", 136 | "[[[ 0.04118239 0.05352169 -0.04762455]\n", 137 | " [ 0.06035256 -0.01528978 0.04946674]\n", 138 | " [ 0.06110793 -0.00081179 0.02191102]]]\n", 139 | "\n", 140 | "=== channel 0 of the first conv on gpu(1) ===\n", 141 | "[[[ 0.04118239 0.05352169 -0.04762455]\n", 142 | " [ 0.06035256 -0.01528978 0.04946674]\n", 143 | " [ 0.06110793 -0.00081179 0.02191102]]]\n", 144 | "\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "weight = net.collect_params()['cnn_conv0_weight']\n", 150 | "\n", 151 | "for c in ctx:\n", 152 | " print('=== channel 0 of the first conv on {} ==={}'.format(\n", 153 | " c, weight.data(ctx=c)[0]))\n", 154 | " " 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Similarly, we can access the gradients on each of the GPUs. Because each GPU gets a different part of the batch (a different subset of examples), the gradients on each GPU vary. 
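As an aside, before we let `gluon` do this work for us, it may help to see a sketch of what aggregating these per-device gradients by hand could look like. This is only an illustration under our own naming (`aggregate_grads` is a hypothetical helper, not a `gluon` API), and it is not how `gluon` implements aggregation internally:

```python
# Sketch of manual gradient aggregation across devices (illustration only).
def aggregate_grads(param, ctx):
    # pull every device's gradient onto the first context and sum them
    total = param.grad(ctx=ctx[0]).copy()
    for c in ctx[1:]:
        total += param.grad(ctx=c).copyto(ctx[0])
    # push the aggregated gradient back out to every device
    for c in ctx:
        total.copyto(param.grad(ctx=c))

# e.g., aggregate_grads(weight, ctx) after the backward pass below
```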
" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": { 168 | "scrolled": true 169 | }, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "=== grad of channel 0 of the first conv2d on gpu(0) ===\n", 176 | "[[[-0.02078936 -0.00562428 0.01711007]\n", 177 | " [ 0.01138539 0.0280002 0.04094725]\n", 178 | " [ 0.00993335 0.01218192 0.02122578]]]\n", 179 | "\n", 180 | "=== grad of channel 0 of the first conv2d on gpu(1) ===\n", 181 | "[[[-0.02543036 -0.02789939 -0.00302115]\n", 182 | " [-0.04816786 -0.03347274 -0.00403483]\n", 183 | " [-0.03178394 -0.01254033 0.00855637]]]\n", 184 | "\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "def forward_backward(net, data, label):\n", 190 | " with autograd.record():\n", 191 | " losses = [loss(net(X), Y) for X, Y in zip(data, label)]\n", 192 | " for l in losses:\n", 193 | " l.backward()\n", 194 | " \n", 195 | "label = gluon.utils.split_and_load(mnist['train_label'][0:4], ctx)\n", 196 | "forward_backward(net, data, label)\n", 197 | "for c in ctx:\n", 198 | " print('=== grad of channel 0 of the first conv2d on {} ==={}'.format(\n", 199 | " c, weight.grad(ctx=c)[0]))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Put all things together\n", 207 | "\n", 208 | "Now we can implement the remaining functions. Most of them are the same as [when we did everything by hand](./chapter07_distributed-learning/multiple-gpus-scratch.ipynb); one notable difference is that if a `gluon` trainer recognizes multi-devices, it will automatically aggregate the gradients and synchronize the parameters. " 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "scrolled": true 216 | }, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "Running on [gpu(0)]\n", 223 | "Batch size is 64\n", 224 | "Epoch 0, training time = 5.0 sec\n", 225 | " validation accuracy = 0.9738\n", 226 | "Epoch 1, training time = 4.8 sec\n", 227 | " validation accuracy = 0.9841\n", 228 | "Epoch 2, training time = 4.7 sec\n", 229 | " validation accuracy = 0.9863\n", 230 | "Epoch 3, training time = 4.7 sec\n", 231 | " validation accuracy = 0.9868\n", 232 | "Epoch 4, training time = 4.7 sec\n", 233 | " validation accuracy = 0.9877\n", 234 | "Running on [gpu(0), gpu(1)]\n", 235 | "Batch size is 128\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "from mxnet.io import NDArrayIter\n", 241 | "from time import time\n", 242 | "\n", 243 | "def train_batch(batch, ctx, net, trainer):\n", 244 | " # split the data batch and load them on GPUs\n", 245 | " data = gluon.utils.split_and_load(batch.data[0], ctx)\n", 246 | " label = gluon.utils.split_and_load(batch.label[0], ctx)\n", 247 | " # compute gradient\n", 248 | " forward_backward(net, data, label)\n", 249 | " # update parameters\n", 250 | " trainer.step(batch.data[0].shape[0])\n", 251 | " \n", 252 | "def valid_batch(batch, ctx, net):\n", 253 | " data = batch.data[0].as_in_context(ctx[0])\n", 254 | " pred = nd.argmax(net(data), axis=1)\n", 255 | " return nd.sum(pred == batch.label[0].as_in_context(ctx[0])).asscalar() \n", 256 | "\n", 257 | "def run(num_gpus, batch_size, lr): \n", 258 | " # the list of GPUs will be used\n", 259 | " ctx = [mx.gpu(i) for i in range(num_gpus)]\n", 260 | " print('Running on {}'.format(ctx))\n", 261 | " \n", 262 | " # data iterator\n", 263 | " mnist = get_mnist()\n", 264 | " 
train_data = NDArrayIter(mnist[\"train_data\"], mnist[\"train_label\"], batch_size)\n", 265 | " valid_data = NDArrayIter(mnist[\"test_data\"], mnist[\"test_label\"], batch_size)\n", 266 | " print('Batch size is {}'.format(batch_size))\n", 267 | " \n", 268 | " net.collect_params().initialize(force_reinit=True, ctx=ctx)\n", 269 | " trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})\n", 270 | " for epoch in range(5):\n", 271 | " # train\n", 272 | " start = time()\n", 273 | " train_data.reset()\n", 274 | " for batch in train_data:\n", 275 | " train_batch(batch, ctx, net, trainer)\n", 276 | " nd.waitall() # wait until all computations are finished to benchmark the time\n", 277 | " print('Epoch %d, training time = %.1f sec'%(epoch, time()-start))\n", 278 | " \n", 279 | " # validating\n", 280 | " valid_data.reset()\n", 281 | " correct, num = 0.0, 0.0\n", 282 | " for batch in valid_data:\n", 283 | " correct += valid_batch(batch, ctx, net)\n", 284 | " num += batch.data[0].shape[0] \n", 285 | " print(' validation accuracy = %.4f'%(correct/num))\n", 286 | " \n", 287 | "run(1, 64, .3) \n", 288 | "run(GPU_COUNT, 64*GPU_COUNT, .3) " 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Conclusion\n", 296 | "\n", 297 | "Both parameters and trainers in `gluon` support multiple devices. Moving from one device to many devices is straightforward. " 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "## Next\n", 305 | "[Distributed training with multiple machines](../chapter07_distributed-learning/training-with-multiple-machines.ipynb)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 313 | ] 314 | } 315 | ], 316 | "metadata": { 317 | "anaconda-cloud": {}, 318 | "kernelspec": { 319 | "display_name": "Python 3", 320 | "language": "python", 321 | "name": "python3" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 3 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython3", 333 | "version": "3.4.3" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 2 338 | } 339 | -------------------------------------------------------------------------------- /chapter07_distributed-learning/training-with-multiple-machines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Distributed training with multiple machines\n", 8 | "\n", 9 | "In the previous two tutorials, we saw \n", 10 | "that using multiple GPUs within a machine \n", 11 | "can accelerate training. \n", 12 | "The speedup, however, is limited \n", 13 | "by the number of GPUs installed in that machine.\n", 14 | "And it's rare to find a single machine with more than 16 GPUs nowadays. 
\n", 15 | "For some truly large-scale applications, \n", 16 | "this speedup might still be insufficient.\n", 17 | "For example, it could still take many days \n", 18 | "to train a state-of-the-art CNN on millions of images.\n", 19 | "\n", 20 | "In this tutorial, we'll discuss the key concepts you'll need \n", 21 | "in order to go from a program that does single-machine training\n", 22 | "to one that executes distributed training across multiple machines. \n", 23 | "We depict a typical distributed system in the following figure, where\n", 24 | "multiple machines are connected by network switches.\n", 25 | "\n", 26 | "![](../img/multi-machines.svg)\n", 27 | "\n", 28 | "Note that the way we used `copyto` to copy data from one GPU to another in the [multiple-GPU tutorial](../multiple-gpus-scratch.ipynb) does not work when our GPUs are sitting on different machines. To make use of the available resources here well need a better abstraction." 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "source": [ 37 | "## Key-value store\n", 38 | "\n", 39 | "MXNet provides a key-value store to synchronize data among devices. The following code initializes an `ndarray` associated with the key \"weight\" on a key-value store." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 1, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "=== init \"weight\" ===\n", 52 | "[[ 0.54881352 0.59284461 0.71518934]\n", 53 | " [ 0.84426576 0.60276335 0.85794562]]\n", 54 | "\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "from mxnet import kv, nd\n", 60 | "store = kv.create('local')\n", 61 | "shape = (2, 3)\n", 62 | "x = nd.random_uniform(shape=shape)\n", 63 | "store.init('weight', x) \n", 64 | "print('=== init \"weight\" ==={}'.format(x))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "After initialization, we can pull the value to multiple devices. " 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 2, 77 | "metadata": { 78 | "scrolled": true 79 | }, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "=== pull \"weight\" to [gpu(0), gpu(1)] ===\n", 86 | "[\n", 87 | "[[ 0.54881352 0.59284461 0.71518934]\n", 88 | " [ 0.84426576 0.60276335 0.85794562]]\n", 89 | ", \n", 90 | "[[ 0.54881352 0.59284461 0.71518934]\n", 91 | " [ 0.84426576 0.60276335 0.85794562]]\n", 92 | "]\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "from mxnet import gpu\n", 98 | "ctx = [gpu(0), gpu(1)]\n", 99 | "y = [nd.zeros(shape, ctx=c) for c in ctx]\n", 100 | "store.pull('weight', out=y)\n", 101 | "print('=== pull \"weight\" to {} ===\\n{}'.format(ctx, y))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "We can also push new data value into the store. It will first sum the data on the same key and then overwrite the current value." 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 3, 114 | "metadata": { 115 | "scrolled": true 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "=== push to \"weight\" ===\n", 123 | "[\n", 124 | "[[ 1. 1. 1.]\n", 125 | " [ 1. 1. 1.]]\n", 126 | ", \n", 127 | "[[ 2. 2. 2.]\n", 128 | " [ 2. 2. 2.]]\n", 129 | "]\n", 130 | "=== pull \"weight\" ===\n", 131 | "[\n", 132 | "[[ 3. 3. 3.]\n", 133 | " [ 3. 3. 
3.]]\n", 134 | ", \n", 135 | "[[ 3. 3. 3.]\n", 136 | " [ 3. 3. 3.]]\n", 137 | "]\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "z = [nd.ones(shape, ctx=ctx[i])+i for i in range(len(ctx))]\n", 143 | "store.push('weight', z)\n", 144 | "print('=== push to \"weight\" ===\\n{}'.format(z))\n", 145 | "store.pull('weight', out=y)\n", 146 | "print('=== pull \"weight\" ===\\n{}'.format(y))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "With `push` and `pull` we can replace the `allreduce` function defined in [multiple-gpus-scratch](P14-C02-multiple-gpus-scratch.ipynb) by\n", 154 | "\n", 155 | "```python\n", 156 | "def allreduce(data, data_name, store):\n", 157 | " store.push(data_name, data)\n", 158 | " store.pull(data_name, out=data)\n", 159 | "```" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Distributed key-value store\n", 167 | "\n", 168 | "Not only can we synchronize data within a machine, with the key-value store we can facilitate inter-machine communication. To use it, one can create a distributed kvstore by using the following command: (Note: distributed key-value store requires `MXNet` to be compiled with the flag `USE_DIST_KVSTORE=1`, e.g. `make USE_DIST_KVSTORE=1`.)\n", 169 | "\n", 170 | "```python\n", 171 | "store = kv.create('dist')\n", 172 | "```\n", 173 | "\n", 174 | "Now if we run the code from the previous section on two machines at the same time, then the store will aggregate the two ndarrays pushed from each machine, and after that, the pulled results will be: \n", 175 | "\n", 176 | "```\n", 177 | "[[ 6. 6. 6.]\n", 178 | " [ 6. 6. 6.]]\n", 179 | "```\n", 180 | "\n", 181 | "In the distributed setting, `MXNet` launches three kinds of processes (each time, running `python myprog.py` will create a process). One is a *worker*, which runs the user program, such as the code in the previous section. The other two are the *server*, which maintains the data pushed into the store, and the *scheduler*, which monitors the aliveness of each node.\n", 182 | "\n", 183 | "It's up to users which machines to run these processes on. But to simplify the process placement and launching, MXNet provides a tool located at [tools/launch.py](https://github.com/dmlc/mxnet/blob/master/tools/launch.py). \n", 184 | "\n", 185 | "Assume there are two machines, A and B. They are ssh-able, and their IPs are saved in a file named `hostfile`. Then we can start one worker in each machine through: \n", 186 | "\n", 187 | "```\n", 188 | "$ mxnet_path/tools/launch.py -H hostfile -n 2 python myprog.py\n", 189 | "```\n", 190 | "\n", 191 | "It will also start a server in each machine, and the scheduler on the same machine we are currently on.\n", 192 | "\n", 193 | "![](img/dist_kv.svg)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "## Using `kvstore` in `gluon`\n", 201 | "\n", 202 | "As mentioned in [our section on training with multiple GPUs from scratch](multiple-gpus-scratch.ipynb#data-parallelism), to implement data parallelism we just need to specify \n", 203 | "\n", 204 | "- how to split data\n", 205 | "- how to synchronize gradients and weights\n", 206 | "\n", 207 | "We already see from [multiple-gpu-gluon](P14-C03-multiple-gpus-gluon.ipynb#put-all-things-together) that a `gluon` trainer can automatically aggregate the gradients among different GPUs. What it really does is having a key-value store with type `local` within it. 
Therefore, to change to multi-machine training we only need to pass a distributed key-value store, for example,\n", 208 | "\n", 209 | "```python\n", 210 | "store = kv.create('dist')\n", 211 | "trainer = gluon.Trainer(..., kvstore=store)\n", 212 | "```\n", 213 | "\n", 214 | "To split the data, however, we cannot directly copy the previous approach. One commonly used solution is to split the whole dataset into *k* parts at the beginning, then let the *i*-th worker only read the *i*-th part of the data.\n", 215 | "\n", 216 | "We can obtain the total number of workers by reading the attribute `num_workers` and the rank of the current worker from the attribute `rank`." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 4, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "total number of workers: 1\n", 229 | "my rank among workers: 0\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "print('total number of workers: %d'%(store.num_workers))\n", 235 | "print('my rank among workers: %d'%(store.rank))" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "With this information, we can manually access the proper chunk of the input data. In addition, several data iterators provided by `MXNet` already support reading only part of the data. For example,\n", 243 | "\n", 244 | "```python\n", 245 | "from mxnet.io import ImageRecordIter\n", 246 | "data = ImageRecordIter(num_parts=store.num_workers, part_index=store.rank, ...)\n", 247 | "```" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": { 253 | "collapsed": true 254 | }, 255 | "source": [ 256 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.4.3" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 2 281 | } 282 | -------------------------------------------------------------------------------- /chapter11_recommender-systems/intro-recommender-systems.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to recommender systems\n", 8 | "[Early, early draft]\n", 9 | "\n", 10 | "This chapter introduces recommender systems (commonly called RecSys),\n", 11 | "tools that recommend *items* to *users*.\n", 12 | "Many of the most popular uses of recommender systems \n", 13 | "involve suggesting products to customers.\n", 14 | "Amazon, for example, uses recommender systems to choose which retail products to display.\n", 15 | "Recommender systems aren't limited to physical products. 
\n", 16 | "For example, the algorithms that Pandora and Spotify use to curate playlists\n", 17 | "are recommender systems.\n", 18 | "Personalized suggestions on news websites are recommender systems.\n", 19 | "And as of this writing, several carousels on the home page for \n", 20 | "Amazon's Prime Videos's contain personalized TV and Movie recommendations.\n", 21 | "\n", 22 | "![](../img/recommended-prime-tv.png)\n", 23 | "\n", 24 | "I (Zack) have honestly no idea why Amazon wants me to watch Bubble Guppies. \n", 25 | "It's possible that Bubble Guppies is a masterpiece,\n", 26 | "and the recommender systems knows that my life will change upon watching it.\n", 27 | "It's also possible that the recommender made a mistake.\n", 28 | "For example, it might have extrapolated incorrectly from my affinity for the anime Death Note,\n", 29 | "thinking that I would similarly love any animated series.\n", 30 | "And, since I've never rated a nickelodean series (either postiively or negatively),\n", 31 | "the system may have no knowledge to the contrary.\n", 32 | "It's also possible that this series is a new addition to the catalogue,\n", 33 | "and thus they need to recommend the item to many users in ordder to develop a sense of *who* likes Bubble Guppies.\n", 34 | "This problem, of sorting out how to handle a new item, is called the *cold-start* problem.\n", 35 | "\n", 36 | "\n", 37 | "A recommender system doesn't have to use any sophisticated machine learning techniques.\n", 38 | "And it doesn't even have to be personalized.\n", 39 | "One reasonable baseline for most applications \n", 40 | "is to suggest the most popular items to everyone. \n", 41 | "But we have to be careful.\n", 42 | "Depending on how we define popularity,\n", 43 | "we might create a feedback loop.\n", 44 | "The most popular items get recommended which makes them even more popular,\n", 45 | "which makes them even more frequently recommended, etc.\n", 46 | "\n", 47 | "For services with diverse users,\n", 48 | "however, personalization can be essential.\n", 49 | "Diapers are among the most popular items on Amazon,\n", 50 | "but we probably shouldn't recommend diapers \n", 51 | "to adolescents. \n", 52 | "We also probably *should not* recommend anything associated with Justin Bieber\n", 53 | "to a user who *isn't* an adolescent. \n", 54 | "Moreover, we might want to personalize, not only to the user, but to the context.\n", 55 | "For example, just after I bought a Pixel phone,\n", 56 | "I was in the market for a phone case.\n", 57 | "But I have no interested in buying a phone case one year later.\n", 58 | "\n", 59 | "\n", 60 | "## Many ways to pose the problem \n", 61 | "\n", 62 | "While it might seem obvious,\n", 63 | "that personalization is a good strategy,\n", 64 | "it's not immediately obvious how best to articualate \n", 65 | "recommendation as a machine learning problem. 
\n", 66 | "\n", 67 | "Discuss:\n", 68 | "* Rating prediction\n", 69 | "* Passive feedback (view/notview)\n", 70 | "* Content-based recommendation\n", 71 | "\n", 72 | "## Amazon review dataset\n", 73 | "\n", 74 | "* introduce dataset\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import mxnet\n", 86 | "import mxnet.ndarray as nd\n", 87 | "import urllib\n", 88 | "import gzip" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 10, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "with gzip.open(urllib.request.urlopen(\"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Grocery_and_Gourmet_Food_5.json.gz\")) as f:\n", 100 | " data = [eval(l) for l in f]\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 11, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "{'asin': '616719923X',\n", 112 | " 'helpful': [0, 0],\n", 113 | " 'overall': 4.0,\n", 114 | " 'reviewText': 'Just another flavor of Kit Kat but the taste is unique and a bit different. The only thing that is bothersome is the price. I thought it was a bit expensive....',\n", 115 | " 'reviewTime': '06 1, 2013',\n", 116 | " 'reviewerID': 'A1VEELTKS8NLZB',\n", 117 | " 'reviewerName': 'Amazon Customer',\n", 118 | " 'summary': 'Good Taste',\n", 119 | " 'unixReviewTime': 1370044800}" 120 | ] 121 | }, 122 | "execution_count": 11, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "data[0]" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## [Do some dataset exploration]\n", 136 | "* Look at the average rating\n", 137 | "* Look at the number of unique users and items\n", 138 | "* Plot a histogram of the number of ratings/reviews corresponding to each user\n", 139 | "* \"\" for items" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 17, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "users = [d['reviewerID'] for d in data]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 18, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "items = [d['asin'] for d in data]" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 14, 167 | "metadata": { 168 | "collapsed": true 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "ratings = [d['overall'] for d in data]" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "## Models \n", 180 | "* Just the average\n", 181 | "* Offset plus user and item biases\n", 182 | "* Latent factor model / matrix factorization" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": true 190 | }, 191 | "outputs": [], 192 | "source": [] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": true 208 | }, 209 | "outputs": [], 210 | "source": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | 
"collapsed": true 217 | }, 218 | "outputs": [], 219 | "source": [] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": true 226 | }, 227 | "outputs": [], 228 | "source": [] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.4.3" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /chapter11_recommender-systems/introduction-to-recommender-systems.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to recommender systems\n", 8 | "\n", 9 | "* Explain recsys from the beginning\n", 10 | "* Introduce task of rating prediction\n", 11 | "* introduce dataset\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import mxnet\n", 23 | "import mxnet.ndarray as nd\n", 24 | "import urllib\n", 25 | "import gzip" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 10, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "with gzip.open(urllib.request.urlopen(\"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Grocery_and_Gourmet_Food_5.json.gz\")) as f:\n", 35 | " data = [eval(l) for l in f]\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 11, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "{'asin': '616719923X',\n", 47 | " 'helpful': [0, 0],\n", 48 | " 'overall': 4.0,\n", 49 | " 'reviewText': 'Just another flavor of Kit Kat but the taste is unique and a bit different. The only thing that is bothersome is the price. 
I thought it was a bit expensive....',\n", 50 | " 'reviewTime': '06 1, 2013',\n", 51 | " 'reviewerID': 'A1VEELTKS8NLZB',\n", 52 | " 'reviewerName': 'Amazon Customer',\n", 53 | " 'summary': 'Good Taste',\n", 54 | " 'unixReviewTime': 1370044800}" 55 | ] 56 | }, 57 | "execution_count": 11, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "data[0]" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## [Do some dataset exploration]\n", 71 | "* Look at the average rating\n", 72 | "* Look at the number of unique users and items\n", 73 | "* Plot a histogram of the number of ratings/reviews corresponding to each user\n", 74 | "* \"\" for items" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 17, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "users = [d['reviewerID'] for d in data]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 18, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "items = [d['asin'] for d in data]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 14, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "ratings = [d['overall'] for d in data]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Models \n", 111 | "* Just the average\n", 112 | "* Offset plus user and item biases\n", 113 | "* Latent factor model / matrix factorization" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": true 121 | }, 122 | "outputs": [], 123 | "source": [] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": true 130 | }, 131 | "outputs": [], 132 | "source": [] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.4.3" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 2 183 | } 184 | -------------------------------------------------------------------------------- /chapter13_unsupervised-learning/.gitignore: -------------------------------------------------------------------------------- 1 | *.gz 2 | *.params 3 | *.png 4 | -------------------------------------------------------------------------------- /docs/C01-install.md: -------------------------------------------------------------------------------- 1 | # Run these tutorials 2 | 3 | Each tutorial consists of a Jupyter notebook, which is editable and 4 | runnable. To run these notebooks, you must have `python` installed. 
5 | Additionally, you'll need `jupyter` and a recent version of `mxnet`. 6 | The following commands install them through `pip`: 7 | 8 | ```bash 9 | # optional: update pip to the newest version 10 | sudo pip install --upgrade pip 11 | # install jupyter 12 | pip install jupyter --user 13 | # install the nightly built mxnet 14 | pip install mxnet --pre --user 15 | ``` 16 | 17 | The default `MXNet` package only supports CPU but some tutorials require 18 | GPUs. If you are running on a computer that has a GPU and either CUDA 7.5 19 | or 8.0 is installed, then the following commands install a GPU-enabled 20 | version of MXNet. 21 | 22 | ```bash 23 | pip install mxnet-cu80 --pre --user # for CUDA 8.0 24 | pip install mxnet-cu90 --pre --user # for CUDA 9.0 25 | ``` 26 | 27 | After completing installation, you're ready to obtain and run the source code: 28 | 29 | ```bash 30 | git clone https://github.com/zackchase/mxnet-the-straight-dope/ 31 | cd mxnet-the-straight-dope 32 | jupyter notebook 33 | ``` 34 | 35 | The last command starts the Jupyter notebook. You can now run and edit the 36 | notebooks in a web browser. If you're running the notebooks on a server, 37 | then you might want to ssh with the `-L` flag to tie localhost:8888 38 | on your machine to localhost:8888 on the server: 39 | 40 | ``` 41 | ssh myserver -L 8888:localhost:8888 42 | ``` 43 | 44 | Pro tip: if you'd like to run your notebook on some port other than 8888, 45 | launch it with: 46 | 47 | ```jupyter notebook --port <port>``` 48 | -------------------------------------------------------------------------------- /docs/C01-install.rst: -------------------------------------------------------------------------------- 1 | Run these tutorials 2 | =========================== 3 | 4 | Each tutorial is made from a Jupyter notebook, which is editable and 5 | runnable. Assuming ``python`` is already installed, then in addition, both 6 | ``jupyter`` and a recent version of ``mxnet`` are required. The following 7 | commands install them through ``pip``: 8 | 9 | .. code-block:: bash 10 | 11 | # optional: update pip to the newest version 12 | sudo pip install --upgrade pip 13 | # install jupyter 14 | pip install jupyter --user 15 | # install the nightly built mxnet 16 | pip install mxnet --pre --user 17 | 18 | The default ``MXNet`` package only supports CPU while some tutorials may need 19 | GPUs. If a GPU is available and either CUDA 7.5 or 8.0 is installed, then we can 20 | install the GPU-enabled package 21 | 22 | .. code-block:: bash 23 | 24 | pip install mxnet-cu80 --pre --user # for CUDA 8.0 25 | pip install mxnet-cu90 --pre --user # for CUDA 9.0 26 | 27 | Now we are ready to obtain the source code and run it 28 | 29 | .. code-block:: bash 30 | 31 | git clone https://github.com/zackchase/mxnet-the-straight-dope/ 32 | cd mxnet-the-straight-dope 33 | jupyter notebook 34 | 35 | The last command starts the Jupyter notebook; you can now edit and run these 36 | tutorials. 37 | -------------------------------------------------------------------------------- /docs/C02-contribute.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | For whinges and inquiries, please open 4 | [an issue at github](https://github.com/zackchase/mxnet-the-straight-dope/issues). 5 | 6 | To contribute code, please follow these guidelines: 7 | 8 | 1. Check the 9 | [roadmap](https://github.com/zackchase/mxnet-the-straight-dope/#roadmap) 10 | before creating a new tutorial. 11 | 12 | 2. 
Cover only a single new concept in each tutorial, and explain it in detail. Do 13 | not assume readers already know it. 14 | 15 | 3. Make both the prose and the code as simple as possible. Each tutorial should take 16 | no more than 20 minutes to read. 17 | 18 | 4. Do not submit large files, such as datasets or images, to the repo. You can 19 | upload them to a different repo and cross-reference it. For example: 20 | 21 | - Insert an image: 22 | 23 | ``` 24 | ![](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mnist.png) 25 | ``` 26 | 27 | - Download a dataset if it does not exist locally: 28 | 29 | ``` 30 | mx.test_utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/ptb/ptb.train.txt') 31 | ``` 32 | 33 | 5. Resize images to proper sizes. Large images look fine in the notebook, 34 | but they may render poorly in the HTML or PDF format. 35 | 36 | 6. Either restart and evaluate all code blocks or clean all outputs before 37 | submitting. 38 | 39 | - For the former, you can click `Kernel -> Restart & Run All` in the 40 | Jupyter notebook menu. 41 | - For the latter, use `Kernel -> Restart & Clear Output`. Then our Jenkins 42 | server will evaluate this notebook when building the documents. It is 43 | recommended because it can be used as a unit test. But only do it if this 44 | notebook is fast to run (e.g. less than 5 minutes) and does not require a 45 | GPU. 46 | 47 | 7. You can build the documents locally to preview the changes. It requires a GPU 48 | with `CUDA 8.0` installed, and also `conda`. The 49 | following commands create an environment with all requirements installed: 50 | 51 | ```bash 52 | # assume at the root directory of this project 53 | conda env create -f environment.yml 54 | source activate gluon_docs 55 | ``` 56 | 57 | Now you are able to build the HTML pages: 58 | 59 | ```bash 60 | make html 61 | ``` 62 | -------------------------------------------------------------------------------- /docs/C02-contribute.rst: -------------------------------------------------------------------------------- 1 | How to contribute 2 | =================== 3 | 4 | For whinges and inquiries, please open `an issue at github 5 | <https://github.com/zackchase/mxnet-the-straight-dope/issues>`_. 6 | 7 | To contribute code, please follow these guidelines: 8 | 9 | 1. Check the `roadmap 10 | <https://github.com/zackchase/mxnet-the-straight-dope/#roadmap>`_ before 11 | creating a new tutorial. 12 | 13 | 2. Cover only a single new concept in each tutorial, and explain it in detail. Do 14 | not assume readers already know it. 15 | 16 | 3. Make both the prose and the code as simple as possible. Each tutorial should take 17 | no more than 20 minutes to read. 18 | 19 | 4. Do not submit large files, such as datasets or images, to the repo. You can 20 | upload them to a different repo and cross-reference it. For example: 21 | 22 | - Insert an image:: 23 | 24 | ![](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mnist.png) 25 | 26 | - Download a dataset if it does not exist locally:: 27 | 28 | mx.test_utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/ptb/ptb.train.txt') 29 | 30 | 5. Resize images to proper sizes. Large images look fine in the notebook, 31 | but they may render poorly in the HTML or PDF format. 32 | 33 | 6. Either restart and evaluate all code blocks or clean all outputs before 34 | submitting. 35 | 36 | - For the former, you can click ``Kernel -> Restart & Run All`` in the 37 | Jupyter notebook menu. 38 | - For the latter, use ``Kernel -> Restart & Clear Output``. 
Then our Jenkins 39 | server will evaluate this notebook when building the documents. It is 40 | recommended because it can be used as a unit test. But only do it if this 41 | notebook is fast to run (e.g. less than 5 minutes) and does not require a 42 | GPU. 43 | 44 | 7. (Update: this feature is not available for Jupyter now.) If you want to reference a function or class, use 45 | `sphinx domains `_. For example 46 | 47 | - function: ``:func:`mxnet.ndarray.zeros``` to :func:`mxnet.ndarray.zeros` 48 | - class ``:class:`mxnet.gluon.Parameter``` to :class:`mxnet.gluon.Parameter` 49 | - also works for numpy: ``:func:`numpy.zeros``` to :func:`numpy.zeros` 50 | 51 | 8. You can build the documents locally to preview the changes. Assuming ``conda`` 52 | is available, the following commands create an environment with all 53 | requirements installed:: 54 | 55 | # assume at the root directory of this project 56 | conda env create -f environment.yml 57 | source activate gluon_docs 58 | 59 | Now you are able to build the HTML pages:: 60 | 61 | make html 62 | 63 | If LaTeX is installed, you can also build the PDF version:: 64 | 65 | make latex 66 | make -C _build/latex 67 | -------------------------------------------------------------------------------- /docs/publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build all docs and publish the notebooks. 4 | set -x 5 | set -e 6 | 7 | NOTEBOOK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.." 8 | cd ${NOTEBOOK_DIR} 9 | 10 | # install a gpu version 11 | # sed -i.bak s/mxnet/mxnet-cu90/g environment.yml 12 | 13 | # prepare the env 14 | conda env update -f environment.yml 15 | source activate gluon_docs 16 | 17 | make html 18 | 19 | rm -rf ~/www/latest 20 | mv _build/html ~/www/latest 21 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: gluon 2 | dependencies: 3 | - python 4 | - libgfortran 5 | - jupyter 6 | - matplotlib 7 | - pandas 8 | - pip: 9 | - requests 10 | - mxnet>=0.11.1b20171003 11 | -------------------------------------------------------------------------------- /img/Assault-clipped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assault-clipped.png -------------------------------------------------------------------------------- /img/Assault.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assault.png -------------------------------------------------------------------------------- /img/Assualt_DDQN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assualt_DDQN.png -------------------------------------------------------------------------------- /img/Assualt_DDQN_Clipped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assualt_DDQN_Clipped.png -------------------------------------------------------------------------------- 
/img/Pixel2pixel-Unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Pixel2pixel-Unet.png -------------------------------------------------------------------------------- /img/bbb_nn_bayes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/bbb_nn_bayes.png -------------------------------------------------------------------------------- /img/bbb_nn_classic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/bbb_nn_classic.png -------------------------------------------------------------------------------- /img/berliner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/berliner.jpg -------------------------------------------------------------------------------- /img/cat-cartoon1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat-cartoon1.png -------------------------------------------------------------------------------- /img/cat-cartoon2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat-cartoon2.png -------------------------------------------------------------------------------- /img/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat1.jpg -------------------------------------------------------------------------------- /img/cat2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat2.jpg -------------------------------------------------------------------------------- /img/catdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/catdog.jpg -------------------------------------------------------------------------------- /img/cgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cgan.png -------------------------------------------------------------------------------- /img/comic-hot-dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/comic-hot-dog.png -------------------------------------------------------------------------------- /img/data-collection.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/data-collection.png -------------------------------------------------------------------------------- /img/dcgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dcgan.png -------------------------------------------------------------------------------- /img/death_cap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/death_cap.jpg -------------------------------------------------------------------------------- /img/deeplearning_amazon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/deeplearning_amazon.png -------------------------------------------------------------------------------- /img/dist_kv.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Produced by OmniGraffle 6.6.1 2017-07-24 05:08:25 +0000Canvas 2Layer 1Worker 0Worker 1Server 0Server 1Schedulerfrom mxnet import kvstore = kv.create('dist')store.init('w0', …)store.push('w0', …)store.pull('w0', …) from mxnet import kvstore = kv.create('dist')store.init('w0', …)store.push('w0', …)store.pull('w0', …) myprog.pymyprog.pymachine Amachine B 4 | -------------------------------------------------------------------------------- /img/dog-cartoon1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog-cartoon1.png -------------------------------------------------------------------------------- /img/dog-cartoon2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog-cartoon2.jpg -------------------------------------------------------------------------------- /img/dog1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog1.jpg -------------------------------------------------------------------------------- /img/dog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog2.jpg -------------------------------------------------------------------------------- /img/dog_hotdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog_hotdog.jpg -------------------------------------------------------------------------------- /img/dogdogcat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dogdogcat.png -------------------------------------------------------------------------------- 
/img/doughnut.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/doughnut.jpg -------------------------------------------------------------------------------- /img/dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dropout.png -------------------------------------------------------------------------------- /img/fake_bedrooms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/fake_bedrooms.png -------------------------------------------------------------------------------- /img/filters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/filters.png -------------------------------------------------------------------------------- /img/fine-tune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/fine-tune.png -------------------------------------------------------------------------------- /img/gd-move.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/gd-move.png -------------------------------------------------------------------------------- /img/growth-2-20-girls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/growth-2-20-girls.png -------------------------------------------------------------------------------- /img/gtx-580-gpu.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/gtx-580-gpu.jpeg -------------------------------------------------------------------------------- /img/house_pricing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/house_pricing.png -------------------------------------------------------------------------------- /img/imagenet.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/imagenet.jpeg -------------------------------------------------------------------------------- /img/kaggle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/kaggle.png -------------------------------------------------------------------------------- /img/kaggle_submit.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/kaggle_submit.png -------------------------------------------------------------------------------- /img/kaggle_submit2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/kaggle_submit2.png -------------------------------------------------------------------------------- /img/leg_hotdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/leg_hotdog.jpg -------------------------------------------------------------------------------- /img/legendre.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/legendre.jpeg -------------------------------------------------------------------------------- /img/linear-regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/linear-regression.png -------------------------------------------------------------------------------- /img/ml-loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/ml-loop.png -------------------------------------------------------------------------------- /img/momentum-move.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/momentum-move.png -------------------------------------------------------------------------------- /img/multi-gpu.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Produced by OmniGraffle 6.6.1 2017-07-22 21:18:22 +0000Canvas 1Layer 1GPU 0GPU 1GPU 2GPU 3PCIe Switch CPU 4 | -------------------------------------------------------------------------------- /img/multi-machines.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Produced by OmniGraffle 6.6.1 2017-07-23 10:21:45 +0000Canvas 1Layer 1GPU 0GPU 1GPU 2GPU 3PCIe Switch CPUGPU 0GPU 1GPU 2GPU 3PCIe Switch CPUGPU 0GPU 1GPU 2GPU 3PCIe Switch CPUGPU 0GPU 1GPU 2GPU 3PCIe Switch CPUNetwork Switch 4 | -------------------------------------------------------------------------------- /img/multilayer-perceptron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/multilayer-perceptron.png -------------------------------------------------------------------------------- /img/mxnet_google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/mxnet_google.png -------------------------------------------------------------------------------- /img/onelayer.graffle/data.plist: 
/img/onelayer.graffle/image4.pdf: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.graffle/image4.pdf
/img/onelayer.graffle/image5.pdf: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.graffle/image5.pdf
/img/onelayer.graffle/image6.pdf: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.graffle/image6.pdf
/img/onelayer.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.png
/img/operator-context.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/operator-context.png
/img/overfitting-low-data.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/overfitting-low-data.png
/img/pikachu.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/pikachu.jpg
/img/pizza.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/pizza.png
/img/real_hotdog.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/real_hotdog.jpg
/img/recommended-prime-tv.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recommended-prime-tv.png
/img/recurrent-batching.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recurrent-batching.png
/img/recurrent-lm.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recurrent-lm.png
/img/recurrent-motivation.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recurrent-motivation.png
/img/regularization-overfitting.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/regularization-overfitting.png
/img/regularization.graffle: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/regularization.graffle
/img/regularization.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/regularization.png
/img/rl-environment.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/rl-environment.png
/img/road-cliff.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/road-cliff.jpg
/img/simple-gan.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-gan.png
/img/simple-net-linear.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-net-linear.png
/img/simple-rnn.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-rnn.png
/img/simple-softmax-net.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-softmax-net.png
/img/sodapopcoke.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/sodapopcoke.png
/img/speech.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/speech.jpg
/img/ssd.svg: (inlined OmniGraffle SVG diagram; labels: input, body, downsample, scale 0, scale 1, class predictor, box predictor)
/img/supervised-learning.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/supervised-learning.png
/img/taxonomy.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/taxonomy.jpg
/img/tensor_cartoon.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/tensor_cartoon.jpg
/img/tensor_contraction.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/tensor_contraction.png
/img/tensor_fibers.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/tensor_fibers.png
/img/training_model.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/training_model.png
/img/wake-word.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/wake-word.png
/img/whitecat160.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat160.jpg
/img/whitecat20.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat20.jpg
/img/whitecat320.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat320.jpg
/img/whitecat40.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat40.jpg
/img/whitecat80.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat80.jpg
/img/whitedog160.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog160.jpg
/img/whitedog20.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog20.jpg
/img/whitedog320.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog320.jpg
/img/whitedog40.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog40.jpg
/img/whitedog80.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog80.jpg
/media/polly.mp3: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/media/polly.mp3
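Every binary asset above resolves against the same raw.githubusercontent.com prefix, pinned to commit 249cb446a8d0d711c5ca7128ffd68d91fc2e381b, so a short script can mirror any subset of them locally. The following is a minimal sketch, not part of the repository: the ASSETS list and the fetch_asset helper are illustrative names, and only Python's standard library is used.

import urllib.request
from pathlib import Path

# Pinned URL prefix shared by every raw asset in the listing above.
BASE = ("https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/"
        "249cb446a8d0d711c5ca7128ffd68d91fc2e381b")

# Illustrative subset; extend with any repo-relative path from the listing.
ASSETS = ["/img/doughnut.jpg", "/img/dropout.png", "/media/polly.mp3"]

def fetch_asset(repo_path, dest_root=Path(".")):
    # Recreate the repo-relative directory layout under dest_root,
    # then download the asset into place.
    dest = dest_root / repo_path.lstrip("/")
    dest.parent.mkdir(parents=True, exist_ok=True)
    urllib.request.urlretrieve(BASE + repo_path, str(dest))
    return dest

for path in ASSETS:
    print("fetched", fetch_asset(path))

Because the URLs are pinned to a specific commit rather than a branch, the downloads are reproducible even if the files later change or move on master.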