├── .gitattributes ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── build ├── 404.md ├── Makefile ├── _static │ ├── gluon.css │ ├── gluon.png │ ├── gluon_s2.png │ ├── gluon_white.png │ └── google_analytics.js ├── build.sh ├── build.yml ├── conf.py ├── index.rst └── md2ipynb.py ├── chapter01_crashcourse ├── autograd.ipynb ├── chapter-one-problem-set.ipynb ├── introduction.ipynb ├── linear-algebra.ipynb ├── ndarray.ipynb ├── preface.ipynb └── probability.ipynb ├── chapter02_supervised-learning ├── environment.ipynb ├── linear-regression-gluon.ipynb ├── linear-regression-scratch.ipynb ├── logistic-regression-gluon.ipynb ├── perceptron.ipynb ├── regularization-gluon.ipynb ├── regularization-scratch.ipynb ├── softmax-regression-gluon.ipynb └── softmax-regression-scratch.ipynb ├── chapter03_deep-neural-networks ├── custom-layer.ipynb ├── kaggle-gluon-kfold.ipynb ├── mlp-dropout-gluon.ipynb ├── mlp-dropout-scratch.ipynb ├── mlp-gluon.ipynb ├── mlp-scratch.ipynb ├── plumbing.ipynb └── serialization.ipynb ├── chapter04_convolutional-neural-networks ├── cnn-batch-norm-gluon.ipynb ├── cnn-batch-norm-scratch.ipynb ├── cnn-gluon.ipynb ├── cnn-scratch.ipynb ├── deep-cnns-alexnet.ipynb └── very-deep-nets-vgg.ipynb ├── chapter05_recurrent-neural-networks ├── gru-scratch.ipynb ├── lstm-scratch.ipynb ├── rnns-gluon.ipynb └── simple-rnn.ipynb ├── chapter06_optimization ├── adadelta-gluon.ipynb ├── adadelta-scratch.ipynb ├── adagrad-gluon.ipynb ├── adagrad-scratch.ipynb ├── adam-gluon.ipynb ├── adam-scratch.ipynb ├── gd-sgd-gluon.ipynb ├── gd-sgd-scratch.ipynb ├── momentum-gluon.ipynb ├── momentum-scratch.ipynb ├── optimization-intro.ipynb ├── rmsprop-gluon.ipynb └── rmsprop-scratch.ipynb ├── chapter07_distributed-learning ├── hybridize.ipynb ├── multiple-gpus-gluon.ipynb ├── multiple-gpus-scratch.ipynb └── training-with-multiple-machines.ipynb ├── chapter08_computer-vision ├── fine-tuning.ipynb ├── object-detection.ipynb └── visual-question-answer.ipynb ├── chapter09_natural-language-processing └── tree-lstm.ipynb ├── chapter11_recommender-systems ├── intro-recommender-systems.ipynb └── introduction-to-recommender-systems.ipynb ├── chapter12_time-series ├── intro-forecasting-2-gluon.ipynb ├── intro-forecasting-gluon.ipynb ├── issm-scratch.ipynb └── lds-scratch.ipynb ├── chapter13_unsupervised-learning ├── .gitignore └── vae-gluon.ipynb ├── chapter14_generative-adversarial-networks ├── conditional.ipynb ├── dcgan.ipynb ├── gan-intro.ipynb └── pixel2pixel.ipynb ├── chapter16_tensor_methods └── tensor_basics.ipynb ├── chapter17_deep-reinforcement-learning ├── DDQN.ipynb └── DQN.ipynb ├── chapter18_variational-methods-and-uncertainty ├── bayes-by-backprop-gluon.ipynb ├── bayes-by-backprop-rnn.ipynb └── bayes-by-backprop.ipynb ├── chapter19_graph-neural-networks └── Graph-Neural-Networks.ipynb ├── cheatsheets ├── kaggle-gluon-kfold.ipynb └── pytorch_gluon.md ├── data ├── adult │ ├── a1a.test │ └── a1a.train ├── kaggle │ ├── house_pred_test.csv │ └── house_pred_train.csv └── nlp │ ├── ptb.test.txt │ ├── ptb.train.txt │ ├── ptb.valid.txt │ ├── timemachine.txt │ └── tinyshakespeare.txt ├── docs ├── C01-install.md ├── C01-install.rst ├── C02-contribute.md ├── C02-contribute.rst └── publish.sh ├── environment.yml ├── img ├── Assault-clipped.png ├── Assault.png ├── Assualt_DDQN.png ├── Assualt_DDQN_Clipped.png ├── Pixel2pixel-Unet.png ├── bbb_nn_bayes.png ├── bbb_nn_classic.png ├── berliner.jpg ├── cat-cartoon1.png ├── cat-cartoon2.png ├── cat1.jpg ├── cat2.jpg ├── catdog.jpg ├── cgan.png ├── comic-hot-dog.png 
├── data-collection.png ├── dcgan.png ├── death_cap.jpg ├── deeplearning_amazon.png ├── dist_kv.svg ├── dog-cartoon1.png ├── dog-cartoon2.jpg ├── dog1.jpg ├── dog2.jpg ├── dog_hotdog.jpg ├── dogdogcat.png ├── doughnut.jpg ├── dropout.png ├── fake_bedrooms.png ├── filters.png ├── fine-tune.png ├── gd-move.png ├── growth-2-20-girls.png ├── gtx-580-gpu.jpeg ├── house_pricing.png ├── imagenet.jpeg ├── kaggle.png ├── kaggle_submit.png ├── kaggle_submit2.png ├── leg_hotdog.jpg ├── legendre.jpeg ├── linear-regression.png ├── ml-loop.png ├── momentum-move.png ├── multi-gpu.svg ├── multi-machines.svg ├── multilayer-perceptron.png ├── mxnet_google.png ├── onelayer.graffle │ ├── data.plist │ ├── image4.pdf │ ├── image5.pdf │ └── image6.pdf ├── onelayer.png ├── operator-context.png ├── overfitting-low-data.png ├── pikachu.jpg ├── pizza.png ├── real_hotdog.jpg ├── recommended-prime-tv.png ├── recurrent-batching.png ├── recurrent-lm.png ├── recurrent-motivation.png ├── regularization-overfitting.png ├── regularization.graffle ├── regularization.png ├── rl-environment.png ├── road-cliff.jpg ├── simple-gan.png ├── simple-net-linear.png ├── simple-rnn.png ├── simple-softmax-net.png ├── sodapopcoke.png ├── speech.jpg ├── ssd.svg ├── supervised-learning.png ├── taxonomy.jpg ├── tensor_cartoon.jpg ├── tensor_contraction.png ├── tensor_fibers.png ├── training_model.png ├── wake-word.png ├── whitecat160.jpg ├── whitecat20.jpg ├── whitecat320.jpg ├── whitecat40.jpg ├── whitecat80.jpg ├── whitedog160.jpg ├── whitedog20.jpg ├── whitedog320.jpg ├── whitedog40.jpg └── whitedog80.jpg ├── media └── polly.mp3 └── proto-P02-C02.6-loss.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | slides/ICML[[:space:]]2017.key filter=lfs diff=lfs merge=lfs -text 2 | slides/ICML[[:space:]]2017.pdf filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # some data files 2 | data/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | _build 106 | **/.DS_Store 107 | 108 | # mxnet 109 | *.rec 110 | *.params 111 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
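For example, applied to a Python source file, the boilerplate above would appear as the comment block below (an illustrative rendering only; the bracketed fields are deliberately left as placeholders, as the appendix instructs):

# Copyright [yyyy] [name of copyright owner]
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.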
202 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all: html
 2 | 
 3 | build/%.ipynb: %.md
 4 | 	@mkdir -p $(@D)
 5 | 	cd $(@D); python ../md2ipynb.py ../../$< ../../$@
 6 | 
 7 | build/%.ipynb: %.ipynb
 8 | 	@mkdir -p $(@D)
 9 | 	@cp $< $@
10 | 
11 | build/%.md: %.md
12 | 	@mkdir -p $(@D)
13 | 	@cp $< $@
14 | 
15 | 
16 | # markdown files that should not be converted to notebooks
17 | PURE_MK = $(wildcard chapter00_preface/*.md */index.md)
18 | # markdown files that will be converted to .ipynb
19 | MK_NOTEBOOKS = $(filter-out $(PURE_MK), $(wildcard chapter*/*.md))
20 | # jupyter notebooks
21 | IPYNBS = $(wildcard chapter*/*.ipynb)
22 | 
23 | 
24 | OBJ = $(patsubst %.md, build/%.md, $(PURE_MK)) \
25 | 	$(patsubst %.md, build/%.ipynb, $(MK_NOTEBOOKS)) \
26 | 	$(patsubst %.ipynb, build/%.ipynb, $(IPYNBS))
27 | 
28 | ORIGIN_DEPS = $(wildcard img/* data/* media/*) environment.yml README.md
29 | DEPS = $(patsubst %, build/%, $(ORIGIN_DEPS))
30 | 
31 | PKG = build/_build/html/gluon_tutorials.tar.gz build/_build/html/gluon_tutorials.zip
32 | 
33 | pkg: $(PKG)
34 | 
35 | build/_build/html/gluon_tutorials.zip: $(OBJ) $(DEPS)
36 | 	cd build; zip -r $(patsubst build/%, %, $@ $(DEPS)) chapter*
37 | 
38 | build/_build/html/gluon_tutorials.tar.gz: $(OBJ) $(DEPS)
39 | 	cd build; tar -zcvf $(patsubst build/%, %, $@ $(DEPS)) chapter*
40 | 
41 | build/%: %
42 | 	@mkdir -p $(@D)
43 | 	@cp -r $< $@
44 | 
45 | html: $(DEPS) $(OBJ)
46 | 	make -C build html
47 | 
48 | SVG=$(wildcard img/*.svg)
49 | 
50 | build/_build/latex/%.png: img/%.svg
51 | 	convert $< $@
52 | 
53 | pdf: $(DEPS) $(OBJ) $(patsubst img/%.svg, build/_build/latex/%.png, $(SVG))
54 | 	make -C build latex
55 | 	sed -i s/\.svg/\.png/ build/_build/latex/gluon_tutorials.tex
56 | 	cd build/_build/latex; make
57 | 
58 | clean:
59 | 	rm -rf build/chapter* $(DEPS) $(PKG)
--------------------------------------------------------------------------------
/build/404.md:
--------------------------------------------------------------------------------
1 | # Page not found
2 | 
--------------------------------------------------------------------------------
/build/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | PAPER         =
 8 | BUILDDIR      = _build
 9 | 
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 | 
15 | # Internal variables.
16 | PAPEROPT_a4     = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
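# For example, with the defaults above, `make html PAPER=a4` expands to the
# following sphinx-build invocation (an illustrative expansion, not a new
# rule; extra flags can be passed through SPHINXOPTS the same way):
#
#   sphinx-build -b html -d _build/doctrees -D latex_paper_size=a4 . _build/html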
 21 | 
 22 | .PHONY: help
 23 | help:
 24 | 	@echo "Please use \`make <target>' where <target> is one of"
 25 | 	@echo "  html       to make standalone HTML files"
 26 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 27 | 	@echo "  singlehtml to make a single large HTML file"
 28 | 	@echo "  pickle     to make pickle files"
 29 | 	@echo "  json       to make JSON files"
 30 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 31 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 32 | 	@echo "  applehelp  to make an Apple Help Book"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  epub3      to make an epub3"
 36 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 37 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 38 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 39 | 	@echo "  text       to make text files"
 40 | 	@echo "  man        to make manual pages"
 41 | 	@echo "  texinfo    to make Texinfo files"
 42 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 43 | 	@echo "  gettext    to make PO message catalogs"
 44 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 45 | 	@echo "  xml        to make Docutils-native XML files"
 46 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 47 | 	@echo "  linkcheck  to check all external links for integrity"
 48 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 49 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 50 | 	@echo "  dummy      to check syntax errors of document sources"
 51 | 
 52 | .PHONY: clean
 53 | clean:
 54 | 	rm -rf $(BUILDDIR)/*
 55 | 
 56 | .PHONY: html
 57 | html:
 58 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 61 | 
 62 | .PHONY: dirhtml
 63 | dirhtml:
 64 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 65 | 	@echo
 66 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 67 | 
 68 | .PHONY: singlehtml
 69 | singlehtml:
 70 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 71 | 	@echo
 72 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 73 | 
 74 | .PHONY: pickle
 75 | pickle:
 76 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 77 | 	@echo
 78 | 	@echo "Build finished; now you can process the pickle files."
 79 | 
 80 | .PHONY: json
 81 | json:
 82 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 83 | 	@echo
 84 | 	@echo "Build finished; now you can process the JSON files."
 85 | 
 86 | .PHONY: htmlhelp
 87 | htmlhelp:
 88 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 89 | 	@echo
 90 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 91 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 92 | 
 93 | .PHONY: qthelp
 94 | qthelp:
 95 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 96 | 	@echo
 97 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 98 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 99 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TheStraightDope.qhcp"
100 | 	@echo "To view the help file:"
101 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TheStraightDope.qhc"
102 | 
103 | .PHONY: applehelp
104 | applehelp:
105 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
106 | 	@echo
107 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
108 | 	@echo "N.B. 
You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/TheStraightDope" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/TheStraightDope" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 
201 | 
202 | .PHONY: doctest
203 | doctest:
204 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
205 | 	@echo "Testing of doctests in the sources finished, look at the " \
206 | 	      "results in $(BUILDDIR)/doctest/output.txt."
207 | 
208 | .PHONY: coverage
209 | coverage:
210 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
211 | 	@echo "Testing of coverage in the sources finished, look at the " \
212 | 	      "results in $(BUILDDIR)/coverage/python.txt."
213 | 
214 | .PHONY: xml
215 | xml:
216 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
217 | 	@echo
218 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
219 | 
220 | .PHONY: pseudoxml
221 | pseudoxml:
222 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
223 | 	@echo
224 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
225 | 
226 | .PHONY: dummy
227 | dummy:
228 | 	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
229 | 	@echo
230 | 	@echo "Build finished. Dummy builder generates no files."
231 | 
--------------------------------------------------------------------------------
/build/_static/gluon.css:
--------------------------------------------------------------------------------
 1 | code, .rst-content tt, .rst-content code {
 2 |     font-size: 85%;
 3 | }
 4 | 
 5 | .rst-content img {
 6 |     display: block;
 7 |     margin-left: auto;
 8 |     margin-right: auto;
 9 | }
10 | 
--------------------------------------------------------------------------------
/build/_static/gluon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/build/_static/gluon.png
--------------------------------------------------------------------------------
/build/_static/gluon_s2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/build/_static/gluon_s2.png
--------------------------------------------------------------------------------
/build/_static/gluon_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/build/_static/gluon_white.png
--------------------------------------------------------------------------------
/build/_static/google_analytics.js:
--------------------------------------------------------------------------------
1 | (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
2 | (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
3 | m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
4 | })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
5 | 
6 | ga('create', 'UA-96378503-3', 'auto');
7 | ga('send', 'pageview');
8 | 
--------------------------------------------------------------------------------
/build/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Build all notebooks and publish the resulting docs to gluon.mxnet.io.
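# Usage sketch (assumed, not stated in the script itself): run from the
# repository root with the conda and aws CLIs available, e.g.
#
#   bash build/build.sh
#
# `set -x` below echoes each command as it runs; `set -e` aborts on the
# first failure.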
4 | set -x 5 | set -e 6 | 7 | # prepare the env 8 | conda env update -f build/build.yml -n build_sd_tutorials 9 | conda activate build_sd_tutorials 10 | conda list --export 11 | 12 | make html 13 | 14 | # rm -rf build/data 15 | # make pkg 16 | 17 | # make pdf 18 | # cp build/_build/latex/gluon_tutorials.pdf build/_build/html/ 19 | 20 | aws s3 sync --delete build/_build/html/ s3://gluon.mxnet.io/ --acl public-read 21 | -------------------------------------------------------------------------------- /build/build.yml: -------------------------------------------------------------------------------- 1 | name: build_gluon_tutorials 2 | dependencies: 3 | - python 4 | - libgfortran 5 | - jupyter 6 | - sphinx 7 | - sphinx_rtd_theme 8 | - matplotlib 9 | - pandas 10 | - notebook=5.0.0 11 | - pip: 12 | - pyopenssl>= 17.3.0 13 | - nbsphinx 14 | - recommonmark 15 | - https://github.com/mli/notedown/tarball/master 16 | - mxnet-cu90>=0.11.1b20171003 17 | -------------------------------------------------------------------------------- /build/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # The Straight Dope documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 18 10:40:45 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | from recommonmark.parser import CommonMarkParser 19 | from recommonmark.transform import AutoStructify 20 | 21 | 22 | # If extensions (or modules to document with autodoc) are in another directory, 23 | # add these directories to sys.path here. If the directory is relative to the 24 | # documentation root, use os.path.abspath to make it absolute, like shown here. 25 | #sys.path.insert(0, os.path.abspath('.')) 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | #needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.doctest', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.todo', 40 | 'sphinx.ext.mathjax', 41 | 'sphinx.ext.ifconfig', 42 | 'sphinx.ext.viewcode', 43 | 'nbsphinx', 44 | 'IPython.sphinxext.ipython_console_highlighting', 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | source_parsers = {'.md': CommonMarkParser} 51 | 52 | # The suffix(es) of source filenames. 53 | # You can specify multiple suffix as a list of string: 54 | # source_suffix = ['.rst', '.md'] 55 | source_suffix = ['.rst', '.ipynb', '.md'] 56 | 57 | # The encoding of source files. 58 | #source_encoding = 'utf-8-sig' 59 | 60 | # The master toctree document. 61 | master_doc = 'index' 62 | 63 | # General information about the project. 
64 | project = 'The Straight Dope' 65 | copyright = '2017, Contributors' 66 | author = "MXNet Community" 67 | 68 | 69 | # The version info for the project you're documenting, acts as replacement for 70 | # |version| and |release|, also used in various other places throughout the 71 | # built documents. 72 | # 73 | # The short X.Y version. 74 | version = '0.1' 75 | # The full version, including alpha/beta/rc tags. 76 | release = '0.1' 77 | 78 | # The language for content autogenerated by Sphinx. Refer to documentation 79 | # for a list of supported languages. 80 | # 81 | # This is also used if you do content translation via gettext catalogs. 82 | # Usually you set "language" from the command line for these cases. 83 | language = None 84 | 85 | # There are two options for replacing |today|: either, you set today to some 86 | # non-false value, then it is used: 87 | #today = '' 88 | # Else, today_fmt is used as the format for a strftime call. 89 | #today_fmt = '%B %d, %Y' 90 | 91 | # List of patterns, relative to source directory, that match files and 92 | # directories to ignore when looking for source files. 93 | # This patterns also effect to html_static_path and html_extra_path 94 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] 95 | 96 | # The reST default role (used for this markup: `text`) to use for all 97 | # documents. 98 | #default_role = None 99 | 100 | # If true, '()' will be appended to :func: etc. cross-reference text. 101 | #add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | #add_module_names = True 106 | 107 | # If true, sectionauthor and moduleauthor directives will be shown in the 108 | # output. They are ignored by default. 109 | #show_authors = False 110 | 111 | # The name of the Pygments (syntax highlighting) style to use. 112 | pygments_style = 'sphinx' 113 | 114 | # A list of ignored prefixes for module index sorting. 115 | #modindex_common_prefix = [] 116 | 117 | # If true, keep warnings as "system message" paragraphs in the built documents. 118 | #keep_warnings = False 119 | 120 | # If true, `todo` and `todoList` produce output, else they produce nothing. 121 | todo_include_todos = True 122 | 123 | 124 | # -- Options for HTML output ---------------------------------------------- 125 | 126 | # The theme to use for HTML and HTML Help pages. See the documentation for 127 | # a list of builtin themes. 128 | html_theme = 'sphinx_rtd_theme' 129 | 130 | # Theme options are theme-specific and customize the look and feel of a theme 131 | # further. For a list of options available for each theme, see the 132 | # documentation. 133 | #html_theme_options = {} 134 | 135 | # Add any paths that contain custom themes here, relative to this directory. 136 | #html_theme_path = [] 137 | 138 | # The name for this set of Sphinx documents. 139 | # " v documentation" by default. 140 | #html_title = 'The Straight Dope v0.1' 141 | 142 | # A shorter title for the navigation bar. Default is the same as html_title. 143 | #html_short_title = None 144 | 145 | # The name of an image file (relative to this directory) to place at the top 146 | # of the sidebar. 147 | html_logo = '_static/gluon_white.png' 148 | 149 | # The name of an image file (relative to this directory) to use as a favicon of 150 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 151 | # pixels large. 
152 | html_favicon = '_static/gluon_s2.png' 153 | 154 | # Add any paths that contain custom static files (such as style sheets) here, 155 | # relative to this directory. They are copied after the builtin static files, 156 | # so a file named "default.css" will overwrite the builtin "default.css". 157 | html_static_path = ['_static'] 158 | 159 | # Add any extra paths that contain custom files (such as robots.txt or 160 | # .htaccess) here, relative to this directory. These files are copied 161 | # directly to the root of the documentation. 162 | #html_extra_path = [] 163 | 164 | # If not None, a 'Last updated on:' timestamp is inserted at every page 165 | # bottom, using the given strftime format. 166 | # The empty string is equivalent to '%b %d, %Y'. 167 | #html_last_updated_fmt = None 168 | 169 | # If true, SmartyPants will be used to convert quotes and dashes to 170 | # typographically correct entities. 171 | #html_use_smartypants = True 172 | 173 | # Custom sidebar templates, maps document names to template names. 174 | #html_sidebars = {} 175 | 176 | # Additional templates that should be rendered to pages, maps page names to 177 | # template names. 178 | #html_additional_pages = {} 179 | 180 | # If false, no module index is generated. 181 | #html_domain_indices = True 182 | 183 | # If false, no index is generated. 184 | #html_use_index = True 185 | 186 | # If true, the index is split into individual pages for each letter. 187 | #html_split_index = False 188 | 189 | # If true, links to the reST sources are added to the pages. 190 | #html_show_sourcelink = True 191 | 192 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 193 | #html_show_sphinx = True 194 | 195 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 196 | #html_show_copyright = True 197 | 198 | # If true, an OpenSearch description file will be output, and all pages will 199 | # contain a tag referring to it. The value of this option must be the 200 | # base URL from which the finished HTML is served. 201 | #html_use_opensearch = '' 202 | 203 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 204 | #html_file_suffix = None 205 | 206 | # Language to be used for generating the HTML full-text search index. 207 | # Sphinx supports the following languages: 208 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 209 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 210 | # html_search_language = 'zh' 211 | 212 | # A dictionary with options for the search language support, empty by default. 213 | # 'ja' uses this config value. 214 | # 'zh' user can custom change `jieba` dictionary path. 215 | 216 | # The name of a javascript file (relative to the configuration directory) that 217 | # implements a search results scorer. If empty, the default will be used. 218 | #html_search_scorer = 'scorer.js' 219 | 220 | # Output file base name for HTML help builder. 221 | htmlhelp_basename = 'TheStraightDopedoc' 222 | 223 | # -- Options for LaTeX output --------------------------------------------- 224 | 225 | latex_elements = { 226 | # 'papersize' : 'a4paper', 227 | 'utf8extra' : '', 228 | 'inputenc' : '', 229 | 'babel' : r'''\usepackage[english]{babel}''', 230 | 'preamble' : r''' 231 | ''', 232 | # The paper size ('letterpaper' or 'a4paper'). 233 | #'papersize': 'letterpaper', 234 | 235 | # The font size ('10pt', '11pt' or '12pt'). 236 | 'pointsize': '11pt', 237 | 238 | # Additional stuff for the LaTeX preamble. 
239 | #'preamble': '', 240 | 241 | # Latex figure (float) alignment 242 | #'figure_align': 'htbp', 243 | } 244 | 245 | # Grouping the document tree into LaTeX files. List of tuples 246 | # (source start file, target name, title, 247 | # author, documentclass [howto, manual, or own class]). 248 | latex_documents = [ 249 | (master_doc, 'gluon_tutorials.tex', 'Deep Learning - The Straight Dope', 250 | author, 'manual'), 251 | ] 252 | 253 | # The name of an image file (relative to this directory) to place at the top of 254 | # the title page. 255 | latex_logo = '_static/gluon.png' 256 | 257 | # latex_engine = 'xelatex' 258 | # For "manual" documents, if this is true, then toplevel headings are parts, 259 | # not chapters. 260 | #latex_use_parts = False 261 | 262 | # If true, show page references after internal links. 263 | #latex_show_pagerefs = False 264 | 265 | # If true, show URL addresses after external links. 266 | #latex_show_urls = False 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #latex_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | latex_domain_indices = False 273 | 274 | 275 | # -- Options for manual page output --------------------------------------- 276 | 277 | # One entry per manual page. List of tuples 278 | # (source start file, name, description, authors, manual section). 279 | man_pages = [ 280 | (master_doc, 'thestraightdope', 'The Straight Dope Documentation', 281 | [author], 1) 282 | ] 283 | 284 | # If true, show URL addresses after external links. 285 | #man_show_urls = False 286 | 287 | 288 | # -- Options for Texinfo output ------------------------------------------- 289 | 290 | # Grouping the document tree into Texinfo files. List of tuples 291 | # (source start file, target name, title, author, 292 | # dir menu entry, description, category) 293 | texinfo_documents = [ 294 | (master_doc, 'TheStraightDope', 'The Straight Dope Documentation', 295 | author, 'TheStraightDope', 'One line description of project.', 296 | 'Miscellaneous'), 297 | ] 298 | 299 | # Documents to append as an appendix to all manuals. 300 | #texinfo_appendices = [] 301 | 302 | # If false, no module index is generated. 303 | #texinfo_domain_indices = True 304 | 305 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 306 | #texinfo_show_urls = 'footnote' 307 | 308 | # If true, do not generate a @detailmenu in the "Top" node's menu. 309 | #texinfo_no_detailmenu = False 310 | 311 | 312 | # Example configuration for intersphinx: refer to the Python standard library. 
313 | # intersphinx_mapping = {'https://docs.python.org/': None} 314 | 315 | intersphinx_mapping = { 316 | # 'python': 'https://docs.python.org/3.5', 317 | # 'matplotlib': 'https://matplotlib.org', 318 | # 'numpy': ('http://docs.scipy.org/doc/numpy/', None), 319 | # 'mxnet': ('http://mxnet.io', None) 320 | } 321 | 322 | # notebooks will be executed by sphnix_plugin 323 | nbsphinx_execute = 'never' 324 | 325 | def setup(app): 326 | app.add_transform(AutoStructify) 327 | app.add_config_value('recommonmark_config', { 328 | }, True) 329 | app.add_javascript('google_analytics.js') 330 | app.add_stylesheet('gluon.css') 331 | -------------------------------------------------------------------------------- /build/index.rst: -------------------------------------------------------------------------------- 1 | Deep Learning - The Straight Dope 2 | ================================== 3 | 4 | This repo contains an incremental sequence of notebooks designed to teach deep learning, `Apache MXNet (incubating) `_, and the gluon interface. Our goal is to leverage the strengths of Jupyter notebooks to present prose, graphics, equations, and code together in one place. If we're successful, the result will be a resource that could be simultaneously a book, course material, a prop for live tutorials, and a resource for plagiarising (with our blessing) useful code. To our knowledge there's no source out there that teaches either (1) the full breadth of concepts in modern deep learning or (2) interleaves an engaging textbook with runnable code. We'll find out by the end of this venture whether or not that void exists for a good reason. 5 | 6 | Another unique aspect of this book is its authorship process. We are developing this resource fully in the public view and are making it available for free in its entirety. While the book has a few primary authors to set the tone and shape the content, we welcome contributions from the community and hope to coauthor chapters and entire sections with experts and community members. Already we've received contributions spanning typo corrections through full working examples. 7 | 8 | 9 | How to contribute 10 | ================= 11 | 12 | To clone or contribute, visit `Deep Learning - The Straight Dope `_ on Github. 13 | 14 | Dependencies 15 | ============ 16 | 17 | To run these notebooks, a recent version of MXNet is required. The easiest way is to install the nightly build MXNet through ``pip``. E.g.:: 18 | 19 | $ pip install mxnet --pre --user 20 | 21 | More detailed instructions are available `here `_ 22 | 23 | 24 | Part 1: Deep Learning Fundamentals 25 | ================================== 26 | 27 | .. toctree:: 28 | :glob: 29 | :maxdepth: 1 30 | :caption: Crash course 31 | 32 | chapter01_crashcourse/preface 33 | chapter01_crashcourse/introduction 34 | chapter01_crashcourse/ndarray 35 | chapter01_crashcourse/linear-algebra 36 | chapter01_crashcourse/probability 37 | chapter01_crashcourse/autograd 38 | 39 | 40 | .. 
toctree::
 41 |    :glob:
 42 |    :maxdepth: 1
 43 |    :caption: Introduction to supervised learning
 44 | 
 45 |    chapter02_supervised-learning/linear-regression-scratch
 46 |    chapter02_supervised-learning/linear-regression-gluon
 47 |    chapter02_supervised-learning/logistic-regression-gluon
 48 |    chapter02_supervised-learning/softmax-regression-scratch
 49 |    chapter02_supervised-learning/softmax-regression-gluon
 50 |    chapter02_supervised-learning/regularization-scratch
 51 |    chapter02_supervised-learning/regularization-gluon
 52 |    chapter02_supervised-learning/perceptron
 53 |    chapter02_supervised-learning/environment
 54 | 
 55 | .. toctree::
 56 |    :glob:
 57 |    :maxdepth: 1
 58 |    :caption: Deep neural networks
 59 | 
 60 |    chapter03_deep-neural-networks/mlp-scratch
 61 |    chapter03_deep-neural-networks/mlp-gluon
 62 |    chapter03_deep-neural-networks/mlp-dropout-scratch
 63 |    chapter03_deep-neural-networks/mlp-dropout-gluon
 64 |    chapter03_deep-neural-networks/plumbing
 65 |    chapter03_deep-neural-networks/custom-layer
 66 |    chapter03_deep-neural-networks/serialization
 67 | 
 68 | .. toctree::
 69 |    :glob:
 70 |    :maxdepth: 1
 71 |    :caption: Convolutional neural networks
 72 | 
 73 |    chapter04_convolutional-neural-networks/cnn-scratch
 74 |    chapter04_convolutional-neural-networks/cnn-gluon
 75 |    chapter04_convolutional-neural-networks/deep-cnns-alexnet
 76 |    chapter04_convolutional-neural-networks/very-deep-nets-vgg
 77 |    chapter04_convolutional-neural-networks/cnn-batch-norm-scratch
 78 |    chapter04_convolutional-neural-networks/cnn-batch-norm-gluon
 79 | 
 80 | .. toctree::
 81 |    :glob:
 82 |    :maxdepth: 1
 83 |    :caption: Recurrent neural networks
 84 | 
 85 |    chapter05_recurrent-neural-networks/simple-rnn
 86 |    chapter05_recurrent-neural-networks/lstm-scratch
 87 |    chapter05_recurrent-neural-networks/gru-scratch
 88 |    chapter05_recurrent-neural-networks/rnns-gluon
 89 | 
 90 | .. toctree::
 91 |    :glob:
 92 |    :maxdepth: 1
 93 |    :caption: Optimization
 94 | 
 95 |    chapter06_optimization/optimization-intro
 96 |    chapter06_optimization/gd-sgd-scratch
 97 |    chapter06_optimization/gd-sgd-gluon
 98 |    chapter06_optimization/momentum-scratch
 99 |    chapter06_optimization/momentum-gluon
100 |    chapter06_optimization/adagrad-scratch
101 |    chapter06_optimization/adagrad-gluon
102 |    chapter06_optimization/rmsprop-scratch
103 |    chapter06_optimization/rmsprop-gluon
104 |    chapter06_optimization/adadelta-scratch
105 |    chapter06_optimization/adadelta-gluon
106 |    chapter06_optimization/adam-scratch
107 |    chapter06_optimization/adam-gluon
108 | 
109 | .. toctree::
110 |    :glob:
111 |    :maxdepth: 1
112 |    :caption: High-performance and distributed training
113 | 
114 |    chapter07_distributed-learning/hybridize
115 |    chapter07_distributed-learning/multiple-gpus-scratch
116 |    chapter07_distributed-learning/multiple-gpus-gluon
117 |    chapter07_distributed-learning/training-with-multiple-machines
118 | 
119 | 
120 | 
121 | Part 2: Applications
122 | ====================
123 | 
124 | .. toctree::
125 |    :glob:
126 |    :maxdepth: 1
127 |    :caption: Computer vision
128 | 
129 |    chapter08_computer-vision/object-detection
130 |    chapter08_computer-vision/fine-tuning
131 |    chapter08_computer-vision/visual-question-answer
132 | 
133 | .. toctree::
134 |    :glob:
135 |    :maxdepth: 1
136 |    :caption: Natural language processing
137 | 
138 |    chapter09_natural-language-processing/tree-lstm
139 | 
140 | .. toctree::
141 |    :glob:
142 |    :maxdepth: 1
143 |    :caption: Recommender systems
144 | 
145 |    chapter11_recommender-systems/intro-recommender-systems
146 | 
147 | .. toctree::
148 |    :glob:
149 |    :maxdepth: 1
150 |    :caption: Time series
151 | 
152 |    chapter12_time-series/lds-scratch
153 |    chapter12_time-series/issm-scratch
154 | 
155 | Part 3: Advanced Topics
156 | =======================
157 | 
158 | .. toctree::
159 |    :glob:
160 |    :maxdepth: 1
161 |    :caption: Unsupervised learning
162 | 
163 |    chapter13_unsupervised-learning/vae-gluon
164 | 
165 | .. toctree::
166 |    :glob:
167 |    :maxdepth: 1
168 |    :caption: Generative adversarial networks
169 | 
170 |    chapter14_generative-adversarial-networks/gan-intro
171 |    chapter14_generative-adversarial-networks/dcgan
172 |    chapter14_generative-adversarial-networks/pixel2pixel
173 | 
174 | 
175 | .. toctree::
176 |    :glob:
177 |    :maxdepth: 1
178 |    :caption: Variational methods
179 | 
180 |    chapter18_variational-methods-and-uncertainty/bayes-by-backprop.ipynb
181 |    chapter18_variational-methods-and-uncertainty/bayes-by-backprop-gluon.ipynb
182 | 
183 | 
184 | .. toctree::
185 |    :glob:
186 |    :maxdepth: 1
187 |    :caption: Cheat sheets
188 | 
189 |    cheatsheets/kaggle-gluon-kfold.ipynb
190 | 
191 | .. toctree::
192 |    :glob:
193 |    :maxdepth: 1
194 |    :caption: Developer documents
195 | 
196 |    docs/*
197 | 
--------------------------------------------------------------------------------
/build/md2ipynb.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import time
 4 | import notedown
 5 | import nbformat
 6 | 
 7 | assert len(sys.argv) == 3, 'usage: input.md output.ipynb'
 8 | 
 9 | # timeout for each notebook, in sec
10 | timeout = 20 * 60
11 | 
12 | # files listed here will be ignored during execution
13 | ignore_execution = []
14 | 
15 | input_fn = sys.argv[1]
16 | output_fn = sys.argv[2]
17 | 
18 | reader = notedown.MarkdownReader(match='strict')
19 | 
20 | do_eval = int(os.environ.get('EVAL', True))
21 | 
22 | # read
23 | with open(input_fn, 'r') as f:
24 |     notebook = reader.read(f)
25 | 
26 | if do_eval and not any([i in input_fn for i in ignore_execution]):
27 |     tic = time.time()
28 |     notedown.run(notebook, timeout)
29 |     print('=== Finished evaluation in %f sec'%(time.time()-tic))
30 | 
31 | # write
32 | # need to add language info for syntax highlighting
33 | notebook['metadata'].update({'language_info':{'name':'python'}})
34 | 
35 | with open(output_fn, 'w') as f:
36 |     f.write(nbformat.writes(notebook))
37 | 
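A quick usage sketch for md2ipynb.py (the file names here are hypothetical; the EVAL environment variable is read by the script above, so EVAL=0 converts without executing the notebook):

    $ python md2ipynb.py input.md output.ipynb          # convert and execute
    $ EVAL=0 python md2ipynb.py input.md output.ipynb   # convert only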
--------------------------------------------------------------------------------
/chapter01_crashcourse/autograd.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Automatic differentiation with ``autograd``\n",
  8 |     "\n",
  9 |     "\n",
 10 |     "In machine learning, we *train* models to get better and better as a function of experience. Usually, *getting better* means minimizing a *loss function*, i.e. a score that answers \"how *bad* is our model?\" With neural networks, we choose loss functions to be differentiable with respect to our parameters. Put simply, this means that for each of the model's parameters, we can determine how much *increasing* or *decreasing* it might affect the loss. While the calculations are straightforward, for complex models, working it out by hand can be a pain.\n",
 11 |     "\n",
 12 |     "_MXNet_'s autograd package expedites this work by automatically calculating derivatives. And while most other libraries require that we compile a symbolic graph to take automatic derivatives, ``mxnet.autograd``, like PyTorch, allows you to take derivatives while writing ordinary imperative code. Every time you make a pass through your model, ``autograd`` builds a graph on the fly, through which it can immediately backpropagate gradients.\n",
 13 |     "\n",
 14 |     "Let's go through it step by step. For this tutorial, we'll only need to import ``mxnet.ndarray`` and ``mxnet.autograd``."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "import mxnet as mx\n",
 26 |     "from mxnet import nd, autograd\n",
 27 |     "mx.random.seed(1)"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "## Attaching gradients\n",
 35 |     "\n",
 36 |     "As a toy example, let's say that we are interested in differentiating a function ``f = 2 * (x ** 2)`` with respect to the parameter ``x``. We can start by assigning an initial value to ``x``."
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "metadata": {
 43 |     "collapsed": true
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "x = nd.array([[1, 2], [3, 4]])"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "Once we compute the gradient of ``f`` with respect to ``x``, we'll need a place to store it. In _MXNet_, we can tell an NDArray that we plan to store a gradient by invoking its ``attach_grad()`` method."
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 3,
 60 |    "metadata": {
 61 |     "collapsed": true
 62 |    },
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "x.attach_grad()"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "Now we're going to define the function ``f`` and *MXNet* will generate a computation graph on the fly. It's as if *MXNet* turned on a recording device and captured the exact path by which each variable was generated.\n",
 73 |     "\n",
 74 |     "Note that building the computation graph requires a nontrivial amount of computation. So *MXNet* will only build the graph when explicitly told to do so. We can instruct *MXNet* to start recording by placing code inside a ``with autograd.record():`` block."
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 4,
 80 |    "metadata": {
 81 |     "collapsed": true
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "with autograd.record():\n",
 86 |     "    y = x * 2\n",
 87 |     "    z = y * x"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "Let's backprop by calling ``z.backward()``. When ``z`` has more than one entry, ``z.backward()`` is equivalent to ``mx.nd.sum(z).backward()``.\n",
 95 |     "\n"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 5,
101 |    "metadata": {
102 |     "collapsed": true
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "z.backward()"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "Now, let's see if this is the expected output. Remember that ``y = x * 2``, and ``z = x * y``, so ``z`` should be equal to ``2 * x * x``. After doing backprop with ``z.backward()``, we expect to get back the gradient dz/dx as follows: dy/dx = ``2``, dz/dx = ``4 * x``. So, if everything went according to plan, ``x.grad`` should consist of an NDArray with the values ``[[4, 8],[12, 16]]``.\n",
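    "\n",
    "As a quick sanity check, we can also compare ``x.grad`` against the analytic gradient $\\frac{dz}{dx} = 4x$ directly (a small illustrative snippet; it assumes only the ``x`` defined above):\n",
    "\n",
    "```python\n",
    "expected = 4 * x\n",
    "print(x.grad == expected)  # elementwise comparison; should be all ones\n",
    "```"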
114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "\n", 126 | "[[ 4. 8.]\n", 127 | " [ 12. 16.]]\n", 128 | "\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "print(x.grad)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## Head gradients and the chain rule\n", 141 | "\n", 142 | "*Caution: This part is tricky, but not necessary to understanding subsequent sections.*\n", 143 | "\n", 144 | "Sometimes when we call the backward method on an NDArray, e.g. ``y.backward()``, where ``y`` is a function of ``x`` we are just interested in the derivative of ``y`` with respect to ``x``. Mathematicians write this as $\\frac{dy(x)}{dx}$. At other times, we may be interested in the gradient of ``z`` with respect to ``x``, where ``z`` is a function of ``y``, which in turn, is a function of ``x``. That is, we are interested in $\\frac{d}{dx} z(y(x))$. Recall that by the chain rule $\\frac{d}{dx} z(y(x)) = \\frac{dz(y)}{dy} \\frac{dy(x)}{dx}$. So, when ``y`` is part of a larger function ``z``, and we want ``x.grad`` to store $\\frac{dz}{dx}$, we can pass in the *head gradient* $\\frac{dz}{dy}$ as an input to ``backward()``. The default argument is ``nd.ones_like(y)``. See [Wikipedia](https://en.wikipedia.org/wiki/Chain_rule) for more details." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 7, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "\n", 157 | "[[ 40. 8. ]\n", 158 | " [ 1.20000005 0.16 ]]\n", 159 | "\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "with autograd.record():\n", 165 | " y = x * 2\n", 166 | " z = y * x\n", 167 | "\n", 168 | "head_gradient = nd.array([[10, 1.], [.1, .01]])\n", 169 | "z.backward(head_gradient)\n", 170 | "print(x.grad)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Now that we know the basics, we can do some wild things with autograd, including building differentiable functions using Pythonic control flow." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 8, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "a = nd.random_normal(shape=3)\n", 189 | "a.attach_grad()\n", 190 | "\n", 191 | "with autograd.record():\n", 192 | " b = a * 2\n", 193 | " while (nd.norm(b) < 1000).asscalar():\n", 194 | " b = b * 2\n", 195 | "\n", 196 | " if (mx.nd.sum(b) > 0).asscalar():\n", 197 | " c = b\n", 198 | " else:\n", 199 | " c = 100 * b" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 9, 205 | "metadata": { 206 | "collapsed": true 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "head_gradient = nd.array([0.01, 1.0, .1])\n", 211 | "c.backward(head_gradient)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 10, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | "\n", 224 | "[ 2048. 204800. 
20480.]\n", 225 | "\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "print(a.grad)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Next\n", 238 | "[Chapter 1 Problem Set](../chapter01_crashcourse/chapter-one-problem-set)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "collapsed": true 253 | }, 254 | "outputs": [], 255 | "source": [] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.6.2" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 2 279 | } 280 | -------------------------------------------------------------------------------- /chapter01_crashcourse/preface.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Preface\n", 8 | "\n", 9 | "If you're a reasonable person, you might ask, \"what is *mxnet-the-straight-dope*?\" You might also ask, \"why does it have such an ostentatious name?\" Speaking to the former question, *mxnet-the-straight-dope* is an attempt to create a new kind of educational resource for deep learning. Our goal is to leverage the strengths of Jupyter notebooks to present prose, graphics, equations, and (importantly) code together in one place. If we're successful, the result will be a resource that could be simultaneously a book, course material, a prop for live tutorials, and a resource for plagiarising (with our blessing) useful code. To our knowledge, few available resources aim either (1) to teach the full breadth of concepts in modern machine learning or (2) to interleave an engaging textbook with runnable code. We'll find out by the end of this venture whether or not that void exists for a good reason.\n", 10 | "\n", 11 | "Regarding the name, we are cognizant that the machine learning community and the ecosystem in which we operate have lurched into an absurd place. In the early 2000s, comparatively few tasks in machine learning had been conquered, but we felt that we understood *how* and *why* those models worked (with some caveats). By contrast, today's machine learning systems are extremely powerful and *actually work* for a growing list of tasks, but huge open questions remain as to precisely *why* they are so effective. \n", 12 | "\n", 13 | "This new world offers enormous opportunity, but has also given rise to considerable buffoonery. Preprint archives like [the arXiv](http://arxiv.org) are flooded with clickbait, AI startups have sometimes received overly optimistic valuations, and the blogosphere is flooded with thought leadership pieces written by marketers bereft of any technical knowledge. Amid the chaos, easy money, and lax standards, we believe it's important not to take our models or the environment in which they are worshipped too seriously. 
Also, in order to explain, visualize, and code the full breadth of models that we aim to address, it's important that the authors do not get bored while writing. \n", 14 | "\n", 15 | "## Organization\n", 16 | "\n", 17 | "At present, we're aiming for the following format: aside from a few (optional) notebooks providing a crash course in the basic mathematical background, each subsequent notebook will both:\n", 18 | "\n", 19 | "1. Introduce a reasonable number (perhaps one) of new concepts\n", 20 | "2. Provide a single self-contained working example, using a real dataset\n", 21 | "\n", 22 | "This presents an organizational challenge. Some models might logically be grouped together in a single notebook. \n", 23 | "And some ideas might be best taught by executing several models in succession. \n", 24 | "On the other hand, there's a big advantage to adhering to a policy of *1 working example, 1 notebook*:\n", 25 | "This makes it as easy as possible for you to start your own research projects \n", 26 | "by plagiarising our code. Just copy a single notebook and start modifying it.\n", 27 | "\n", 28 | "We will interleave the runnable code with background material as needed. \n", 29 | "In general, we will often err on the side of making tools available before explaining them fully \n", 30 | "(and we will follow up by explaining the background later). \n", 31 | "For instance, we might use *stochastic gradient descent* \n", 32 | "before fully explaining why it is useful or why it works. \n", 33 | "This helps to give practitioners the necessary ammunition to solve problems quickly, \n", 34 | "at the expense of requiring the reader to trust us with some decisions, at least in the short term. \n", 35 | "Throughout, we'll be working with the MXNet library, \n", 36 | "which has the rare property of being flexible enough for research \n", 37 | "while being fast enough for production. \n", 38 | "Our more advanced chapters will mostly rely \n", 39 | "on MXNet's new high-level imperative interface ``gluon``. \n", 40 | "Note that this is not the same as ``mxnet.module``, \n", 41 | "an older, symbolic interface supported by MXNet. \n", 42 | "\n", 43 | "This book will teach deep learning concepts from scratch. \n", 44 | "Sometimes, we'll want to delve into fine details about the models \n", 45 | "that are hidden from the user by ``gluon``'s advanced features. \n", 46 | "This comes up especially in the basic tutorials, \n", 47 | "where we'll want you to understand everything that happens in a given layer. \n", 48 | "In these cases, we'll generally present two versions of the example: \n", 49 | "one where we implement everything from scratch, \n", 50 | "relying only on NDArray and automatic differentiation, \n", 51 | "and another where we show how to do things succinctly with ``gluon``. \n", 52 | "Once we've taught you how a layer works, \n", 53 | "we can just use the ``gluon`` version in subsequent tutorials.\n", 54 | "\n", 55 | "## Learning by doing\n", 56 | "\n", 57 | "Many textbooks teach a series of topics, each in exhaustive detail. For example, Chris Bishop's excellent textbook, [Pattern Recognition and Machine Learning](https://www.amazon.com/Pattern-Recognition-Learning-Information-Statistics/dp/0387310738), teaches each topic so thoroughly that getting to the chapter on linear regression requires a non-trivial amount of work. When I (Zack) was first learning machine learning, this actually limited the book's usefulness as an introductory text. 
When I rediscovered it a couple of years later, I loved it precisely for its thoroughness, and I hope you check it out after working through this material! But perhaps the traditional textbook approach is not the easiest way to get started in the first place. \n", 58 | "\n", 59 | "Instead, in this book, we'll teach most concepts just in time. For \n", 60 | "the fundamental preliminaries like linear algebra and probability, \n", 61 | "we'll provide a brief crash course from the outset, \n", 62 | "but we want you to taste the satisfaction of training your first model \n", 63 | "before worrying about exotic probability distributions. \n", 64 | "\n", 65 | "## Next steps\n", 66 | "\n", 67 | "If you're ready to get started, head over to [the introduction](../chapter01_crashcourse/introduction.ipynb) or go straight to [our basic primer on NDArray](./ndarray.ipynb), MXNet's workhorse data structure.\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 75 | ] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.4.3" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } 100 | -------------------------------------------------------------------------------- /chapter02_supervised-learning/regularization-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Overfitting and regularization (with ``gluon``)\n", 8 | "\n", 9 | "Now that we've built a [regularized logistic regression model from scratch](regularization-scratch.html), let's make this more efficient with ``gluon``. We recommend that you read that section for an explanation of why regularization is a good idea. 
As always, we begin by loading libraries and some data.\n", 10 | "\n", 11 | "[**REFINED DRAFT - RELEASE STAGE: CATFOOD**]" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "from __future__ import print_function\n", 23 | "import mxnet as mx\n", 24 | "from mxnet import autograd\n", 25 | "from mxnet import gluon\n", 26 | "import mxnet.ndarray as nd\n", 27 | "import numpy as np\n", 28 | "ctx = mx.cpu()\n", 29 | "\n", 30 | "# for plotting purposes\n", 31 | "%matplotlib inline\n", 32 | "import matplotlib\n", 33 | "import matplotlib.pyplot as plt" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## The MNIST Dataset" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "mnist = mx.test_utils.get_mnist()\n", 52 | "num_examples = 1000\n", 53 | "batch_size = 64\n", 54 | "train_data = mx.gluon.data.DataLoader(\n", 55 | " mx.gluon.data.ArrayDataset(mnist[\"train_data\"][:num_examples],\n", 56 | " mnist[\"train_label\"][:num_examples].astype(np.float32)), \n", 57 | " batch_size, shuffle=True)\n", 58 | "test_data = mx.gluon.data.DataLoader(\n", 59 | " mx.gluon.data.ArrayDataset(mnist[\"test_data\"][:num_examples],\n", 60 | " mnist[\"test_label\"][:num_examples].astype(np.float32)), \n", 61 | " batch_size, shuffle=False)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Multiclass Logistic Regression" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "net = gluon.nn.Sequential()\n", 80 | "with net.name_scope():\n", 81 | " net.add(gluon.nn.Dense(10))" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Parameter initialization\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Softmax Cross Entropy Loss" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "loss = gluon.loss.SoftmaxCrossEntropyLoss()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Optimizer\n", 125 | "\n", 126 | "By default ``gluon`` tries to keep the coefficients from diverging by using a *weight decay* penalty. So, to get the real overfitting experience, we need to switch it off. We do this by passing `'wd': 0.0` when we instantiate the trainer. 
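For intuition about what ``wd`` controls, here is a minimal sketch (not part of the original notebook) of how one could instead add the L2 penalty to the objective by hand. It reuses ``net``, ``nd``, and ``ctx`` from above; the value $\lambda = 0.001$ is purely illustrative, and the penalty must be computed inside the ``autograd.record()`` block for it to contribute to the gradients.

```python
# A hedged sketch: computing the L2 penalty manually instead of using `wd`.
# Assumes `net`, `nd`, and `ctx` as defined in the surrounding notebook.
def l2_penalty(params):
    # Sum of squared entries over all parameters in the network.
    penalty = nd.zeros(shape=(1,), ctx=ctx)
    for param in params.values():
        penalty = penalty + (param.data() ** 2).sum()
    return penalty

# Inside the training loop (within autograd.record()), one could then use:
#     total_loss = nd.mean(cross_entropy) + 0.001 / 2 * l2_penalty(net.collect_params())
#     total_loss.backward()
```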
" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01, 'wd': 0.0})" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## Evaluation Metric" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "def evaluate_accuracy(data_iterator, net, loss_fun):\n", 156 | " acc = mx.metric.Accuracy()\n", 157 | " loss_avg = 0.\n", 158 | " for i, (data, label) in enumerate(data_iterator):\n", 159 | " data = data.as_in_context(ctx).reshape((-1,784))\n", 160 | " label = label.as_in_context(ctx)\n", 161 | " output = net(data)\n", 162 | " loss = loss_fun(output, label) \n", 163 | " predictions = nd.argmax(output, axis=1)\n", 164 | " acc.update(preds=predictions, labels=label)\n", 165 | " loss_avg = loss_avg*i/(i+1) + nd.mean(loss).asscalar()/(i+1)\n", 166 | " return acc.get()[1], loss_avg\n", 167 | "\n", 168 | "def plot_learningcurves(loss_tr,loss_ts, acc_tr,acc_ts):\n", 169 | " xs = list(range(len(loss_tr)))\n", 170 | " \n", 171 | " f = plt.figure(figsize=(12,6))\n", 172 | " fg1 = f.add_subplot(121)\n", 173 | " fg2 = f.add_subplot(122)\n", 174 | " \n", 175 | " fg1.set_xlabel('epoch',fontsize=14)\n", 176 | " fg1.set_title('Comparing loss functions')\n", 177 | " fg1.semilogy(xs, loss_tr)\n", 178 | " fg1.semilogy(xs, loss_ts)\n", 179 | " fg1.grid(True,which=\"both\")\n", 180 | "\n", 181 | " fg1.legend(['training loss', 'testing loss'],fontsize=14)\n", 182 | " \n", 183 | " fg2.set_title('Comparing accuracy')\n", 184 | " fg1.set_xlabel('epoch',fontsize=14)\n", 185 | " fg2.plot(xs, acc_tr)\n", 186 | " fg2.plot(xs, acc_ts)\n", 187 | " fg2.grid(True,which=\"both\")\n", 188 | " fg2.legend(['training accuracy', 'testing accuracy'],fontsize=14)\n", 189 | " plt.show()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Execute training loop" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": true 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "epochs = 700\n", 208 | "moving_loss = 0.\n", 209 | "niter=0\n", 210 | "\n", 211 | "loss_seq_train = []\n", 212 | "loss_seq_test = []\n", 213 | "acc_seq_train = []\n", 214 | "acc_seq_test = []\n", 215 | "\n", 216 | "for e in range(epochs):\n", 217 | " for i, (data, label) in enumerate(train_data):\n", 218 | " data = data.as_in_context(ctx).reshape((-1,784))\n", 219 | " label = label.as_in_context(ctx)\n", 220 | " with autograd.record():\n", 221 | " output = net(data)\n", 222 | " cross_entropy = loss(output, label)\n", 223 | " cross_entropy.backward()\n", 224 | " trainer.step(data.shape[0])\n", 225 | " \n", 226 | " ##########################\n", 227 | " # Keep a moving average of the losses\n", 228 | " ##########################\n", 229 | " niter +=1\n", 230 | " moving_loss = .99 * moving_loss + .01 * nd.mean(cross_entropy).asscalar()\n", 231 | " est_loss = moving_loss/(1-0.99**niter)\n", 232 | " \n", 233 | " test_accuracy, test_loss = evaluate_accuracy(test_data, net, loss)\n", 234 | " train_accuracy, train_loss = evaluate_accuracy(train_data, net, loss)\n", 235 | " \n", 236 | " # save them for later\n", 237 | " loss_seq_train.append(train_loss)\n", 238 | " 
loss_seq_test.append(test_loss)\n", 239 | " acc_seq_train.append(train_accuracy)\n", 240 | " acc_seq_test.append(test_accuracy)\n", 241 | " \n", 242 | " \n", 243 | " if e % 20 == 0:\n", 244 | " print(\"Completed epoch %s. Train Loss: %s, Test Loss %s, Train_acc %s, Test_acc %s\" % \n", 245 | " (e+1, train_loss, test_loss, train_accuracy, test_accuracy)) \n", 246 | "\n", 247 | "## Plotting the learning curves\n", 248 | "plot_learningcurves(loss_seq_train,loss_seq_test,acc_seq_train,acc_seq_test)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "## Regularization\n", 256 | "\n", 257 | "Now let's see what this mysterious *weight decay* is all about. We begin with a bit of math. When we add an L2 penalty to the weights we are effectively adding $\\frac{\\lambda}{2} \\|w\\|^2$ to the loss. Hence, every time we compute the gradient it gets an additional $\\lambda w$ term that is added to $g_t$, since this is the very derivative of the L2 penalty. As a result we end up taking a descent step not in the direction $-\\eta g_t$ but rather in the direction $-\\eta (g_t + \\lambda w)$. This effectively shrinks $w$ at each step by $\\eta \\lambda w$, thus the name weight decay. To make this work in practice we just need to set the weight decay to something nonzero." 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "collapsed": true 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx, force_reinit=True)\n", 269 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01, 'wd': 0.001})\n", 270 | "\n", 271 | "moving_loss = 0.\n", 272 | "niter=0\n", 273 | "loss_seq_train = []\n", 274 | "loss_seq_test = []\n", 275 | "acc_seq_train = []\n", 276 | "acc_seq_test = []\n", 277 | "\n", 278 | "for e in range(epochs):\n", 279 | " for i, (data, label) in enumerate(train_data):\n", 280 | " data = data.as_in_context(ctx).reshape((-1,784))\n", 281 | " label = label.as_in_context(ctx)\n", 282 | " with autograd.record():\n", 283 | " output = net(data)\n", 284 | " cross_entropy = loss(output, label)\n", 285 | " cross_entropy.backward()\n", 286 | " trainer.step(data.shape[0])\n", 287 | " \n", 288 | " ##########################\n", 289 | " # Keep a moving average of the losses\n", 290 | " ##########################\n", 291 | " niter +=1\n", 292 | " moving_loss = .99 * moving_loss + .01 * nd.mean(cross_entropy).asscalar()\n", 293 | " est_loss = moving_loss/(1-0.99**niter)\n", 294 | " \n", 295 | " test_accuracy, test_loss = evaluate_accuracy(test_data, net,loss)\n", 296 | " train_accuracy, train_loss = evaluate_accuracy(train_data, net, loss)\n", 297 | " \n", 298 | " # save them for later\n", 299 | " loss_seq_train.append(train_loss)\n", 300 | " loss_seq_test.append(test_loss)\n", 301 | " acc_seq_train.append(train_accuracy)\n", 302 | " acc_seq_test.append(test_accuracy)\n", 303 | " \n", 304 | " if e % 20 == 0:\n", 305 | " print(\"Completed epoch %s. Train Loss: %s, Test Loss %s, Train_acc %s, Test_acc %s\" % \n", 306 | " (e+1, train_loss, test_loss, train_accuracy, test_accuracy)) \n", 307 | " \n", 308 | "## Plotting the learning curves\n", 309 | "plot_learningcurves(loss_seq_train,loss_seq_test,acc_seq_train,acc_seq_test)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "As we can see, the test accuracy improves a bit. 
Note that the amount by which it improves actually depends on the amount of weight decay. We recommend that you experiment with different amounts of weight decay. For instance, a larger weight decay (e.g. $0.01$) will lead to inferior performance, and one that's larger still ($0.1$) will lead to terrible results. This is one of the reasons why parameter tuning is so important for getting good experimental results in practice." 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "## Next\n", 324 | "[Learning environments](../chapter02_supervised-learning/environment.ipynb)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": { 330 | "collapsed": true 331 | }, 332 | "source": [ 333 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 334 | ] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.6.2" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 2 358 | } 359 | -------------------------------------------------------------------------------- /chapter03_deep-neural-networks/serialization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Serialization - saving, loading and checkpointing\n", 8 | "\n", 9 | "At this point we've already covered quite a lot of ground. \n", 10 | "We know how to manipulate data and labels.\n", 11 | "We know how to construct flexible models capable of expressing plausible hypotheses.\n", 12 | "We know how to fit those models to our dataset.\n", 13 | "We know which loss functions to use for classification and for regression,\n", 14 | "and we know how to minimize those losses with respect to our models' parameters. \n", 15 | "We even know how to write our own neural network layers in ``gluon``.\n", 16 | "\n", 17 | "But even with all this knowledge, we're not ready to build a real machine learning system.\n", 18 | "That's because we haven't yet covered how to save and load models. \n", 19 | "In reality, we often train a model on one device\n", 20 | "and then want to run it to make predictions on many devices simultaneously.\n", 21 | "In order for our models to persist beyond the execution of a single Python script, \n", 22 | "we need mechanisms to save and load NDArrays, ``gluon`` Parameters, and models themselves. " 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "from __future__ import print_function\n", 32 | "import mxnet as mx\n", 33 | "from mxnet import nd, autograd\n", 34 | "from mxnet import gluon\n", 35 | "ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Saving and loading NDArrays\n", 43 | "\n", 44 | "To start, let's show how you can save and load a list of NDArrays for future use. 
Note that while it's possible to use a general Python serialization package like ``pickle``, it's not optimized for use with NDArrays and will be unnecessarily slow. We prefer to use ``ndarray.save`` and ``ndarray.load``. " 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "X = nd.ones((100, 100))\n", 54 | "Y = nd.zeros((100, 100))\n", 55 | "import os\n", 56 | "\n", 57 | "dir_name = 'checkpoints'\n", 58 | "if not os.path.exists(dir_name):\n", 59 | " os.makedirs(dir_name)\n", 60 | "\n", 61 | "filename = os.path.join(dir_name, \"test1.params\")\n", 62 | "nd.save(filename, [X, Y])" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "It's just as easy to load a saved NDArray." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "A, B = nd.load(filename)\n", 79 | "print(A)\n", 80 | "print(B)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "We can also save a dictionary where the keys are strings and the values are NDArrays." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "mydict = {\"X\": X, \"Y\": Y}\n", 97 | "filename = os.path.join(dir_name, \"test2.params\")\n", 98 | "nd.save(filename, mydict)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "C = nd.load(filename)\n", 108 | "print(C)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "## Saving and loading the parameters of ``gluon`` models\n", 116 | "\n", 117 | "Recall from [our first look at the plumbing behind ``gluon`` blocks](P03.5-C01-plumbing.ipynb) \n", 118 | "that ``gluon`` wraps the NDArrays corresponding to model parameters in ``Parameter`` objects. \n", 119 | "We'll often want to store and load an entire model's parameters without \n", 120 | "having to individually extract or load the NDArrays from the Parameters via ParameterDicts in each block.\n", 121 | "\n", 122 | "Fortunately, ``gluon`` blocks make our lives very easy by providing ``.save_parameters()`` and ``.load_parameters()`` methods. To see them at work, let's just spin up a simple MLP." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "num_hidden = 256\n", 132 | "num_outputs = 1\n", 133 | "net = gluon.nn.Sequential()\n", 134 | "with net.name_scope():\n", 135 | " net.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 136 | " net.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 137 | " net.add(gluon.nn.Dense(num_outputs))" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Now, let's initialize the parameters by attaching an initializer and actually passing in a datapoint to induce shape inference."
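One wrinkle worth spelling out: because we never told the ``Dense`` layers their input dimensions, ``gluon`` defers the actual allocation of the weights until the first batch flows through the network. A minimal sketch of this behavior follows (not part of the original notebook; it reuses ``num_hidden``, ``ctx``, and the imports from above):

```python
# Deferred initialization in action (a sketch). Until data flows through
# the network, the input dimension of each layer -- and hence the weight
# shape -- is unknown, and shows up as a 0 in the parameter summary.
net3 = gluon.nn.Sequential()
with net3.name_scope():
    net3.add(gluon.nn.Dense(num_hidden, activation="relu"))
net3.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=ctx)
print(net3.collect_params())      # weight shape still contains a 0
net3(nd.ones((1, 100), ctx=ctx))  # the first forward pass triggers the real init
print(net3.collect_params())      # shapes are now fully specified
```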
145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=ctx)\n", 154 | "net(nd.ones((1, 100), ctx=ctx))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "So this randomly initialized model maps a 100-dimensional vector of all ones to the number 362.53 (that's the number on my machine--your mileage may vary).\n", 162 | "Let's save the parameters, instantiate a new network, load them in and make sure that we get the same result." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "filename = os.path.join(dir_name, \"testnet.params\")\n", 172 | "net.save_parameters(filename)\n", 173 | "net2 = gluon.nn.Sequential()\n", 174 | "with net2.name_scope():\n", 175 | " net2.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 176 | " net2.add(gluon.nn.Dense(num_hidden, activation=\"relu\"))\n", 177 | " net2.add(gluon.nn.Dense(num_outputs))\n", 178 | "net2.load_parameters(filename, ctx=ctx)\n", 179 | "net2(nd.ones((1, 100), ctx=ctx))" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "Great! Now we're ready to save our work. \n", 187 | "The practice of saving models is sometimes called *checkpointing*\n", 188 | "and it's especially important for a number of reasons.\n", 189 | "1. We can preserve and syndicate models that are trained once.\n", 190 | "2. Some models perform best (as determined on validation data) at some epoch in the middle of training. If we checkpoint the model after each epoch, we can later select the best epoch.\n", 191 | "3. We might want to ask questions about our trained model that we didn't think of when we first wrote the scripts for our experiments. Having the parameters lying around allows us to examine our past work without having to train from scratch.\n", 192 | "4. Sometimes people might want to run our models who don't know how to execute training themselves or can't access a suitable dataset for training. Checkpointing gives us a way to share our work with others." 
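To make point 2 concrete, here is a minimal per-epoch checkpointing sketch (illustrative, not from the original notebook). It assumes a training loop, a held-out dataset ``test_data``, and an accuracy helper like the ``evaluate_accuracy`` functions used elsewhere in this book; the filenames are made up for the example.

```python
# A sketch of checkpointing after every epoch and remembering the best one.
# `net`, `dir_name`, `epochs`, `test_data`, and `evaluate_accuracy` are
# assumed from the surrounding context.
best_acc = 0.0
for epoch in range(epochs):
    # ... run one epoch of training here ...
    val_acc = evaluate_accuracy(test_data, net)
    net.save_parameters(os.path.join(dir_name, "epoch-%d.params" % epoch))
    if val_acc > best_acc:
        best_acc = val_acc
        net.save_parameters(os.path.join(dir_name, "best.params"))

# Later, we can restore the winning epoch into a freshly built network:
# net.load_parameters(os.path.join(dir_name, "best.params"), ctx=ctx)
```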
193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Next\n", 209 | "[Convolutional neural networks from scratch](../chapter04_convolutional-neural-networks/cnn-scratch.ipynb)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "source": [ 218 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 219 | ] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 3", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.6.5" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 2 243 | } 244 | -------------------------------------------------------------------------------- /chapter04_convolutional-neural-networks/cnn-batch-norm-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Batch Normalization in `gluon`\n", 8 | "\n", 9 | "In the preceding section, [we implemented batch normalization ourselves](../chapter04_convolutional-neural-networks/cnn-batch-norm-scratch.ipynb) using NDArray and autograd.\n", 10 | "As with most commonly used neural network layers,\n", 11 | "Gluon has batch normalization predefined,\n", 12 | "so this section is going to be straightforward." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "ExecuteTime": { 20 | "end_time": "2017-10-18T03:21:49.174951Z", 21 | "start_time": "2017-10-18T03:21:48.205450Z" 22 | } 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "from __future__ import print_function\n", 27 | "import mxnet as mx\n", 28 | "from mxnet import nd, autograd\n", 29 | "from mxnet import gluon\n", 30 | "import numpy as np\n", 31 | "mx.random.seed(1)\n", 32 | "ctx = mx.cpu()" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## The MNIST dataset" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "ExecuteTime": { 47 | "end_time": "2017-10-18T03:21:50.220488Z", 48 | "start_time": "2017-10-18T03:21:49.176860Z" 49 | } 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "batch_size = 64\n", 54 | "num_inputs = 784\n", 55 | "num_outputs = 10\n", 56 | "def transform(data, label):\n", 57 | " return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)\n", 58 | "train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),\n", 59 | " batch_size, shuffle=True)\n", 60 | "test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),\n", 61 | " batch_size, shuffle=False)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Define a CNN with Batch Normalization\n", 69 | "\n", 70 | "To add batch normalization to a ``gluon`` model defined with Sequential,\n", 71 | "we only need to add a few lines. 
\n", 72 | "Specifically, we just insert `BatchNorm` layers before the applying the ReLU activations." 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "ExecuteTime": { 80 | "end_time": "2017-10-18T03:21:50.292271Z", 81 | "start_time": "2017-10-18T03:21:50.222527Z" 82 | } 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "num_fc = 512\n", 87 | "net = gluon.nn.Sequential()\n", 88 | "with net.name_scope():\n", 89 | " net.add(gluon.nn.Conv2D(channels=20, kernel_size=5))\n", 90 | " net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))\n", 91 | " net.add(gluon.nn.Activation(activation='relu'))\n", 92 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 93 | " \n", 94 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=5))\n", 95 | " net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))\n", 96 | " net.add(gluon.nn.Activation(activation='relu'))\n", 97 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 98 | " \n", 99 | " # The Flatten layer collapses all axis, except the first one, into one axis.\n", 100 | " net.add(gluon.nn.Flatten())\n", 101 | " \n", 102 | " net.add(gluon.nn.Dense(num_fc))\n", 103 | " net.add(gluon.nn.BatchNorm(axis=1, center=True, scale=True))\n", 104 | " net.add(gluon.nn.Activation(activation='relu'))\n", 105 | " \n", 106 | " net.add(gluon.nn.Dense(num_outputs))" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## Parameter initialization\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2017-10-18T03:21:50.311368Z", 122 | "start_time": "2017-10-18T03:21:50.296296Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Softmax cross-entropy Loss" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "ExecuteTime": { 142 | "end_time": "2017-10-18T03:21:50.335025Z", 143 | "start_time": "2017-10-18T03:21:50.322603Z" 144 | } 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## Optimizer" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "ExecuteTime": { 163 | "end_time": "2017-10-18T03:21:50.350590Z", 164 | "start_time": "2017-10-18T03:21:50.339939Z" 165 | } 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Write evaluation loop to calculate accuracy" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "ExecuteTime": { 184 | "start_time": "2017-10-18T03:21:48.241Z" 185 | } 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "def evaluate_accuracy(data_iterator, net):\n", 190 | " acc = mx.metric.Accuracy()\n", 191 | " for i, (data, label) in enumerate(data_iterator):\n", 192 | " data = data.as_in_context(ctx)\n", 193 | " label = label.as_in_context(ctx)\n", 194 | " output = net(data)\n", 195 | " predictions = nd.argmax(output, axis=1)\n", 196 | 
" acc.update(preds=predictions, labels=label)\n", 197 | " return acc.get()[1]" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "## Training Loop" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "ExecuteTime": { 212 | "start_time": "2017-10-18T03:21:48.244Z" 213 | } 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "epochs = 1\n", 218 | "smoothing_constant = .01\n", 219 | "\n", 220 | "for e in range(epochs):\n", 221 | " for i, (data, label) in enumerate(train_data):\n", 222 | " data = data.as_in_context(ctx)\n", 223 | " label = label.as_in_context(ctx)\n", 224 | " with autograd.record():\n", 225 | " output = net(data)\n", 226 | " loss = softmax_cross_entropy(output, label)\n", 227 | " loss.backward()\n", 228 | " trainer.step(data.shape[0])\n", 229 | " \n", 230 | " ##########################\n", 231 | " # Keep a moving average of the losses\n", 232 | " ##########################\n", 233 | " curr_loss = nd.mean(loss).asscalar()\n", 234 | " moving_loss = (curr_loss if ((i == 0) and (e == 0)) \n", 235 | " else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)\n", 236 | " \n", 237 | " test_accuracy = evaluate_accuracy(test_data, net)\n", 238 | " train_accuracy = evaluate_accuracy(train_data, net)\n", 239 | " print(\"Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\" % (e, moving_loss, train_accuracy, test_accuracy)) " 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "## Next\n", 247 | "[Introduction to recurrent neural networks](../chapter05_recurrent-neural-networks/simple-rnn.ipynb)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 255 | ] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.6.1" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 2 279 | } 280 | -------------------------------------------------------------------------------- /chapter04_convolutional-neural-networks/cnn-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Convolutional Neural Networks in ``gluon``\n", 8 | "\n", 9 | "Now let's see how succinctly we can express a convolutional neural network using ``gluon``. You might be relieved to find out that this too requires hardly any more code than logistic regression. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from __future__ import print_function\n", 21 | "import numpy as np\n", 22 | "import mxnet as mx\n", 23 | "from mxnet import nd, autograd, gluon\n", 24 | "mx.random.seed(1)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Set the context" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# ctx = mx.gpu()\n", 43 | "ctx = mx.cpu()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Grab the MNIST dataset" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "batch_size = 64\n", 62 | "num_inputs = 784\n", 63 | "num_outputs = 10\n", 64 | "def transform(data, label):\n", 65 | " return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)\n", 66 | "train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),\n", 67 | " batch_size, shuffle=True)\n", 68 | "test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),\n", 69 | " batch_size, shuffle=False)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Define a convolutional neural network\n", 77 | "\n", 78 | "Again, a few lines here is all we need in order to change the model. Let's add a couple of convolutional layers using ``gluon.nn``." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "num_fc = 512\n", 90 | "net = gluon.nn.Sequential()\n", 91 | "with net.name_scope():\n", 92 | " net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='relu'))\n", 93 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2)) \n", 94 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))\n", 95 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 96 | " # The Flatten layer collapses all axis, except the first one, into one axis.\n", 97 | " net.add(gluon.nn.Flatten())\n", 98 | " net.add(gluon.nn.Dense(num_fc, activation=\"relu\"))\n", 99 | " net.add(gluon.nn.Dense(num_outputs))" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Parameter initialization\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## Softmax cross-entropy Loss" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "## Optimizer" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": true 150 | }, 
151 | "outputs": [], 152 | "source": [ 153 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "## Write evaluation loop to calculate accuracy" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "def evaluate_accuracy(data_iterator, net):\n", 172 | " acc = mx.metric.Accuracy()\n", 173 | " for i, (data, label) in enumerate(data_iterator):\n", 174 | " data = data.as_in_context(ctx)\n", 175 | " label = label.as_in_context(ctx)\n", 176 | " output = net(data)\n", 177 | " predictions = nd.argmax(output, axis=1)\n", 178 | " acc.update(preds=predictions, labels=label)\n", 179 | " return acc.get()[1]" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## Training Loop" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "epochs = 1\n", 196 | "smoothing_constant = .01\n", 197 | "\n", 198 | "for e in range(epochs):\n", 199 | " for i, (data, label) in enumerate(train_data):\n", 200 | " data = data.as_in_context(ctx)\n", 201 | " label = label.as_in_context(ctx)\n", 202 | " with autograd.record():\n", 203 | " output = net(data)\n", 204 | " loss = softmax_cross_entropy(output, label)\n", 205 | " loss.backward()\n", 206 | " trainer.step(data.shape[0])\n", 207 | " \n", 208 | " ##########################\n", 209 | " # Keep a moving average of the losses\n", 210 | " ##########################\n", 211 | " curr_loss = nd.mean(loss).asscalar()\n", 212 | " moving_loss = (curr_loss if ((i == 0) and (e == 0)) \n", 213 | " else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)\n", 214 | " \n", 215 | " test_accuracy = evaluate_accuracy(test_data, net)\n", 216 | " train_accuracy = evaluate_accuracy(train_data, net)\n", 217 | " print(\"Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\" % (e, moving_loss, train_accuracy, test_accuracy)) " 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "## Conclusion\n", 225 | "\n", 226 | "You might notice that by using ``gluon``, we get code that runs much faster whether on CPU or GPU. That's largely because ``gluon`` can call down to highly optimized layers that have been written in C++. 
" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "## Next\n", 234 | "[Deep convolutional networks (AlexNet)](../chapter04_convolutional-neural-networks/deep-cnns-alexnet.ipynb)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 242 | ] 243 | } 244 | ], 245 | "metadata": { 246 | "kernelspec": { 247 | "display_name": "Python 3", 248 | "language": "python", 249 | "name": "python3" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 3 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython3", 261 | "version": "3.6.2" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 2 266 | } 267 | -------------------------------------------------------------------------------- /chapter04_convolutional-neural-networks/very-deep-nets-vgg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Very deep networks with repeating elements\n", 8 | "\n", 9 | "As we already noticed in AlexNet, the number of layers in networks keeps on increasing. This means that it becomes extremely tedious to write code that piles on one layer after the other manually. Fortunately, programming languages have a wonderful fix for this: subroutines and loops. This way we can express networks as *code*. Just like we would use a for loop to count from 1 to 10, we'll use code to combine layers. The first network that had this structure was VGG. 
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## VGG\n", 17 | "\n", 18 | "We begin with the usual import ritual" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "ExecuteTime": { 26 | "end_time": "2017-10-18T06:00:51.744769Z", 27 | "start_time": "2017-10-18T06:00:51.019959Z" 28 | } 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from __future__ import print_function\n", 33 | "import mxnet as mx\n", 34 | "from mxnet import nd, autograd\n", 35 | "from mxnet import gluon\n", 36 | "import numpy as np\n", 37 | "mx.random.seed(1)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "ExecuteTime": { 45 | "end_time": "2017-10-18T06:00:51.749941Z", 46 | "start_time": "2017-10-18T06:00:51.746808Z" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "ctx = mx.gpu()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Load up a dataset\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "ExecuteTime": { 66 | "end_time": "2017-10-18T06:00:52.252105Z", 67 | "start_time": "2017-10-18T06:00:51.752991Z" 68 | } 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "batch_size = 64\n", 73 | "\n", 74 | "def transform(data, label):\n", 75 | " return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)\n", 76 | "\n", 77 | "train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),\n", 78 | " batch_size, shuffle=True)\n", 79 | "test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),\n", 80 | " batch_size, shuffle=False)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## The VGG architecture\n", 88 | "\n", 89 | "A key aspect of VGG was to use many convolutional blocks with relatively narrow kernels, followed by a max-pooling step and to repeat this block multiple times. What is pretty neat about the code below is that we use functions to *return* network blocks. These are then combined to larger networks (e.g. in `vgg_stack`) and this allows us to construct VGG from components. What is particularly useful here is that we can use it to reparameterize the architecture simply by changing a few lines rather than adding and removing many lines of network definitions. 
" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "ExecuteTime": { 97 | "end_time": "2017-10-18T06:00:52.283905Z", 98 | "start_time": "2017-10-18T06:00:52.254227Z" 99 | } 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "from mxnet.gluon import nn\n", 104 | "\n", 105 | "def vgg_block(num_convs, channels):\n", 106 | " out = nn.Sequential()\n", 107 | " for _ in range(num_convs):\n", 108 | " out.add(nn.Conv2D(channels=channels, kernel_size=3,\n", 109 | " padding=1, activation='relu'))\n", 110 | " out.add(nn.MaxPool2D(pool_size=2, strides=2))\n", 111 | " return out\n", 112 | "\n", 113 | "def vgg_stack(architecture):\n", 114 | " out = nn.Sequential()\n", 115 | " for (num_convs, channels) in architecture:\n", 116 | " out.add(vgg_block(num_convs, channels))\n", 117 | " return out\n", 118 | "\n", 119 | "num_outputs = 10\n", 120 | "architecture = ((1,64), (1,128), (2,256), (2,512))\n", 121 | "net = nn.Sequential()\n", 122 | "with net.name_scope():\n", 123 | " net.add(vgg_stack(architecture))\n", 124 | " net.add(nn.Flatten())\n", 125 | " net.add(nn.Dense(512, activation=\"relu\"))\n", 126 | " net.add(nn.Dropout(.5))\n", 127 | " net.add(nn.Dense(512, activation=\"relu\"))\n", 128 | " net.add(nn.Dropout(.5))\n", 129 | " net.add(nn.Dense(num_outputs))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Initialize parameters" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2017-10-18T06:00:53.879036Z", 145 | "start_time": "2017-10-18T06:00:52.285901Z" 146 | } 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## Optimizer" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "ExecuteTime": { 165 | "end_time": "2017-10-18T06:00:53.920533Z", 166 | "start_time": "2017-10-18T06:00:53.898827Z" 167 | } 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .05})" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Softmax cross-entropy loss" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "ExecuteTime": { 186 | "end_time": "2017-10-18T06:00:53.941011Z", 187 | "start_time": "2017-10-18T06:00:53.922904Z" 188 | } 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## Evaluation loop" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "ExecuteTime": { 207 | "end_time": "2017-10-18T06:00:53.962279Z", 208 | "start_time": "2017-10-18T06:00:53.943086Z" 209 | } 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "def evaluate_accuracy(data_iterator, net):\n", 214 | " acc = mx.metric.Accuracy()\n", 215 | " for d, l in data_iterator:\n", 216 | " data = d.as_in_context(ctx)\n", 217 | " label = l.as_in_context(ctx)\n", 218 | " output = net(data)\n", 219 | " predictions = nd.argmax(output, axis=1)\n", 220 | " acc.update(preds=predictions, labels=label)\n", 221 | " return 
acc.get()[1]" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "## Training loop" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": { 235 | "ExecuteTime": { 236 | "end_time": "2017-10-18T06:02:36.461653Z", 237 | "start_time": "2017-10-18T06:00:53.965101Z" 238 | } 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "###########################\n", 243 | "# Only one epoch so tests can run quickly, increase this variable to actually run\n", 244 | "###########################\n", 245 | "epochs = 1\n", 246 | "smoothing_constant = .01\n", 247 | "\n", 248 | "for e in range(epochs):\n", 249 | " for i, (d, l) in enumerate(train_data):\n", 250 | " data = d.as_in_context(ctx)\n", 251 | " label = l.as_in_context(ctx)\n", 252 | " with autograd.record():\n", 253 | " output = net(data)\n", 254 | " loss = softmax_cross_entropy(output, label)\n", 255 | " loss.backward()\n", 256 | " trainer.step(data.shape[0])\n", 257 | " \n", 258 | " ##########################\n", 259 | " # Keep a moving average of the losses\n", 260 | " ##########################\n", 261 | " curr_loss = nd.mean(loss).asscalar()\n", 262 | " moving_loss = (curr_loss if ((i == 0) and (e == 0)) \n", 263 | " else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)\n", 264 | " \n", 265 | " if i > 0 and i % 200 == 0:\n", 266 | " print('Batch %d. Loss: %f' % (i, moving_loss))\n", 267 | " \n", 268 | " test_accuracy = evaluate_accuracy(test_data, net)\n", 269 | " train_accuracy = evaluate_accuracy(train_data, net)\n", 270 | " print(\"Epoch %s. Loss: %s, Train_acc %s, Test_acc %s\" % (e, moving_loss, train_accuracy, test_accuracy)) " 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "## Next\n", 278 | "[Batch normalization from scratch](../chapter04_convolutional-neural-networks/cnn-batch-norm-scratch.ipynb)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 286 | ] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "Python 3", 292 | "language": "python", 293 | "name": "python3" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | "name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.6.1" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 2 310 | } 311 | -------------------------------------------------------------------------------- /chapter07_distributed-learning/multiple-gpus-gluon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training on multiple GPUs with `gluon`\n", 8 | "\n", 9 | "Gluon makes it easy to implement data parallel training.\n", 10 | "In this notebook, we'll implement data parallel training for a convolutional neural network.\n", 11 | "If you'd like a finer grained view of the concepts, \n", 12 | "you might want to first read the previous notebook,\n", 13 | "[multi gpu from scratch](./multiple-gpus-scratch.ipynb) with `gluon`.\n", 14 | "\n", 15 | "To get started, let's first define a simple convolutional neural network and loss 
function." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import mxnet as mx\n", 27 | "from mxnet import nd, gluon, autograd\n", 28 | "net = gluon.nn.Sequential(prefix='cnn_')\n", 29 | "with net.name_scope():\n", 30 | " net.add(gluon.nn.Conv2D(channels=20, kernel_size=3, activation='relu'))\n", 31 | " net.add(gluon.nn.MaxPool2D(pool_size=(2,2), strides=(2,2)))\n", 32 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=5, activation='relu'))\n", 33 | " net.add(gluon.nn.MaxPool2D(pool_size=(2,2), strides=(2,2)))\n", 34 | " net.add(gluon.nn.Flatten())\n", 35 | " net.add(gluon.nn.Dense(128, activation=\"relu\"))\n", 36 | " net.add(gluon.nn.Dense(10))\n", 37 | " \n", 38 | "loss = gluon.loss.SoftmaxCrossEntropyLoss()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Initialize on multiple devices\n", 46 | "\n", 47 | "Gluon supports initialization of network parameters over multiple devices. We accomplish this by passing in an array of device contexts, instead of the single contexts we've used in earlier notebooks.\n", 48 | "When we pass in an array of contexts, the parameters are initialized \n", 49 | "to be identical across all of our devices." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "GPU_COUNT = 2 # increase if you have more\n", 61 | "ctx = [mx.gpu(i) for i in range(GPU_COUNT)]\n", 62 | "net.collect_params().initialize(ctx=ctx)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Given a batch of input data,\n", 70 | "we can split it into parts (equal to the number of contexts) \n", 71 | "by calling `gluon.utils.split_and_load(batch, ctx)`.\n", 72 | "The `split_and_load` function doesn't just split the data,\n", 73 | "it also loads each part onto the appropriate device context. \n", 74 | "\n", 75 | "So now when we call the forward pass on two separate parts,\n", 76 | "each one is computed on the appropriate corresponding device and using the version of the parameters stored there." 
77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "\n", 89 | "[[-0.01876061 -0.02165037 -0.01293943 0.03837404 -0.00821797 -0.00911531\n", 90 | " 0.00416799 -0.00729158 -0.00232711 -0.00155549]\n", 91 | " [ 0.00441474 -0.01953595 -0.00128483 0.02768224 0.01389615 -0.01320441\n", 92 | " -0.01166505 -0.00637776 0.0135425 -0.00611765]]\n", 93 | "\n", 94 | "\n", 95 | "[[ -6.78736670e-03 -8.86893831e-03 -1.04004676e-02 1.72976423e-02\n", 96 | " 2.26115398e-02 -6.36630831e-03 -1.54974898e-02 -1.22633884e-02\n", 97 | " 1.19591374e-02 -6.60043515e-05]\n", 98 | " [ -1.17358668e-02 -2.16879714e-02 1.71219767e-03 2.49827504e-02\n", 99 | " 1.16810966e-02 -9.52543691e-03 -1.03610428e-02 5.08510228e-03\n", 100 | " 7.06662657e-03 -9.25292261e-03]]\n", 101 | "\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "from mxnet.test_utils import get_mnist\n", 107 | "mnist = get_mnist()\n", 108 | "batch = mnist['train_data'][0:GPU_COUNT*2, :]\n", 109 | "data = gluon.utils.split_and_load(batch, ctx)\n", 110 | "print(net(data[0]))\n", 111 | "print(net(data[1]))" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "At any time, we can access the version of the parameters stored on each device. \n", 119 | "Recall from the first Chapter that our weights may not actually be initialized\n", 120 | "when we call `initialize` because the parameter shapes may not yet be known. \n", 121 | "In these cases, initialization is deferred pending shape inference. " 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": { 128 | "scrolled": true 129 | }, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "=== channel 0 of the first conv on gpu(0) ===\n", 136 | "[[[ 0.04118239 0.05352169 -0.04762455]\n", 137 | " [ 0.06035256 -0.01528978 0.04946674]\n", 138 | " [ 0.06110793 -0.00081179 0.02191102]]]\n", 139 | "\n", 140 | "=== channel 0 of the first conv on gpu(1) ===\n", 141 | "[[[ 0.04118239 0.05352169 -0.04762455]\n", 142 | " [ 0.06035256 -0.01528978 0.04946674]\n", 143 | " [ 0.06110793 -0.00081179 0.02191102]]]\n", 144 | "\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "weight = net.collect_params()['cnn_conv0_weight']\n", 150 | "\n", 151 | "for c in ctx:\n", 152 | " print('=== channel 0 of the first conv on {} ==={}'.format(\n", 153 | " c, weight.data(ctx=c)[0]))\n", 154 | " " 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Similarly, we can access the gradients on each of the GPUs. Because each GPU gets a different part of the batch (a different subset of examples), the gradients on each GPU vary. 
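As an aside, before we let `gluon` do this work for us, it may help to see a sketch of what aggregating these per-device gradients by hand could look like. This is only an illustration under our own naming (`aggregate_grads` is a hypothetical helper, not a `gluon` API), and it is not how `gluon` implements aggregation internally:

```python
# Sketch of manual gradient aggregation across devices (illustration only).
def aggregate_grads(param, ctx):
    # pull every device's gradient onto the first context and sum them
    total = param.grad(ctx=ctx[0]).copy()
    for c in ctx[1:]:
        total += param.grad(ctx=c).copyto(ctx[0])
    # push the aggregated gradient back out to every device
    for c in ctx:
        total.copyto(param.grad(ctx=c))

# e.g., aggregate_grads(weight, ctx) after the backward pass below
```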
" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": { 168 | "scrolled": true 169 | }, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "=== grad of channel 0 of the first conv2d on gpu(0) ===\n", 176 | "[[[-0.02078936 -0.00562428 0.01711007]\n", 177 | " [ 0.01138539 0.0280002 0.04094725]\n", 178 | " [ 0.00993335 0.01218192 0.02122578]]]\n", 179 | "\n", 180 | "=== grad of channel 0 of the first conv2d on gpu(1) ===\n", 181 | "[[[-0.02543036 -0.02789939 -0.00302115]\n", 182 | " [-0.04816786 -0.03347274 -0.00403483]\n", 183 | " [-0.03178394 -0.01254033 0.00855637]]]\n", 184 | "\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "def forward_backward(net, data, label):\n", 190 | " with autograd.record():\n", 191 | " losses = [loss(net(X), Y) for X, Y in zip(data, label)]\n", 192 | " for l in losses:\n", 193 | " l.backward()\n", 194 | " \n", 195 | "label = gluon.utils.split_and_load(mnist['train_label'][0:4], ctx)\n", 196 | "forward_backward(net, data, label)\n", 197 | "for c in ctx:\n", 198 | " print('=== grad of channel 0 of the first conv2d on {} ==={}'.format(\n", 199 | " c, weight.grad(ctx=c)[0]))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Put all things together\n", 207 | "\n", 208 | "Now we can implement the remaining functions. Most of them are the same as [when we did everything by hand](./chapter07_distributed-learning/multiple-gpus-scratch.ipynb); one notable difference is that if a `gluon` trainer recognizes multi-devices, it will automatically aggregate the gradients and synchronize the parameters. " 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "scrolled": true 216 | }, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "Running on [gpu(0)]\n", 223 | "Batch size is 64\n", 224 | "Epoch 0, training time = 5.0 sec\n", 225 | " validation accuracy = 0.9738\n", 226 | "Epoch 1, training time = 4.8 sec\n", 227 | " validation accuracy = 0.9841\n", 228 | "Epoch 2, training time = 4.7 sec\n", 229 | " validation accuracy = 0.9863\n", 230 | "Epoch 3, training time = 4.7 sec\n", 231 | " validation accuracy = 0.9868\n", 232 | "Epoch 4, training time = 4.7 sec\n", 233 | " validation accuracy = 0.9877\n", 234 | "Running on [gpu(0), gpu(1)]\n", 235 | "Batch size is 128\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "from mxnet.io import NDArrayIter\n", 241 | "from time import time\n", 242 | "\n", 243 | "def train_batch(batch, ctx, net, trainer):\n", 244 | " # split the data batch and load them on GPUs\n", 245 | " data = gluon.utils.split_and_load(batch.data[0], ctx)\n", 246 | " label = gluon.utils.split_and_load(batch.label[0], ctx)\n", 247 | " # compute gradient\n", 248 | " forward_backward(net, data, label)\n", 249 | " # update parameters\n", 250 | " trainer.step(batch.data[0].shape[0])\n", 251 | " \n", 252 | "def valid_batch(batch, ctx, net):\n", 253 | " data = batch.data[0].as_in_context(ctx[0])\n", 254 | " pred = nd.argmax(net(data), axis=1)\n", 255 | " return nd.sum(pred == batch.label[0].as_in_context(ctx[0])).asscalar() \n", 256 | "\n", 257 | "def run(num_gpus, batch_size, lr): \n", 258 | " # the list of GPUs will be used\n", 259 | " ctx = [mx.gpu(i) for i in range(num_gpus)]\n", 260 | " print('Running on {}'.format(ctx))\n", 261 | " \n", 262 | " # data iterator\n", 263 | " mnist = get_mnist()\n", 264 | " 
train_data = NDArrayIter(mnist[\"train_data\"], mnist[\"train_label\"], batch_size)\n", 265 | " valid_data = NDArrayIter(mnist[\"test_data\"], mnist[\"test_label\"], batch_size)\n", 266 | " print('Batch size is {}'.format(batch_size))\n", 267 | " \n", 268 | " net.collect_params().initialize(force_reinit=True, ctx=ctx)\n", 269 | " trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})\n", 270 | " for epoch in range(5):\n", 271 | " # train\n", 272 | " start = time()\n", 273 | " train_data.reset()\n", 274 | " for batch in train_data:\n", 275 | " train_batch(batch, ctx, net, trainer)\n", 276 | " nd.waitall() # wait until all computations are finished to benchmark the time\n", 277 | " print('Epoch %d, training time = %.1f sec'%(epoch, time()-start))\n", 278 | " \n", 279 | " # validating\n", 280 | " valid_data.reset()\n", 281 | " correct, num = 0.0, 0.0\n", 282 | " for batch in valid_data:\n", 283 | " correct += valid_batch(batch, ctx, net)\n", 284 | " num += batch.data[0].shape[0] \n", 285 | " print(' validation accuracy = %.4f'%(correct/num))\n", 286 | " \n", 287 | "run(1, 64, .3) \n", 288 | "run(GPU_COUNT, 64*GPU_COUNT, .3) " 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Conclusion\n", 296 | "\n", 297 | "Both parameters and trainers in `gluon` support multiple devices. Moving from one device to many devices is straightforward. " 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "## Next\n", 305 | "[Distributed training with multiple machines](../chapter07_distributed-learning/training-with-multiple-machines.ipynb)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 313 | ] 314 | } 315 | ], 316 | "metadata": { 317 | "anaconda-cloud": {}, 318 | "kernelspec": { 319 | "display_name": "Python 3", 320 | "language": "python", 321 | "name": "python3" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 3 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython3", 333 | "version": "3.4.3" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 2 338 | } 339 | -------------------------------------------------------------------------------- /chapter07_distributed-learning/training-with-multiple-machines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Distributed training with multiple machines\n", 8 | "\n", 9 | "In the previous two tutorials, we saw \n", 10 | "that using multiple GPUs within a machine \n", 11 | "can accelerate training. \n", 12 | "The speedup, however, is limited \n", 13 | "by the number of GPUs installed in that machine.\n", 14 | "And it's rare to find a single machine with more than 16 GPUs nowadays. 
\n", 15 | "For some truly large-scale applications, \n", 16 | "this speedup might still be insufficient.\n", 17 | "For example, it could still take many days \n", 18 | "to train a state-of-the-art CNN on millions of images.\n", 19 | "\n", 20 | "In this tutorial, we'll discuss the key concepts you'll need \n", 21 | "in order to go from a program that does single-machine training\n", 22 | "to one that executes distributed training across multiple machines. \n", 23 | "We depict a typical distributed system in the following figure, where\n", 24 | "multiple machines are connected by network switches.\n", 25 | "\n", 26 | "![](../img/multi-machines.svg)\n", 27 | "\n", 28 | "Note that the way we used `copyto` to copy data from one GPU to another in the [multiple-GPU tutorial](../multiple-gpus-scratch.ipynb) does not work when our GPUs are sitting on different machines. To make use of the available resources here well need a better abstraction." 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "source": [ 37 | "## Key-value store\n", 38 | "\n", 39 | "MXNet provides a key-value store to synchronize data among devices. The following code initializes an `ndarray` associated with the key \"weight\" on a key-value store." 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 1, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "=== init \"weight\" ===\n", 52 | "[[ 0.54881352 0.59284461 0.71518934]\n", 53 | " [ 0.84426576 0.60276335 0.85794562]]\n", 54 | "\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "from mxnet import kv, nd\n", 60 | "store = kv.create('local')\n", 61 | "shape = (2, 3)\n", 62 | "x = nd.random_uniform(shape=shape)\n", 63 | "store.init('weight', x) \n", 64 | "print('=== init \"weight\" ==={}'.format(x))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "After initialization, we can pull the value to multiple devices. " 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 2, 77 | "metadata": { 78 | "scrolled": true 79 | }, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "=== pull \"weight\" to [gpu(0), gpu(1)] ===\n", 86 | "[\n", 87 | "[[ 0.54881352 0.59284461 0.71518934]\n", 88 | " [ 0.84426576 0.60276335 0.85794562]]\n", 89 | ", \n", 90 | "[[ 0.54881352 0.59284461 0.71518934]\n", 91 | " [ 0.84426576 0.60276335 0.85794562]]\n", 92 | "]\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "from mxnet import gpu\n", 98 | "ctx = [gpu(0), gpu(1)]\n", 99 | "y = [nd.zeros(shape, ctx=c) for c in ctx]\n", 100 | "store.pull('weight', out=y)\n", 101 | "print('=== pull \"weight\" to {} ===\\n{}'.format(ctx, y))" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "We can also push new data value into the store. It will first sum the data on the same key and then overwrite the current value." 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 3, 114 | "metadata": { 115 | "scrolled": true 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "=== push to \"weight\" ===\n", 123 | "[\n", 124 | "[[ 1. 1. 1.]\n", 125 | " [ 1. 1. 1.]]\n", 126 | ", \n", 127 | "[[ 2. 2. 2.]\n", 128 | " [ 2. 2. 2.]]\n", 129 | "]\n", 130 | "=== pull \"weight\" ===\n", 131 | "[\n", 132 | "[[ 3. 3. 3.]\n", 133 | " [ 3. 3. 
3.]]\n", 134 | ", \n", 135 | "[[ 3. 3. 3.]\n", 136 | " [ 3. 3. 3.]]\n", 137 | "]\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "z = [nd.ones(shape, ctx=ctx[i])+i for i in range(len(ctx))]\n", 143 | "store.push('weight', z)\n", 144 | "print('=== push to \"weight\" ===\\n{}'.format(z))\n", 145 | "store.pull('weight', out=y)\n", 146 | "print('=== pull \"weight\" ===\\n{}'.format(y))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "With `push` and `pull` we can replace the `allreduce` function defined in [multiple-gpus-scratch](P14-C02-multiple-gpus-scratch.ipynb) by\n", 154 | "\n", 155 | "```python\n", 156 | "def allreduce(data, data_name, store):\n", 157 | " store.push(data_name, data)\n", 158 | " store.pull(data_name, out=data)\n", 159 | "```" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Distributed key-value store\n", 167 | "\n", 168 | "Not only can we synchronize data within a machine, with the key-value store we can facilitate inter-machine communication. To use it, one can create a distributed kvstore by using the following command: (Note: distributed key-value store requires `MXNet` to be compiled with the flag `USE_DIST_KVSTORE=1`, e.g. `make USE_DIST_KVSTORE=1`.)\n", 169 | "\n", 170 | "```python\n", 171 | "store = kv.create('dist')\n", 172 | "```\n", 173 | "\n", 174 | "Now if we run the code from the previous section on two machines at the same time, then the store will aggregate the two ndarrays pushed from each machine, and after that, the pulled results will be: \n", 175 | "\n", 176 | "```\n", 177 | "[[ 6. 6. 6.]\n", 178 | " [ 6. 6. 6.]]\n", 179 | "```\n", 180 | "\n", 181 | "In the distributed setting, `MXNet` launches three kinds of processes (each time, running `python myprog.py` will create a process). One is a *worker*, which runs the user program, such as the code in the previous section. The other two are the *server*, which maintains the data pushed into the store, and the *scheduler*, which monitors the aliveness of each node.\n", 182 | "\n", 183 | "It's up to users which machines to run these processes on. But to simplify the process placement and launching, MXNet provides a tool located at [tools/launch.py](https://github.com/dmlc/mxnet/blob/master/tools/launch.py). \n", 184 | "\n", 185 | "Assume there are two machines, A and B. They are ssh-able, and their IPs are saved in a file named `hostfile`. Then we can start one worker in each machine through: \n", 186 | "\n", 187 | "```\n", 188 | "$ mxnet_path/tools/launch.py -H hostfile -n 2 python myprog.py\n", 189 | "```\n", 190 | "\n", 191 | "It will also start a server in each machine, and the scheduler on the same machine we are currently on.\n", 192 | "\n", 193 | "![](img/dist_kv.svg)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "## Using `kvstore` in `gluon`\n", 201 | "\n", 202 | "As mentioned in [our section on training with multiple GPUs from scratch](multiple-gpus-scratch.ipynb#data-parallelism), to implement data parallelism we just need to specify \n", 203 | "\n", 204 | "- how to split data\n", 205 | "- how to synchronize gradients and weights\n", 206 | "\n", 207 | "We already see from [multiple-gpu-gluon](P14-C03-multiple-gpus-gluon.ipynb#put-all-things-together) that a `gluon` trainer can automatically aggregate the gradients among different GPUs. What it really does is having a key-value store with type `local` within it. 
Therefore, to change to multi-machine training we only need to pass a distributed key-value store, for example,\n", 208 | "\n", 209 | "```python\n", 210 | "store = kv.create('dist')\n", 211 | "trainer = gluon.Trainer(..., kvstore=store)\n", 212 | "```\n", 213 | "\n", 214 | "To split the data, however, we cannot directly copy the previous approach. One commonly used solution is to split the whole dataset into *k* parts at the beginning, then let the *i*-th worker only read the *i*-th part of the data.\n", 215 | "\n", 216 | "We can obtain the total number of workers by reading the attribute `num_workers` and the rank of the current worker from the attribute `rank`." 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 4, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "total number of workers: 1\n", 229 | "my rank among workers: 0\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "print('total number of workers: %d'%(store.num_workers))\n", 235 | "print('my rank among workers: %d'%(store.rank))" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "With this information, we can manually access the proper chunk of the input data. In addition, several data iterators provided by `MXNet` already support reading only part of the data. For example,\n", 243 | "\n", 244 | "```python\n", 245 | "from mxnet.io import ImageRecordIter\n", 246 | "data = ImageRecordIter(num_parts=store.num_workers, part_index=store.rank, ...)\n", 247 | "```" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": { 253 | "collapsed": true 254 | }, 255 | "source": [ 256 | "For whinges or inquiries, [open an issue on GitHub.](https://github.com/zackchase/mxnet-the-straight-dope)" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.4.3" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 2 281 | } 282 | -------------------------------------------------------------------------------- /chapter11_recommender-systems/intro-recommender-systems.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to recommender systems\n", 8 | "[Early, early draft]\n", 9 | "\n", 10 | "This chapter introduces recommender systems (commonly called RecSys),\n", 11 | "tools that recommend *items* to *users*.\n", 12 | "Many of the most popular uses of recommender systems \n", 13 | "involve suggesting products to customers.\n", 14 | "Amazon, for example, uses recommender systems to choose which retail products to display.\n", 15 | "Recommender systems aren't limited to physical products. 
\n", 16 | "For example, the algorithms that Pandora and Spotify use to curate playlists\n", 17 | "are recommender systems.\n", 18 | "Personalized suggestions on news websites are recommender systems.\n", 19 | "And as of this writing, several carousels on the home page for \n", 20 | "Amazon's Prime Videos's contain personalized TV and Movie recommendations.\n", 21 | "\n", 22 | "![](../img/recommended-prime-tv.png)\n", 23 | "\n", 24 | "I (Zack) have honestly no idea why Amazon wants me to watch Bubble Guppies. \n", 25 | "It's possible that Bubble Guppies is a masterpiece,\n", 26 | "and the recommender systems knows that my life will change upon watching it.\n", 27 | "It's also possible that the recommender made a mistake.\n", 28 | "For example, it might have extrapolated incorrectly from my affinity for the anime Death Note,\n", 29 | "thinking that I would similarly love any animated series.\n", 30 | "And, since I've never rated a nickelodean series (either postiively or negatively),\n", 31 | "the system may have no knowledge to the contrary.\n", 32 | "It's also possible that this series is a new addition to the catalogue,\n", 33 | "and thus they need to recommend the item to many users in ordder to develop a sense of *who* likes Bubble Guppies.\n", 34 | "This problem, of sorting out how to handle a new item, is called the *cold-start* problem.\n", 35 | "\n", 36 | "\n", 37 | "A recommender system doesn't have to use any sophisticated machine learning techniques.\n", 38 | "And it doesn't even have to be personalized.\n", 39 | "One reasonable baseline for most applications \n", 40 | "is to suggest the most popular items to everyone. \n", 41 | "But we have to be careful.\n", 42 | "Depending on how we define popularity,\n", 43 | "we might create a feedback loop.\n", 44 | "The most popular items get recommended which makes them even more popular,\n", 45 | "which makes them even more frequently recommended, etc.\n", 46 | "\n", 47 | "For services with diverse users,\n", 48 | "however, personalization can be essential.\n", 49 | "Diapers are among the most popular items on Amazon,\n", 50 | "but we probably shouldn't recommend diapers \n", 51 | "to adolescents. \n", 52 | "We also probably *should not* recommend anything associated with Justin Bieber\n", 53 | "to a user who *isn't* an adolescent. \n", 54 | "Moreover, we might want to personalize, not only to the user, but to the context.\n", 55 | "For example, just after I bought a Pixel phone,\n", 56 | "I was in the market for a phone case.\n", 57 | "But I have no interested in buying a phone case one year later.\n", 58 | "\n", 59 | "\n", 60 | "## Many ways to pose the problem \n", 61 | "\n", 62 | "While it might seem obvious,\n", 63 | "that personalization is a good strategy,\n", 64 | "it's not immediately obvious how best to articualate \n", 65 | "recommendation as a machine learning problem. 
\n", 66 | "\n", 67 | "Discuss:\n", 68 | "* Rating prediction\n", 69 | "* Passive feedback (view/notview)\n", 70 | "* Content-based recommendation\n", 71 | "\n", 72 | "## Amazon review dataset\n", 73 | "\n", 74 | "* introduce dataset\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import mxnet\n", 86 | "import mxnet.ndarray as nd\n", 87 | "import urllib\n", 88 | "import gzip" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 10, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "with gzip.open(urllib.request.urlopen(\"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Grocery_and_Gourmet_Food_5.json.gz\")) as f:\n", 100 | " data = [eval(l) for l in f]\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 11, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "{'asin': '616719923X',\n", 112 | " 'helpful': [0, 0],\n", 113 | " 'overall': 4.0,\n", 114 | " 'reviewText': 'Just another flavor of Kit Kat but the taste is unique and a bit different. The only thing that is bothersome is the price. I thought it was a bit expensive....',\n", 115 | " 'reviewTime': '06 1, 2013',\n", 116 | " 'reviewerID': 'A1VEELTKS8NLZB',\n", 117 | " 'reviewerName': 'Amazon Customer',\n", 118 | " 'summary': 'Good Taste',\n", 119 | " 'unixReviewTime': 1370044800}" 120 | ] 121 | }, 122 | "execution_count": 11, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "data[0]" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## [Do some dataset exploration]\n", 136 | "* Look at the average rating\n", 137 | "* Look at the number of unique users and items\n", 138 | "* Plot a histogram of the number of ratings/reviews corresponding to each user\n", 139 | "* \"\" for items" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 17, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "users = [d['reviewerID'] for d in data]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 18, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "items = [d['asin'] for d in data]" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 14, 167 | "metadata": { 168 | "collapsed": true 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "ratings = [d['overall'] for d in data]" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "## Models \n", 180 | "* Just the average\n", 181 | "* Offset plus user and item biases\n", 182 | "* Latent factor model / matrix factorization" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": true 190 | }, 191 | "outputs": [], 192 | "source": [] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": true 199 | }, 200 | "outputs": [], 201 | "source": [] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": true 208 | }, 209 | "outputs": [], 210 | "source": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | 
"collapsed": true 217 | }, 218 | "outputs": [], 219 | "source": [] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": true 226 | }, 227 | "outputs": [], 228 | "source": [] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.4.3" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /chapter11_recommender-systems/introduction-to-recommender-systems.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to recommender systems\n", 8 | "\n", 9 | "* Explain recsys from the beginning\n", 10 | "* Introduce task of rating prediction\n", 11 | "* introduce dataset\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import mxnet\n", 23 | "import mxnet.ndarray as nd\n", 24 | "import urllib\n", 25 | "import gzip" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 10, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "with gzip.open(urllib.request.urlopen(\"http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Grocery_and_Gourmet_Food_5.json.gz\")) as f:\n", 35 | " data = [eval(l) for l in f]\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 11, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "{'asin': '616719923X',\n", 47 | " 'helpful': [0, 0],\n", 48 | " 'overall': 4.0,\n", 49 | " 'reviewText': 'Just another flavor of Kit Kat but the taste is unique and a bit different. The only thing that is bothersome is the price. 
I thought it was a bit expensive....',\n", 50 | " 'reviewTime': '06 1, 2013',\n", 51 | " 'reviewerID': 'A1VEELTKS8NLZB',\n", 52 | " 'reviewerName': 'Amazon Customer',\n", 53 | " 'summary': 'Good Taste',\n", 54 | " 'unixReviewTime': 1370044800}" 55 | ] 56 | }, 57 | "execution_count": 11, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "data[0]" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## [Do some dataset exploration]\n", 71 | "* Look at the average rating\n", 72 | "* Look at the number of unique users and items\n", 73 | "* Plot a histogram of the number of ratings/reviews corresponding to each user\n", 74 | "* \"\" for items" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 17, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "users = [d['reviewerID'] for d in data]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 18, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "items = [d['asin'] for d in data]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 14, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "ratings = [d['overall'] for d in data]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Models \n", 111 | "* Just the average\n", 112 | "* Offset plus user and item biases\n", 113 | "* Latent factor model / matrix factorization" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": true 121 | }, 122 | "outputs": [], 123 | "source": [] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": true 130 | }, 131 | "outputs": [], 132 | "source": [] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.4.3" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 2 183 | } 184 | -------------------------------------------------------------------------------- /chapter13_unsupervised-learning/.gitignore: -------------------------------------------------------------------------------- 1 | *.gz 2 | *.params 3 | *.png 4 | -------------------------------------------------------------------------------- /docs/C01-install.md: -------------------------------------------------------------------------------- 1 | # Run these tutorials 2 | 3 | Each tutorial consists of a Jupyter notebook, which is editable and 4 | runnable. To run these notebooks, you must have `python` installed. 
5 | Additionally, you'll need `jupyter` and a recent version of `mxnet`. 6 | The following commands install them through `pip`: 7 | 8 | ```bash 9 | # optional: update pip to the newest version 10 | sudo pip install --upgrade pip 11 | # install jupyter 12 | pip install jupyter --user 13 | # install the nightly built mxnet 14 | pip install mxnet --pre --user 15 | ``` 16 | 17 | The default `MXNet` package only supports CPU but some tutorials require 18 | GPUs. If you are running on a computer that has a GPU and either CUDA 7.5 19 | or 8.0 is installed, then the following commands install a GPU-enabled 20 | version of MXNet. 21 | 22 | ```bash 23 | pip install mxnet-cu80 --pre --user # for CUDA 8.0 24 | pip install mxnet-cu90 --pre --user # for CUDA 9.0 25 | ``` 26 | 27 | After completing installation, you're ready to obtain and run the source code: 28 | 29 | ```bash 30 | git clone https://github.com/zackchase/mxnet-the-straight-dope/ 31 | cd mxnet-the-straight-dope 32 | jupyter notebook 33 | ``` 34 | 35 | The last command starts the Jupyter notebook. You can now run and edit the 36 | notebooks in a web browser. If you're running the notebooks on a server, 37 | then you might want to ssh with the `-L` flag to tie localhost:8888 38 | on your machine to localhost:8888 on the server: 39 | 40 | ``` 41 | ssh myserver -L 8888:localhost:8888 42 | ``` 43 | 44 | Pro tip: if you'd like to run your notebook on some port other than 8888, 45 | launch it with: 46 | 47 | ```jupyter notebook --port <port>``` 48 | -------------------------------------------------------------------------------- /docs/C01-install.rst: -------------------------------------------------------------------------------- 1 | Run these tutorials 2 | =========================== 3 | 4 | Each tutorial is made from a Jupyter notebook, which is editable and 5 | runnable. Assuming ``python`` is already installed, then in addition, both 6 | ``jupyter`` and a recent version of ``mxnet`` are required. The following 7 | commands install them through ``pip``: 8 | 9 | .. code-block:: bash 10 | 11 | # optional: update pip to the newest version 12 | sudo pip install --upgrade pip 13 | # install jupyter 14 | pip install jupyter --user 15 | # install the nightly built mxnet 16 | pip install mxnet --pre --user 17 | 18 | The default ``MXNet`` package only supports CPU while some tutorials may need 19 | GPUs. If a GPU is available and either CUDA 7.5 or 8.0 is installed, then we can 20 | install the GPU-enabled package 21 | 22 | .. code-block:: bash 23 | 24 | pip install mxnet-cu80 --pre --user # for CUDA 8.0 25 | pip install mxnet-cu90 --pre --user # for CUDA 9.0 26 | 27 | Now we are ready to obtain the source code and run it 28 | 29 | .. code-block:: bash 30 | 31 | git clone https://github.com/zackchase/mxnet-the-straight-dope/ 32 | cd mxnet-the-straight-dope 33 | jupyter notebook 34 | 35 | The last command starts the Jupyter notebook; you can now edit and run these 36 | tutorials. 37 | -------------------------------------------------------------------------------- /docs/C02-contribute.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | For whinges and inquiries, please open 4 | [an issue at github](https://github.com/zackchase/mxnet-the-straight-dope/issues). 5 | 6 | To contribute code, please follow these guidelines: 7 | 8 | 1. Check the 9 | [roadmap](https://github.com/zackchase/mxnet-the-straight-dope/#roadmap) 10 | before creating a new tutorial. 11 | 12 | 2. 
Cover only a single new concept in each tutorial, and explain it in detail. Do 13 | not assume readers already know it. 14 | 15 | 3. Make both the prose and the code as simple as possible. Each tutorial should take 16 | no more than 20 minutes to read. 17 | 18 | 4. Do not submit large files, such as datasets or images, to the repo. You can 19 | upload them to a different repo and cross-reference it. For example: 20 | 21 | - Insert an image: 22 | 23 | ``` 24 | ![](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mnist.png) 25 | ``` 26 | 27 | - Download a dataset if it does not exist locally: 28 | 29 | ``` 30 | mx.test_utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/ptb/ptb.train.txt') 31 | ``` 32 | 33 | 5. Resize images to proper sizes. Large images look fine in the notebook, 34 | but they may render poorly in the HTML or PDF format. 35 | 36 | 6. Either restart and evaluate all code blocks or clean all outputs before 37 | submitting. 38 | 39 | - For the former, you can click `Kernel -> Restart & Run All` in the 40 | Jupyter notebook menu. 41 | - For the latter, use `Kernel -> Restart & Clear Output`. Then our Jenkins 42 | server will evaluate this notebook when building the documents. It is 43 | recommended because it can be used as a unit test. But only do it if this 44 | notebook is fast to run (e.g. less than 5 minutes) and does not require a 45 | GPU. 46 | 47 | 7. You can build the documents locally to preview the changes. It requires a GPU 48 | with `CUDA 8.0` installed, and also `conda`. The 49 | following commands create an environment with all requirements installed: 50 | 51 | ```bash 52 | # assume at the root directory of this project 53 | conda env create -f environment.yml 54 | source activate gluon_docs 55 | ``` 56 | 57 | Now you are able to build the HTML pages: 58 | 59 | ```bash 60 | make html 61 | ``` 62 | -------------------------------------------------------------------------------- /docs/C02-contribute.rst: -------------------------------------------------------------------------------- 1 | How to contribute 2 | =================== 3 | 4 | For whinges and inquiries, please open `an issue at github 5 | <https://github.com/zackchase/mxnet-the-straight-dope/issues>`_. 6 | 7 | To contribute code, please follow these guidelines: 8 | 9 | 1. Check the `roadmap 10 | <https://github.com/zackchase/mxnet-the-straight-dope/#roadmap>`_ before 11 | creating a new tutorial. 12 | 13 | 2. Cover only a single new concept in each tutorial, and explain it in detail. Do 14 | not assume readers already know it. 15 | 16 | 3. Make both the prose and the code as simple as possible. Each tutorial should take 17 | no more than 20 minutes to read. 18 | 19 | 4. Do not submit large files, such as datasets or images, to the repo. You can 20 | upload them to a different repo and cross-reference it. For example: 21 | 22 | - Insert an image:: 23 | 24 | ![](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mnist.png) 25 | 26 | - Download a dataset if it does not exist locally:: 27 | 28 | mx.test_utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/ptb/ptb.train.txt') 29 | 30 | 5. Resize images to proper sizes. Large images look fine in the notebook, 31 | but they may render poorly in the HTML or PDF format. 32 | 33 | 6. Either restart and evaluate all code blocks or clean all outputs before 34 | submitting. 35 | 36 | - For the former, you can click ``Kernel -> Restart & Run All`` in the 37 | Jupyter notebook menu. 38 | - For the latter, use ``Kernel -> Restart & Clear Output``. 
Then our Jenkins 39 | server will evaluate this notebook when building the documents. It is 40 | recommended because it can be used as a unit test. But only do it if this 41 | notebook is fast to run (e.g. less than 5 minutes) and does not require a 42 | GPU. 43 | 44 | 7. (Update: this feature is not available for Jupyter now.) If you want to reference a function or class, use 45 | `sphinx domains `_. For example 46 | 47 | - function: ``:func:`mxnet.ndarray.zeros``` to :func:`mxnet.ndarray.zeros` 48 | - class ``:class:`mxnet.gluon.Parameter``` to :class:`mxnet.gluon.Parameter` 49 | - also works for numpy: ``:func:`numpy.zeros``` to :func:`numpy.zeros` 50 | 51 | 8. You can build the documents locally to preview the changes. Assuming ``conda`` 52 | is available, the following commands create an environment with all 53 | requirements installed:: 54 | 55 | # assume at the root directory of this project 56 | conda env create -f environment.yml 57 | source activate gluon_docs 58 | 59 | Now you are able to build the HTML pages:: 60 | 61 | make html 62 | 63 | If LaTeX is installed, you can also build the PDF version:: 64 | 65 | make latex 66 | make -C _build/latex 67 | -------------------------------------------------------------------------------- /docs/publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build all docs and publish the notebooks. 4 | set -x 5 | set -e 6 | 7 | NOTEBOOK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.." 8 | cd ${NOTEBOOK_DIR} 9 | 10 | # install a gpu version 11 | # sed -i.bak s/mxnet/mxnet-cu90/g environment.yml 12 | 13 | # prepare the env 14 | conda env update -f environment.yml 15 | source activate gluon_docs 16 | 17 | make html 18 | 19 | rm -rf ~/www/latest 20 | mv _build/html ~/www/latest 21 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: gluon 2 | dependencies: 3 | - python 4 | - libgfortran 5 | - jupyter 6 | - matplotlib 7 | - pandas 8 | - pip: 9 | - requests 10 | - mxnet>=0.11.1b20171003 11 | -------------------------------------------------------------------------------- /img/Assault-clipped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assault-clipped.png -------------------------------------------------------------------------------- /img/Assault.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assault.png -------------------------------------------------------------------------------- /img/Assualt_DDQN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assualt_DDQN.png -------------------------------------------------------------------------------- /img/Assualt_DDQN_Clipped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Assualt_DDQN_Clipped.png -------------------------------------------------------------------------------- 
/img/Pixel2pixel-Unet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/Pixel2pixel-Unet.png -------------------------------------------------------------------------------- /img/bbb_nn_bayes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/bbb_nn_bayes.png -------------------------------------------------------------------------------- /img/bbb_nn_classic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/bbb_nn_classic.png -------------------------------------------------------------------------------- /img/berliner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/berliner.jpg -------------------------------------------------------------------------------- /img/cat-cartoon1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat-cartoon1.png -------------------------------------------------------------------------------- /img/cat-cartoon2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat-cartoon2.png -------------------------------------------------------------------------------- /img/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat1.jpg -------------------------------------------------------------------------------- /img/cat2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cat2.jpg -------------------------------------------------------------------------------- /img/catdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/catdog.jpg -------------------------------------------------------------------------------- /img/cgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/cgan.png -------------------------------------------------------------------------------- /img/comic-hot-dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/comic-hot-dog.png -------------------------------------------------------------------------------- /img/data-collection.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/data-collection.png -------------------------------------------------------------------------------- /img/dcgan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dcgan.png -------------------------------------------------------------------------------- /img/death_cap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/death_cap.jpg -------------------------------------------------------------------------------- /img/deeplearning_amazon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/deeplearning_amazon.png -------------------------------------------------------------------------------- /img/dist_kv.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Produced by OmniGraffle 6.6.1 2017-07-24 05:08:25 +0000Canvas 2Layer 1Worker 0Worker 1Server 0Server 1Schedulerfrom mxnet import kvstore = kv.create('dist')store.init('w0', …)store.push('w0', …)store.pull('w0', …) from mxnet import kvstore = kv.create('dist')store.init('w0', …)store.push('w0', …)store.pull('w0', …) myprog.pymyprog.pymachine Amachine B 4 | -------------------------------------------------------------------------------- /img/dog-cartoon1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog-cartoon1.png -------------------------------------------------------------------------------- /img/dog-cartoon2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog-cartoon2.jpg -------------------------------------------------------------------------------- /img/dog1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog1.jpg -------------------------------------------------------------------------------- /img/dog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog2.jpg -------------------------------------------------------------------------------- /img/dog_hotdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dog_hotdog.jpg -------------------------------------------------------------------------------- /img/dogdogcat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dogdogcat.png -------------------------------------------------------------------------------- 
/img/doughnut.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/doughnut.jpg -------------------------------------------------------------------------------- /img/dropout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/dropout.png -------------------------------------------------------------------------------- /img/fake_bedrooms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/fake_bedrooms.png -------------------------------------------------------------------------------- /img/filters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/filters.png -------------------------------------------------------------------------------- /img/fine-tune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/fine-tune.png -------------------------------------------------------------------------------- /img/gd-move.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/gd-move.png -------------------------------------------------------------------------------- /img/growth-2-20-girls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/growth-2-20-girls.png -------------------------------------------------------------------------------- /img/gtx-580-gpu.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/gtx-580-gpu.jpeg -------------------------------------------------------------------------------- /img/house_pricing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/house_pricing.png -------------------------------------------------------------------------------- /img/imagenet.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/imagenet.jpeg -------------------------------------------------------------------------------- /img/kaggle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/kaggle.png -------------------------------------------------------------------------------- /img/kaggle_submit.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/kaggle_submit.png -------------------------------------------------------------------------------- /img/kaggle_submit2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/kaggle_submit2.png -------------------------------------------------------------------------------- /img/leg_hotdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/leg_hotdog.jpg -------------------------------------------------------------------------------- /img/legendre.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/legendre.jpeg -------------------------------------------------------------------------------- /img/linear-regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/linear-regression.png -------------------------------------------------------------------------------- /img/ml-loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/ml-loop.png -------------------------------------------------------------------------------- /img/momentum-move.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/momentum-move.png -------------------------------------------------------------------------------- /img/multi-gpu.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Produced by OmniGraffle 6.6.1 2017-07-22 21:18:22 +0000Canvas 1Layer 1GPU 0GPU 1GPU 2GPU 3PCIe Switch CPU 4 | -------------------------------------------------------------------------------- /img/multi-machines.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Produced by OmniGraffle 6.6.1 2017-07-23 10:21:45 +0000Canvas 1Layer 1GPU 0GPU 1GPU 2GPU 3PCIe Switch CPUGPU 0GPU 1GPU 2GPU 3PCIe Switch CPUGPU 0GPU 1GPU 2GPU 3PCIe Switch CPUGPU 0GPU 1GPU 2GPU 3PCIe Switch CPUNetwork Switch 4 | -------------------------------------------------------------------------------- /img/multilayer-perceptron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/multilayer-perceptron.png -------------------------------------------------------------------------------- /img/mxnet_google.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/mxnet_google.png -------------------------------------------------------------------------------- /img/onelayer.graffle/data.plist: 
/img/onelayer.graffle/image4.pdf: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.graffle/image4.pdf
/img/onelayer.graffle/image5.pdf: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.graffle/image5.pdf
/img/onelayer.graffle/image6.pdf: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.graffle/image6.pdf
/img/onelayer.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/onelayer.png
/img/operator-context.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/operator-context.png
/img/overfitting-low-data.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/overfitting-low-data.png
/img/pikachu.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/pikachu.jpg
/img/pizza.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/pizza.png
/img/real_hotdog.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/real_hotdog.jpg
/img/recommended-prime-tv.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recommended-prime-tv.png
/img/recurrent-batching.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recurrent-batching.png
/img/recurrent-lm.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recurrent-lm.png
/img/recurrent-motivation.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/recurrent-motivation.png
/img/regularization-overfitting.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/regularization-overfitting.png
/img/regularization.graffle: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/regularization.graffle
/img/regularization.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/regularization.png
/img/rl-environment.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/rl-environment.png
/img/road-cliff.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/road-cliff.jpg
/img/simple-gan.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-gan.png
/img/simple-net-linear.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-net-linear.png
/img/simple-rnn.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-rnn.png
/img/simple-softmax-net.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/simple-softmax-net.png
/img/sodapopcoke.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/sodapopcoke.png
/img/speech.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/speech.jpg
/img/ssd.svg: (inlined OmniGraffle SVG diagram; labels: input, body, downsample, scale 0, scale 1, class predictor, box predictor)
/img/supervised-learning.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/supervised-learning.png
/img/taxonomy.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/taxonomy.jpg
/img/tensor_cartoon.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/tensor_cartoon.jpg
/img/tensor_contraction.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/tensor_contraction.png
/img/tensor_fibers.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/tensor_fibers.png
/img/training_model.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/training_model.png
/img/wake-word.png: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/wake-word.png
/img/whitecat160.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat160.jpg
/img/whitecat20.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat20.jpg
/img/whitecat320.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat320.jpg
/img/whitecat40.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat40.jpg
/img/whitecat80.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitecat80.jpg
/img/whitedog160.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog160.jpg
/img/whitedog20.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog20.jpg
/img/whitedog320.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog320.jpg
/img/whitedog40.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog40.jpg
/img/whitedog80.jpg: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/img/whitedog80.jpg
/media/polly.mp3: https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/249cb446a8d0d711c5ca7128ffd68d91fc2e381b/media/polly.mp3
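Every binary asset above resolves against the same raw.githubusercontent.com prefix, pinned to commit 249cb446a8d0d711c5ca7128ffd68d91fc2e381b, so a short script can mirror any subset of them locally. The following is a minimal sketch, not part of the repository: the ASSETS list and the fetch_asset helper are illustrative names, and only Python's standard library is used.

import urllib.request
from pathlib import Path

# Pinned URL prefix shared by every raw asset in the listing above.
BASE = ("https://raw.githubusercontent.com/zackchase/mxnet-the-straight-dope/"
        "249cb446a8d0d711c5ca7128ffd68d91fc2e381b")

# Illustrative subset; extend with any repo-relative path from the listing.
ASSETS = ["/img/doughnut.jpg", "/img/dropout.png", "/media/polly.mp3"]

def fetch_asset(repo_path, dest_root=Path(".")):
    # Recreate the repo-relative directory layout under dest_root,
    # then download the asset into place.
    dest = dest_root / repo_path.lstrip("/")
    dest.parent.mkdir(parents=True, exist_ok=True)
    urllib.request.urlretrieve(BASE + repo_path, str(dest))
    return dest

for path in ASSETS:
    print("fetched", fetch_asset(path))

Because the URLs are pinned to a specific commit rather than a branch, the downloads are reproducible even if the files later change or move on master.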