├── .gitignore
├── README.md
├── autolab
│   └── chunks.py
├── math
│   ├── logsumexp.pdf
│   ├── logsumexp.tex
│   └── softmax_cross_entropy.pdf
├── recitation-10
│   ├── .gitignore
│   ├── Pytorch GANs.ipynb
│   ├── cifar10_wgangp.py
│   ├── images
│   │   ├── tensorboard1.png
│   │   └── tensorboard2.png
│   ├── mnist_cwgangp.py
│   ├── mnist_gan.py
│   ├── mnist_wgangp.py
│   └── train_all.py
├── recitation-11
│   ├── Untitled.ipynb
│   ├── __pycache__
│   │   ├── rbm_demo_utils.cpython-35.pyc
│   │   └── rbm_models.cpython-35.pyc
│   ├── data
│   │   ├── processed
│   │   │   └── test.pt
│   │   └── raw
│   │       ├── t10k-images-idx3-ubyte
│   │       ├── t10k-labels-idx1-ubyte
│   │       └── train-labels-idx1-ubyte
│   ├── net
│   │   └── hopfield
│   │       └── model.pytorch
│   ├── rbm_demo_utils.py
│   └── rbm_models.py
├── recitation-13
│   └── ten_armed_bandit.ipynb
├── recitation-2
│   ├── Tutorial-numpy.ipynb
│   ├── Tutorial-pytorch.ipynb
│   └── pytorch-example.py
├── recitation-3
│   ├── recitation3-solved.ipynb
│   └── recitation3.ipynb
├── recitation-4
│   ├── Tutorial-pytorch-cnn.ipynb
│   ├── image_folder
│   │   ├── face1
│   │   │   └── image1.jpg
│   │   └── face2
│   │       └── image2.jpg
│   └── pytorch-mnist-cnn-example.py
├── recitation-5
│   ├── hamlet.txt
│   ├── recitation5.ipynb
│   ├── recursive1.png
│   ├── rnn.svg
│   └── tweets.txt
├── recitation-6
│   ├── output
│   │   └── shakespeare
│   │       └── checkpoint.pytorch
│   ├── shakespeare.ipynb
│   ├── shakespeare.py
│   └── t8.shakespeare.txt
├── recitation-7
│   └── TSNE+PCA.ipynb
├── recitation-8
│   ├── data
│   │   ├── 0.gif
│   │   ├── 1.gif
│   │   ├── 2.gif
│   │   ├── 3.gif
│   │   ├── 4.gif
│   │   ├── 5.gif
│   │   ├── 6.gif
│   │   ├── 7.gif
│   │   ├── 8.gif
│   │   ├── 9.gif
│   │   └── numbers.npy
│   ├── number_colors.py
│   ├── numbers_colors.ipynb
│   └── seq2seq_translation_tutorial.ipynb
├── recitation-9
│   ├── autoencoder_demo_utils.py
│   ├── data
│   │   ├── processed
│   │   │   ├── test.pt
│   │   │   └── training.pt
│   │   └── raw
│   │       ├── t10k-images-idx3-ubyte
│   │       ├── t10k-labels-idx1-ubyte
│   │       ├── train-images-idx3-ubyte
│   │       └── train-labels-idx1-ubyte
│   ├── net
│   │   ├── hidden_sampling_autoencoder
│   │   │   └── model.pytorch
│   │   ├── regular_autoencoder
│   │   │   └── model.pytorch
│   │   └── reparameterized_variational_autoencoder
│   │       └── model.pytorch
│   └── variational-autoencoders.ipynb
├── tensorflow
│   └── simple_mnist.py
└── visualization
    ├── .gitignore
    ├── README.md
    ├── pytorch_mnist_inferno_tensorboard_example.py
    ├── pytorch_mnist_tnt_visdom_example.py
    ├── tensorboard-cmd.png
    ├── tensorboard.png
    ├── visdom-cmd.png
    └── visdom.png
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deep-learning-tutorials 2 | Tutorials on Deep Learning 3 | -------------------------------------------------------------------------------- /autolab/chunks.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import pickle 4 | from collections import OrderedDict 5 | 6 | import torch 7 | 8 | 9 | def make_chunks(inpath, outpath, chunk_size=1024 * 1024 * 20): 10 | """ 11 | Split file into several chunks 12 | :param inpath: input path 13 | :param outpath: output path 14 | :param chunk_size: max chunk size in bytes 15 | :return: None 16 | """ 17 | i = 0 18 | print("Input file: {}".format(inpath)) 19 | with open(inpath, 'rb') as fin: 20 | while True: 21 | chunk = fin.read(chunk_size) 22 | if len(chunk) > 0: 23 | with open(outpath.format(i), 'wb') as fout: 24 | fout.write(chunk) 25 | print("Output file: {}".format(outpath.format(i))) 26 | i += 1 27 | else: 28 | break 29 | 30 | 31 | def read_chunks(inpath): 32 | """ 33 | Read several chunks into a memory buffer 34 | :param inpath: format string for each chunk 35 | :return: Buffer 36 | """ 37 | data = io.BytesIO() 38 | i = 0 39 | while os.path.exists(inpath.format(i)): 40 | with open(inpath.format(i), 'rb') as fin: 41 | data.write(fin.read()) 42 | i += 1 43 | if i == 0: 44 | raise FileNotFoundError("missing file: {}".format(inpath.format(i))) 45 | data.seek(0) 46 | return data 47 | 48 | 49 | def torch_to_numpy(inpath, outpath): 50 | """ 51 | Convert torch save file to a pickle save file 52 | :param inpath: torch save path 53 | :param outpath: path for new pickle save 54 | :return: None 55 | """ 56 | data = torch.load(inpath) 57 | cdata = OrderedDict([(k, w.cpu().numpy()) for k, w in data.items()]) 58 | with open(outpath, 'wb') as f: 59 | pickle.dump(cdata, f) 60 | 61 | 62 | def load_from_numpy(f): 63 | """ 64 | Read data from a buffer and convert each element to torch. 
65 | :param f: buffer 66 | :return: dictionary of torch tensors 67 | """ 68 | cdata = pickle.load(f) 69 | data = OrderedDict([(k, torch.from_numpy(w)) for k, w in cdata.items()]) 70 | return data 71 | 72 | 73 | if __name__ == '__main__': 74 | # Example of usage 75 | model_path = '../output/model-v2/model-00000099.tar' # saved torch state_dict 76 | numpy_path = model_path + '.npy' # numpy dump of file 77 | chunk_path = model_path + '.npy.{}' # format for each chunk 78 | 79 | # To write your model in chunks 80 | # Convert torch model to pickled numpy arrays 81 | torch_to_numpy(model_path, numpy_path) 82 | # Split pickled file into multiple chunks 83 | make_chunks(numpy_path, chunk_path) 84 | 85 | # To read your model in chunks 86 | # Read the chunks 87 | data = read_chunks(chunk_path) 88 | # Load the data 89 | state_dict = load_from_numpy(data) 90 | # Load dictionary into your model 91 | # model.load_state_dict(state_dict) 92 | -------------------------------------------------------------------------------- /math/logsumexp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/math/logsumexp.pdf -------------------------------------------------------------------------------- /math/logsumexp.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \usepackage{graphicx} 3 | \usepackage{amsmath} 4 | \begin{document} 5 | 6 | \title{LogSumExp Derivation} 7 | \author{Benjamin Striner} 8 | 9 | \maketitle 10 | 11 | \section{LogSumExp Trick} 12 | 13 | Calculating $\log \sum_i e^{x_i} $ is frequent in machine learning and is referred to as LogSumExp. A common trick makes this function numerically stable. 14 | 15 | \begin{align} 16 | \log \sum_i e^{x_i - C} &= \log \sum_i \frac{e^{x_i}}{e^{C}} \\ 17 | & = \log \frac{\sum_i e^{x_i}}{e^C} \\ 18 | & = \log[\sum_i e^{x_i}] - \log e^{C} \\ 19 | \log \sum_i e^{x_i - C} & = \log[\sum_i e^{x_i}] - C \\ 20 | \log[\sum_i e^{x_i}] &= \log[\sum_i e^{x_i - C}] + C 21 | \end{align} 22 | 23 | We typically select $ C=\max_j x_j $. That means the largest exponent we calculate is $e^0$, so our exponents never overflow. 24 | 25 | \section{Softmax} 26 | 27 | Typical softmax formulation is $ f(x)_i = \frac{e^{x_i}}{\sum_j e^{x_j}} $. 28 | 29 | \section{Cross-entropy} 30 | 31 | Typical cross-entropy formulation is $ L(p, q) = -\sum_i p_i \log q_i$. 32 | 33 | \section{Cross-entropy of Softmax} 34 | 35 | The cross-entropy of a softmax is therefore $L(p,f(x))=- \sum_i p_i \log \frac{e^{x_i}}{\sum_j e^{x_j}}$. This calculation can be stabilized using the LogSumExp trick. 36 | 37 | \begin{align} 38 | L(p,f(x)) &=- \sum_i p_i \log \frac{e^{x_i}}{\sum_j e^{x_j}} \\ 39 | & = -\sum_i p_i [ \log(e^{x_i}) - \log(\sum_j e^{x_j})] \\ 40 | L(p,f(x)) &= -\sum_i p_i [ x_i - LogSumExp_j(x_j)] 41 | \end{align} 42 | 43 | \section{Conclusion} 44 | You normally won't have to do the math yourself. Pytorch loss functions ``BCEWithLogitsLoss'' and ``CrossEntropyLoss'' will perform these calculations for you. 45 | 46 | The important thing to remember is not to include a softmax or sigmoid at the output of your network when using these loss functions. The softmax or sigmoid is already included in the loss function and you don't want to accidentally apply it twice. 47 | 48 | You can build a network without using the trick, but you risk NaN errors whenever the exponents overflow.
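As a quick sanity check, consider $x = (1000, 1000)$. Evaluating $e^{1000}$ directly overflows double precision (whose maximum is roughly $1.8 \times 10^{308}$), but with $C = \max_j x_j = 1000$ the trick only ever evaluates $e^0$:

\begin{align}
\log[e^{1000} + e^{1000}] &= \log[e^{0} + e^{0}] + 1000 = 1000 + \log 2
\end{align}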
49 | 50 | \end{document} -------------------------------------------------------------------------------- /math/softmax_cross_entropy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/math/softmax_cross_entropy.pdf -------------------------------------------------------------------------------- /recitation-10/.gitignore: -------------------------------------------------------------------------------- 1 | /output 2 | /data 3 | -------------------------------------------------------------------------------- /recitation-10/Pytorch GANs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pytorch GANs" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "An example script for creating and running a GAN is in the accompanying file `mnist_gan.py`.\n", 15 | "\n", 16 | "https://github.com/cmudeeplearning11785/deep-learning-tutorials/blob/master/recitation-10/mnist_gan.py\n", 17 | "\n", 18 | "- Inferno is used for training and logging\n", 19 | "- The main inferno training loop pumps real images and trains the discriminator\n", 20 | "- A callback periodically trains the generator\n", 21 | "\n", 22 | "If you want the full experience, please try running tensorboard while the script runs. \n", 23 | "\n", 24 | "- Live images will be drawn to the webpage as your network trains.\n", 25 | "- A video will be rendered when training completes (if you install ffmpeg).\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "![tensorboard](https://github.com/cmudeeplearning11785/deep-learning-tutorials/raw/master/recitation-10/images/tensorboard1.png)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "![tensorboard](https://github.com/cmudeeplearning11785/deep-learning-tutorials/raw/master/recitation-10/images/tensorboard2.png)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# Import the actual code from the linked file\n", 49 | "import mnist_gan\n", 50 | "import mnist_wgangp\n", 51 | "import mnist_cwgangp\n", 52 | "import cifar10_wgangp\n", 53 | "from IPython.core.display import HTML" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# Successful GAN\n", 61 | "\n", 62 | "First we train the GAN with settings that converge (found through trial-and-error). The generator and discriminator both have a learning rate of 3e-4 and the generator is trained once for every 5 discriminator updates." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "mnist_gan.main([])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "https://www.youtube.com/embed/IUi0REAWj2c?rel=0" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# Failed GAN\n", 86 | "\n", 87 | "Here we see what happens if the generator is trained too much or too little compared to the discriminator. If the generator trains too often it can overpower the discriminator and collapse to a few outputs; if it trains too rarely, the discriminator saturates and the generator's gradients vanish."
88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "mnist_gan.main(['--generator-frequency=1', '--save-directory=output/mnist_gan/frequency-1', '--epochs=50'])" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "https://www.youtube.com/embed/J8m1NXLwSKw" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "# Wasserstein GAN with Gradient Penalty\n", 111 | "\n", 112 | "Here we see an improvement on the traditional GAN that we discussed in lecture." 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "mnist_wgangp.main([])" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "https://www.youtube.com/watch?v=unXILX2wp1A" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "# WGAN-GP on CIFAR10\n", 136 | "\n", 137 | "For a slightly more complicated dataset, this example uses CIFAR10." 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "cifar10_wgangp.main([])" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "https://youtu.be/dAe-UcOfywE" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# Conditional WGAN-GP on MNIST\n", 161 | "\n", 162 | "Here we use a conditional GAN to learn each digit." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "mnist_cwgangp.main([])" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "https://youtu.be/_wuRRwujeHc" 179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 3", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.6.2" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /recitation-10/cifar10_wgangp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import torch 5 | from inferno.trainers.basic import Trainer 6 | from inferno.trainers.callbacks.logging.tensorboard import TensorboardLogger 7 | from torch import nn 8 | from torch.utils.data.dataloader import DataLoader 9 | from torchvision import datasets 10 | from torchvision import transforms 11 | 12 | from mnist_gan import Reshape, save_args, GANModel, GenerateDataCallback, GeneratorTrainingCallback 13 | from mnist_gan import generate_video 14 | from mnist_wgangp import WGANGeneratorLoss, WGANDiscriminatorLoss 15 | 16 | 17 | def cifar10_data_loader(args): 18 | # Create DataLoader for CIFAR10 19 | kwargs = {'num_workers': 2, 'pin_memory': True} if args.cuda else {} 20 | train_loader = DataLoader( 21 | datasets.CIFAR10('./data/cifar10', train=True, download=True, 22 
| transform=transforms.ToTensor()), 23 | batch_size=args.batch_size, shuffle=True, **kwargs) 24 | return train_loader 25 | 26 | 27 | class CIFAR10GeneratorNetwork(nn.Sequential): 28 | # Network for generation 29 | # Input is (N, latent_dim) 30 | def __init__(self, args): 31 | super(CIFAR10GeneratorNetwork, self).__init__(*[m for m in [ 32 | nn.Linear(args.latent_dim, 1024), 33 | nn.BatchNorm1d(1024) if args.generator_batchnorm else None, 34 | nn.LeakyReLU(), 35 | nn.Linear(1024, 2 * 2 * 512), 36 | Reshape(-1, 512, 2, 2), # N, 512,2,2 37 | nn.BatchNorm2d(512) if args.generator_batchnorm else None, 38 | nn.LeakyReLU(), 39 | nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1), # N, 256,4,4 40 | nn.BatchNorm2d(256) if args.generator_batchnorm else None, 41 | nn.LeakyReLU(), 42 | nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1), # N, 128,8,8 43 | nn.BatchNorm2d(128) if args.generator_batchnorm else None, 44 | nn.LeakyReLU(), 45 | nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1), # N, 64,16,16 46 | nn.BatchNorm2d(64) if args.generator_batchnorm else None, 47 | nn.LeakyReLU(), 48 | nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1), # N, 32,32,32 49 | nn.BatchNorm2d(32) if args.generator_batchnorm else None, 50 | nn.LeakyReLU(), 51 | nn.Conv2d(32, 3, kernel_size=3, stride=1, padding=1), # N, 3,32,32 52 | nn.Sigmoid()] if m is not None]) 53 | 54 | 55 | class CIFAR10DiscriminatorNetwork(nn.Sequential): 56 | # Network for discrimination 57 | # Input is (N, 3, 32, 32) 58 | def __init__(self, args): 59 | super(CIFAR10DiscriminatorNetwork, self).__init__(*[m for m in [ 60 | nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1), # N, 64, 16, 16 61 | nn.BatchNorm2d(64) if args.discriminator_batchnorm else None, 62 | nn.LeakyReLU(), 63 | nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1), # N, 128, 8, 8 64 | nn.BatchNorm2d(128) if args.discriminator_batchnorm else None, 65 | nn.LeakyReLU(), 66 | nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1), # N, 256, 4, 4 67 | nn.BatchNorm2d(256) if args.discriminator_batchnorm else None, 68 | nn.LeakyReLU(), 69 | nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1), # N, 512, 2, 2 70 | nn.BatchNorm2d(512) if args.discriminator_batchnorm else None, 71 | nn.LeakyReLU(), 72 | Reshape(-1, 512 * 2 * 2), # N, 512*2*2 73 | nn.Linear(512 * 2 * 2, 1024), # N, 1024 74 | nn.BatchNorm1d(1024) if args.discriminator_batchnorm else None, 75 | nn.LeakyReLU(), 76 | nn.Linear(1024, 1), # N, 1 77 | Reshape(-1)] if m is not None]) # N 78 | 79 | 80 | def run(args): 81 | save_args(args) # save command line to a file for reference 82 | train_loader = cifar10_data_loader(args) # get the data 83 | model = GANModel( 84 | args, 85 | discriminator=CIFAR10DiscriminatorNetwork(args), 86 | generator=CIFAR10GeneratorNetwork(args)) 87 | 88 | # Build trainer 89 | trainer = Trainer(model) 90 | trainer.build_criterion(WGANDiscriminatorLoss(penalty_weight=args.penalty_weight, model=model)) 91 | trainer.build_optimizer('Adam', model.discriminator.parameters(), lr=args.discriminator_lr) 92 | trainer.save_every((1, 'epochs')) 93 | trainer.save_to_directory(args.save_directory) 94 | trainer.set_max_num_epochs(args.epochs) 95 | trainer.register_callback(GenerateDataCallback(args)) 96 | trainer.register_callback(GeneratorTrainingCallback( 97 | args, 98 | parameters=model.generator.parameters(), 99 | criterion=WGANGeneratorLoss())) 100 | trainer.bind_loader('train', train_loader) 101 | # Custom logging configuration so it knows to log our
images 102 | logger = TensorboardLogger( 103 | log_scalars_every=(1, 'iteration'), 104 | log_images_every=(args.log_image_frequency, 'iteration')) 105 | trainer.build_logger(logger, log_directory=args.save_directory) 106 | logger.observe_state('generated_images') 107 | logger.observe_state('real_images') 108 | # logger._trainer_states_being_observed_while_training.remove('training_inputs') 109 | 110 | if args.cuda: 111 | trainer.cuda() 112 | 113 | # Go! 114 | trainer.fit() 115 | 116 | # Generate video from saved images 117 | if not args.no_ffmpeg: 118 | generate_video(args.save_directory) 119 | 120 | 121 | def main(argv): 122 | # Training settings 123 | parser = argparse.ArgumentParser(description='PyTorch GAN Example') 124 | 125 | # Output directory 126 | parser.add_argument('--save-directory', type=str, default='output/cifar10_wgangp/v1', help='output directory') 127 | 128 | # Configuration 129 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', help='batch size') 130 | parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs') 131 | parser.add_argument('--image-frequency', type=int, default=10, metavar='N', help='frequency to write images') 132 | parser.add_argument('--log-image-frequency', type=int, default=100, metavar='N', help='frequency to log images') 133 | parser.add_argument('--generator-frequency', type=int, default=10, metavar='N', help='frequency to train generator') 134 | 135 | # Hyperparameters 136 | parser.add_argument('--latent-dim', type=int, default=100, metavar='N', help='latent dimension') 137 | parser.add_argument('--discriminator-lr', type=float, default=3e-4, metavar='N', help='discriminator learning rate') 138 | parser.add_argument('--generator-lr', type=float, default=3e-4, metavar='N', help='generator learning rate') 139 | parser.add_argument('--penalty-weight', type=float, default=10., metavar='N', help='gradient penalty weight') 140 | parser.add_argument('--discriminator-batchnorm', type=bool, default=False, metavar='N', help='enable BN') 141 | parser.add_argument('--generator-batchnorm', type=bool, default=True, metavar='N', help='enable BN') 142 | 143 | # Flags 144 | parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') 145 | parser.add_argument('--no-ffmpeg', action='store_true', default=False, help='disables video generation') 146 | 147 | args = parser.parse_args(argv) 148 | args.cuda = not args.no_cuda and torch.cuda.is_available() 149 | run(args) 150 | 151 | 152 | if __name__ == '__main__': 153 | main(sys.argv[1:]) 154 | -------------------------------------------------------------------------------- /recitation-10/images/tensorboard1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-10/images/tensorboard1.png -------------------------------------------------------------------------------- /recitation-10/images/tensorboard2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-10/images/tensorboard2.png -------------------------------------------------------------------------------- /recitation-10/mnist_cwgangp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | 
import torch 5 | from inferno.trainers.basic import Trainer 6 | from inferno.trainers.callbacks.logging.tensorboard import TensorboardLogger 7 | from torch import nn 8 | from torch.autograd import Variable 9 | from torch.utils.data.dataloader import DataLoader 10 | from torch.utils.data.dataset import Dataset 11 | from torchvision import datasets 12 | from torchvision import transforms 13 | 14 | from mnist_gan import Reshape, format_images 15 | from mnist_gan import generate_video 16 | from mnist_gan import save_args, GANModel, GenerateDataCallback, GeneratorTrainingCallback 17 | from mnist_wgangp import WGANDiscriminatorLoss, WGANGeneratorLoss 18 | 19 | 20 | class MNISTWrapper(Dataset): 21 | def __init__(self): 22 | super(MNISTWrapper, self).__init__() 23 | self.dataset = datasets.MNIST('./data/mnist', train=True, download=True, 24 | transform=transforms.ToTensor()) 25 | 26 | def __len__(self): 27 | return len(self.dataset) 28 | 29 | def __getitem__(self, item): 30 | x, y = self.dataset[item] 31 | return x, y, y 32 | 33 | 34 | def mnist_cgan_data_loader(args): 35 | # Create DataLoader for MNIST 36 | kwargs = {'num_workers': 2, 'pin_memory': True} if args.cuda else {} 37 | train_loader = DataLoader( 38 | MNISTWrapper(), 39 | batch_size=args.batch_size, shuffle=True, **kwargs) 40 | return train_loader 41 | 42 | 43 | class CGeneratorNetwork(nn.Module): 44 | # Network for generation 45 | # Input is (N, latent_dim) 46 | def __init__(self, args): 47 | super(CGeneratorNetwork, self).__init__() 48 | self.embedding = nn.Embedding(10, args.embedding_dim) 49 | self.trunk = nn.Sequential(*[m for m in [ 50 | nn.Linear(args.latent_dim + args.embedding_dim, 1024), 51 | nn.BatchNorm1d(1024) if args.generator_batchnorm else None, 52 | nn.LeakyReLU(), 53 | nn.Linear(1024, 7 * 7 * 128), 54 | Reshape(-1, 128, 7, 7), # N, 128,7,7 55 | nn.BatchNorm2d(128) if args.generator_batchnorm else None, 56 | nn.LeakyReLU(), 57 | nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1), # N, 64,14,14 58 | nn.BatchNorm2d(64) if args.generator_batchnorm else None, 59 | nn.LeakyReLU(), 60 | nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1), # N, 32,28,28 61 | nn.BatchNorm2d(32) if args.generator_batchnorm else None, 62 | nn.LeakyReLU(), 63 | nn.Conv2d(32, 1, kernel_size=3, stride=1, padding=1), # N, 1,28,28 64 | nn.Sigmoid()] if m is not None]) 65 | 66 | def forward(self, latent, y): 67 | embedded = self.embedding(y) 68 | h = torch.cat((latent, embedded), dim=1) 69 | h = self.trunk(h) 70 | return h 71 | 72 | 73 | class CDiscriminatorNetwork(nn.Module): 74 | # Network for discrimination 75 | # Input is (N, 1, 28, 28) 76 | def __init__(self, args): 77 | super(CDiscriminatorNetwork, self).__init__() 78 | self.embedding = nn.Embedding(10, args.embedding_dim) 79 | self.trunk = nn.Sequential(*[m for m in [ 80 | nn.Conv2d(1 + args.embedding_dim, 64, kernel_size=4, stride=2, padding=1), # N, 64, 14, 14 81 | nn.BatchNorm2d(64) if args.discriminator_batchnorm else None, 82 | nn.LeakyReLU(), 83 | nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1), # N, 128, 7, 7 84 | nn.BatchNorm2d(128) if args.discriminator_batchnorm else None, 85 | nn.LeakyReLU(), 86 | Reshape(-1, 128 * 7 * 7), # N, 128*7*7 87 | nn.Linear(128 * 7 * 7, 1024), # N, 1024 88 | nn.BatchNorm1d(1024) if args.discriminator_batchnorm else None, 89 | nn.LeakyReLU(), 90 | nn.Linear(1024, 1), # N, 1 91 | Reshape(-1)] if m is not None]) # N 92 | 93 | def forward(self, x, y): 94 | embedded = self.embedding(y) # (N, dim) 95 | embedded = 
embedded.unsqueeze(2).unsqueeze(3).expand(-1, -1, x.size(2), x.size(3)) 96 | h = torch.cat((x, embedded), dim=1) 97 | h = self.trunk(h) 98 | return h 99 | 100 | 101 | class CGANModel(GANModel): 102 | # GAN containing generator and discriminator 103 | def __init__(self, args, discriminator, generator): 104 | super(CGANModel, self).__init__( 105 | args=args, 106 | discriminator=discriminator, 107 | generator=generator) 108 | 109 | def generate(self, latent, y): 110 | # Generate fake images from latent inputs 111 | xfake = self.generator(latent, y) 112 | # Save images for later 113 | self._state_hooks['xfake'] = xfake 114 | self._state_hooks['y'] = y 115 | self._state_hooks['generated_images'] = format_images(xfake) # log the generated images 116 | return xfake 117 | 118 | def discriminate(self, x, y): 119 | # Run discriminator on an input 120 | return self.discriminator(x, y) 121 | 122 | def y_fake(self, latent, y): 123 | # Run discriminator on generated images 124 | yfake = self.discriminate(self.generate(latent, y), y) 125 | return yfake 126 | 127 | def y_real(self, xreal, y): 128 | # Run discriminator on real images 129 | yreal = self.discriminate(xreal, y) 130 | # Save images for later 131 | self._state_hooks['xreal'] = xreal 132 | self._state_hooks['real_images'] = format_images(xreal) 133 | return yreal 134 | 135 | def latent_sample(self, xreal): 136 | # Generate latent samples of same shape as real data 137 | latent = xreal.data.new(xreal.size(0), self.latent_dim) 138 | torch.randn(*latent.size(), out=latent) 139 | latent = Variable(latent) 140 | return latent 141 | 142 | def forward(self, xreal, y): 143 | # Calculate and return y_real and y_fake 144 | return self.y_real(xreal, y), self.y_fake(self.latent_sample(xreal), y) 145 | 146 | 147 | class CWGANDiscriminatorLoss(WGANDiscriminatorLoss): 148 | def discriminate(self, xmix): 149 | y = self.model._state_hooks['y'] 150 | return self.model.discriminate(xmix, y) 151 | 152 | 153 | class CGenerateDataCallback(GenerateDataCallback): 154 | # Callback saves generated images to a folder 155 | def __init__(self, args): 156 | super(CGenerateDataCallback, self).__init__(args, gridsize=10) 157 | self.y = torch.arange(0, 10).unsqueeze(1).expand(-1, 10).contiguous().view(-1).contiguous().long() 158 | 159 | def end_of_training_iteration(self, **_): 160 | # Check if it is time to generate images 161 | self.count += 1 162 | if self.count > self.frequency: 163 | self.save_images() 164 | self.count = 0 165 | 166 | def generate(self, latent): 167 | # Set eval, generate, then set back to train 168 | self.trainer.model.eval() 169 | y = Variable(self.y) 170 | if self.trainer.is_cuda(): 171 | y = y.cuda() 172 | generated = self.trainer.model.generate(Variable(latent), y) 173 | self.trainer.model.train() 174 | return generated 175 | 176 | 177 | class CGeneratorTrainingCallback(GeneratorTrainingCallback): 178 | # Callback periodically trains the generator 179 | def __init__(self, args, parameters, criterion): 180 | super(CGeneratorTrainingCallback, self).__init__(args, parameters, criterion) 181 | 182 | def train_generator(self): 183 | # Train the generator 184 | # Generate latent samples 185 | if self.trainer.is_cuda(): 186 | latent = torch.cuda.FloatTensor(self.batch_size, self.latent_dim) 187 | else: 188 | latent = torch.FloatTensor(self.batch_size, self.latent_dim) 189 | torch.randn(*latent.size(), out=latent) 190 | latent = Variable(latent) 191 | # Calculate yfake 192 | y = Variable(torch.rand(latent.size(0), out=latent.data.new()) * 10).long() 193 | 
yfake = self.trainer.model.y_fake(latent, y) 194 | # Calculate loss 195 | loss = self.criterion(yfake) 196 | # Perform update 197 | self.opt.zero_grad() 198 | loss.backward() 199 | self.opt.step() 200 | 201 | 202 | def run(args): 203 | save_args(args) # save command line to a file for reference 204 | train_loader = mnist_cgan_data_loader(args) # get the data 205 | model = CGANModel( 206 | args, 207 | discriminator=CDiscriminatorNetwork(args), 208 | generator=CGeneratorNetwork(args)) 209 | 210 | # Build trainer 211 | trainer = Trainer(model) 212 | trainer.build_criterion(CWGANDiscriminatorLoss(penalty_weight=args.penalty_weight, model=model)) 213 | trainer.build_optimizer('Adam', model.discriminator.parameters(), lr=args.discriminator_lr) 214 | trainer.save_every((1, 'epochs')) 215 | trainer.save_to_directory(args.save_directory) 216 | trainer.set_max_num_epochs(args.epochs) 217 | trainer.register_callback(CGenerateDataCallback(args)) 218 | trainer.register_callback(CGeneratorTrainingCallback( 219 | args, 220 | parameters=model.generator.parameters(), 221 | criterion=WGANGeneratorLoss())) 222 | trainer.bind_loader('train', train_loader, num_inputs=2) 223 | # Custom logging configuration so it knows to log our images 224 | logger = TensorboardLogger( 225 | log_scalars_every=(1, 'iteration'), 226 | log_images_every=(args.log_image_frequency, 'iteration')) 227 | trainer.build_logger(logger, log_directory=args.save_directory) 228 | logger.observe_state('generated_images') 229 | logger.observe_state('real_images') 230 | logger._trainer_states_being_observed_while_training.remove('training_inputs') 231 | 232 | if args.cuda: 233 | trainer.cuda() 234 | 235 | # Go! 236 | trainer.fit() 237 | 238 | # Generate video from saved images 239 | if not args.no_ffmpeg: 240 | generate_video(args.save_directory) 241 | 242 | 243 | def main(argv): 244 | # Training settings 245 | parser = argparse.ArgumentParser(description='PyTorch GAN Example') 246 | 247 | # Output directory 248 | parser.add_argument('--save-directory', type=str, default='output/mnist_cwgangp/v1', help='output directory') 249 | 250 | # Configuration 251 | parser.add_argument('--batch-size', type=int, default=128, metavar='N', help='batch size') 252 | parser.add_argument('--epochs', type=int, default=50, metavar='N', help='number of epochs') 253 | parser.add_argument('--image-frequency', type=int, default=10, metavar='N', help='frequency to write images') 254 | parser.add_argument('--log-image-frequency', type=int, default=100, metavar='N', help='frequency to log images') 255 | parser.add_argument('--generator-frequency', type=int, default=10, metavar='N', help='frequency to train generator') 256 | 257 | # Hyperparameters 258 | parser.add_argument('--latent-dim', type=int, default=100, metavar='N', help='latent dimension') 259 | parser.add_argument('--embedding-dim', type=int, default=32, metavar='N', help='embedding dimension') 260 | parser.add_argument('--discriminator-lr', type=float, default=3e-4, metavar='N', help='discriminator learning rate') 261 | parser.add_argument('--generator-lr', type=float, default=3e-4, metavar='N', help='generator learning rate') 262 | parser.add_argument('--penalty-weight', type=float, default=20., metavar='N', help='gradient penalty weight') 263 | parser.add_argument('--discriminator-batchnorm', type=bool, default=False, metavar='N', help='enable BN') 264 | parser.add_argument('--generator-batchnorm', type=bool, default=True, metavar='N', help='enable BN') 265 | 266 | # Flags 267 | parser.add_argument('--no-cuda', 
action='store_true', default=False, help='disables CUDA training') 268 | parser.add_argument('--no-ffmpeg', action='store_true', default=False, help='disables video generation') 269 | 270 | args = parser.parse_args(argv) 271 | args.cuda = not args.no_cuda and torch.cuda.is_available() 272 | run(args) 273 | 274 | 275 | if __name__ == '__main__': 276 | main(sys.argv[1:]) 277 | -------------------------------------------------------------------------------- /recitation-10/mnist_gan.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | from PIL import Image 9 | from inferno.trainers.basic import Trainer 10 | from inferno.trainers.callbacks.base import Callback 11 | from inferno.trainers.callbacks.logging.tensorboard import TensorboardLogger 12 | from torch import nn 13 | from torch.autograd import Variable 14 | from torch.nn.init import xavier_uniform 15 | from torch.optim import Adam 16 | from torch.utils.data.dataloader import DataLoader 17 | from torchvision import datasets 18 | from torchvision import transforms 19 | 20 | 21 | def mnist_data_loader(args): 22 | # Create DataLoader for MNIST 23 | kwargs = {'num_workers': 2, 'pin_memory': True} if args.cuda else {} 24 | train_loader = DataLoader( 25 | datasets.MNIST('./data/mnist', train=True, download=True, 26 | transform=transforms.ToTensor()), 27 | batch_size=args.batch_size, shuffle=True, **kwargs) 28 | return train_loader 29 | 30 | 31 | def initializer(m): 32 | # Run xavier on all weights and zero all biases 33 | if hasattr(m, 'weight'): 34 | if m.weight.ndimension() > 1: 35 | xavier_uniform(m.weight.data) 36 | if hasattr(m, 'bias'): 37 | m.bias.data.zero_() 38 | 39 | 40 | def format_images(images): 41 | # convert (n, c, h, w) to a single image grid (1, c, g*h, g*w) 42 | c = images.size(1) 43 | h = images.size(2) 44 | w = images.size(3) 45 | gridsize = int(math.floor(math.sqrt(images.size(0)))) 46 | images = images[:gridsize * gridsize] # (g*g, c, h, w) 47 | images = images.view(gridsize, gridsize, c, h, w) # (g,g,c,h,w) 48 | images = images.permute(0, 3, 1, 4, 2).contiguous() # (g, h, g, w, c) 49 | images = images.view(1, gridsize * h, gridsize * w, c) # (1, g*h, g*w, c) 50 | images = images.permute(0, 3, 1, 2) # (1, c, g*h, g*w) 51 | return images 52 | 53 | 54 | # Command line to make images into a video 55 | FFMPEG = """ 56 | ffmpeg -r 60 -f image2 -s 280x280 -i \ 57 | "generated_images{}%08d.png" -vcodec \ 58 | libx264 -crf 25 -pix_fmt yuv420p generation.mp4""".format(os.sep) 59 | 60 | 61 | def generate_video(path): 62 | # Run FFMPEG to generate video 63 | cwd = os.getcwd() 64 | os.chdir(path=os.path.abspath(path)) 65 | os.system(FFMPEG) 66 | os.chdir(os.path.abspath(cwd)) 67 | 68 | 69 | def save_args(args): 70 | # Save argparse arguments to a file for reference 71 | os.makedirs(args.save_directory, exist_ok=True) 72 | with open(os.path.join(args.save_directory, 'args.txt'), 'w') as f: 73 | for k, v in vars(args).items(): 74 | f.write("{}={}\n".format(k, v)) 75 | 76 | 77 | class Reshape(nn.Module): 78 | # Module that just reshapes the input 79 | def __init__(self, *shape): 80 | super(Reshape, self).__init__() 81 | self.shape = shape 82 | 83 | def forward(self, input): 84 | return input.view(*self.shape) 85 | 86 | 87 | class GeneratorNetwork(nn.Sequential): 88 | # Network for generation 89 | # Input is (N, latent_dim) 90 | def __init__(self, args): 91 | super(GeneratorNetwork, 
self).__init__(*[m for m in [ 92 | nn.Linear(args.latent_dim, 1024), 93 | nn.BatchNorm1d(1024) if args.generator_batchnorm else None, 94 | nn.LeakyReLU(), 95 | nn.Linear(1024, 7 * 7 * 128), 96 | Reshape(-1, 128, 7, 7), # N, 128,7,7 97 | nn.BatchNorm2d(128) if args.generator_batchnorm else None, 98 | nn.LeakyReLU(), 99 | nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1), # N, 64,14,14 100 | nn.BatchNorm2d(64) if args.generator_batchnorm else None, 101 | nn.LeakyReLU(), 102 | nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1), # N, 32,28,28 103 | nn.BatchNorm2d(32) if args.generator_batchnorm else None, 104 | nn.LeakyReLU(), 105 | nn.Conv2d(32, 1, kernel_size=3, stride=1, padding=1), # N, 1,28,28 106 | nn.Sigmoid()] if m is not None]) 107 | 108 | 109 | class DiscriminatorNetwork(nn.Sequential): 110 | # Network for discrimination 111 | # Input is (N, 1, 28, 28) 112 | def __init__(self, args): 113 | super(DiscriminatorNetwork, self).__init__(*[m for m in [ 114 | nn.Conv2d(1, 64, kernel_size=4, stride=2, padding=1), # N, 64, 14, 14 115 | nn.BatchNorm2d(64) if args.discriminator_batchnorm else None, 116 | nn.LeakyReLU(), 117 | nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1), # N, 128, 7, 7 118 | nn.BatchNorm2d(128) if args.discriminator_batchnorm else None, 119 | nn.LeakyReLU(), 120 | Reshape(-1, 128 * 7 * 7), # N, 128*7*7 121 | nn.Linear(128 * 7 * 7, 1024), # N, 1024 122 | nn.BatchNorm1d(1024) if args.discriminator_batchnorm else None, 123 | nn.LeakyReLU(), 124 | nn.Linear(1024, 1), # N, 1 125 | Reshape(-1)] if m is not None]) # N 126 | 127 | 128 | class GANModel(nn.Module): 129 | # GAN containing generator and discriminator 130 | def __init__(self, args, discriminator, generator): 131 | super(GANModel, self).__init__() 132 | self.discriminator = discriminator 133 | self.generator = generator 134 | self.latent_dim = args.latent_dim 135 | self._state_hooks = {} # used by inferno for logging 136 | self.apply(initializer) # initialize the parameters 137 | 138 | def generate(self, latent): 139 | # Generate fake images from latent inputs 140 | xfake = self.generator(latent) 141 | # Save images for later 142 | self._state_hooks['xfake'] = xfake 143 | self._state_hooks['generated_images'] = format_images(xfake) # log the generated images 144 | return xfake 145 | 146 | def discriminate(self, x): 147 | # Run discriminator on an input 148 | return self.discriminator(x) 149 | 150 | def y_fake(self, latent): 151 | # Run discriminator on generated images 152 | yfake = self.discriminate(self.generate(latent)) 153 | return yfake 154 | 155 | def y_real(self, xreal): 156 | # Run discriminator on real images 157 | yreal = self.discriminate(xreal) 158 | # Save images for later 159 | self._state_hooks['xreal'] = xreal 160 | self._state_hooks['real_images'] = format_images(xreal) 161 | return yreal 162 | 163 | def latent_sample(self, xreal): 164 | # Generate latent samples of same shape as real data 165 | latent = xreal.data.new(xreal.size(0), self.latent_dim) 166 | torch.randn(*latent.size(), out=latent) 167 | latent = Variable(latent) 168 | return latent 169 | 170 | def forward(self, xreal): 171 | # Calculate and return y_real and y_fake 172 | return self.y_real(xreal), self.y_fake(self.latent_sample(xreal)) 173 | 174 | 175 | class DiscriminatorLoss(nn.BCEWithLogitsLoss): 176 | # Loss function for discriminator 177 | def forward(self, input, _): 178 | # Targets are ignored because we know they are 0 or 1 179 | yreal, yfake = input # unpack inputs 180 | real_targets = 
Variable(yreal.data.new(yreal.size(0)).fill_(1)) # targets for real images 181 | fake_targets = Variable(yreal.data.new(yreal.size(0)).zero_()) # targets for generated images 182 | real_loss = super(DiscriminatorLoss, self).forward(yreal, real_targets) # loss for real images 183 | fake_loss = super(DiscriminatorLoss, self).forward(yfake, fake_targets) # loss for fake images 184 | loss = real_loss + fake_loss # combined loss 185 | return loss 186 | 187 | 188 | class GeneratorLoss(nn.BCEWithLogitsLoss): 189 | # Loss function for generator 190 | def forward(self, yfake): 191 | # No targets because we know the targets 192 | fake_targets = Variable(yfake.data.new(yfake.size(0)).fill_(1)) # targets for fake images 193 | fake_loss = super(GeneratorLoss, self).forward(yfake, fake_targets) # loss for fake images 194 | return fake_loss 195 | 196 | 197 | class GeneratorTrainingCallback(Callback): 198 | # Callback periodically trains the generator 199 | def __init__(self, args, parameters, criterion): 200 | self.criterion = criterion 201 | self.opt = Adam(parameters, args.generator_lr) 202 | self.batch_size = args.batch_size 203 | self.latent_dim = args.latent_dim 204 | self.count = 0 205 | self.frequency = args.generator_frequency 206 | 207 | def end_of_training_iteration(self, **_): 208 | # Each iteration check if it is time to train the generator 209 | self.count += 1 210 | if self.count > self.frequency: 211 | self.train_generator() 212 | self.count = 0 213 | 214 | def train_generator(self): 215 | # Train the generator 216 | # Generate latent samples 217 | if self.trainer.is_cuda(): 218 | latent = torch.cuda.FloatTensor(self.batch_size, self.latent_dim) 219 | else: 220 | latent = torch.FloatTensor(self.batch_size, self.latent_dim) 221 | torch.randn(*latent.size(), out=latent) 222 | latent = Variable(latent) 223 | # Calculate yfake 224 | yfake = self.trainer.model.y_fake(latent) 225 | # Calculate loss 226 | loss = self.criterion(yfake) 227 | # Perform update 228 | self.opt.zero_grad() 229 | loss.backward() 230 | self.opt.step() 231 | 232 | 233 | class GenerateDataCallback(Callback): 234 | # Callback saves generated images to a folder 235 | def __init__(self, args, gridsize=10): 236 | super(GenerateDataCallback, self).__init__() 237 | self.count = 0 # iteration counter 238 | self.image_count = 0 # image counter 239 | self.frequency = args.image_frequency 240 | self.gridsize = gridsize 241 | self.latent = torch.randn(gridsize * gridsize, args.latent_dim) 242 | 243 | def end_of_training_iteration(self, **_): 244 | # Check if it is time to generate images 245 | self.count += 1 246 | if self.count > self.frequency: 247 | self.save_images() 248 | self.count = 0 249 | 250 | def generate(self, latent): 251 | # Set eval, generate, then set back to train 252 | self.trainer.model.eval() 253 | generated = self.trainer.model.generate(Variable(latent)) 254 | self.trainer.model.train() 255 | return generated 256 | 257 | def save_images(self): 258 | # Generate images 259 | path = os.path.join(self.trainer.save_directory, 'generated_images') 260 | os.makedirs(path, exist_ok=True) # create directory if necessary 261 | image_path = os.path.join(path, '{:08d}.png'.format(self.image_count)) 262 | self.image_count += 1 263 | # Copy latent to cuda if necessary 264 | if self.trainer.is_cuda(): 265 | latent = self.latent.cuda() 266 | else: 267 | latent = self.latent 268 | generated = self.generate(latent) 269 | # Reshape, scale, and cast the data so it can be saved 270 | grid = 
format_images(generated).squeeze(0).permute(1, 2, 0) 271 | if grid.size(2) == 1: 272 | grid = grid.squeeze(2) 273 | array = grid.data.cpu().numpy() * 255. 274 | array = array.astype(np.uint8) 275 | # Save the image 276 | Image.fromarray(array).save(image_path) 277 | 278 | 279 | def run(args): 280 | save_args(args) # save command line to a file for reference 281 | train_loader = mnist_data_loader(args) # get the data 282 | # Create the model 283 | model = GANModel( 284 | args, 285 | discriminator=DiscriminatorNetwork(args), 286 | generator=GeneratorNetwork(args)) 287 | 288 | # Build trainer 289 | trainer = Trainer(model) 290 | trainer.build_criterion(DiscriminatorLoss) 291 | trainer.build_optimizer('Adam', model.discriminator.parameters(), lr=args.discriminator_lr) 292 | trainer.save_every((1, 'epochs')) 293 | trainer.save_to_directory(args.save_directory) 294 | trainer.set_max_num_epochs(args.epochs) 295 | trainer.register_callback(GenerateDataCallback(args)) 296 | trainer.register_callback(GeneratorTrainingCallback( 297 | args, 298 | parameters=model.generator.parameters(), 299 | criterion=GeneratorLoss())) 300 | trainer.bind_loader('train', train_loader) 301 | # Custom logging configuration so it knows to log our images 302 | logger = TensorboardLogger( 303 | log_scalars_every=(1, 'iteration'), 304 | log_images_every=(args.log_image_frequency, 'iteration')) 305 | trainer.build_logger(logger, log_directory=args.save_directory) 306 | logger.observe_state('generated_images') 307 | logger.observe_state('real_images') 308 | logger._trainer_states_being_observed_while_training.remove('training_inputs') 309 | 310 | if args.cuda: 311 | trainer.cuda() 312 | 313 | # Go! 314 | trainer.fit() 315 | 316 | # Generate video from saved images 317 | if not args.no_ffmpeg: 318 | generate_video(args.save_directory) 319 | 320 | 321 | def main(argv): 322 | # Training settings 323 | parser = argparse.ArgumentParser(description='PyTorch GAN Example') 324 | 325 | # Output directory 326 | parser.add_argument('--save-directory', type=str, default='output/mnist_gan/v1', help='output directory') 327 | 328 | # Configuration 329 | parser.add_argument('--batch-size', type=int, default=128, metavar='N', help='batch size') 330 | parser.add_argument('--epochs', type=int, default=50, metavar='N', help='number of epochs') 331 | parser.add_argument('--image-frequency', type=int, default=10, metavar='N', help='frequency to write images') 332 | parser.add_argument('--log-image-frequency', type=int, default=100, metavar='N', help='frequency to log images') 333 | parser.add_argument('--generator-frequency', type=int, default=5, metavar='N', help='frequency to train generator') 334 | 335 | # Hyperparameters 336 | parser.add_argument('--latent-dim', type=int, default=100, metavar='N', help='latent dimension') 337 | parser.add_argument('--discriminator-lr', type=float, default=3e-4, metavar='N', help='discriminator learning rate') 338 | parser.add_argument('--generator-lr', type=float, default=3e-4, metavar='N', help='generator learning rate') 339 | parser.add_argument('--discriminator-batchnorm', type=bool, default=True, metavar='N', help='enable BN') 340 | parser.add_argument('--generator-batchnorm', type=bool, default=True, metavar='N', help='enable BN') 341 | 342 | # Flags 343 | parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') 344 | parser.add_argument('--no-ffmpeg', action='store_true', default=False, help='disables video generation') 345 | 346 | args = parser.parse_args(argv) 
347 | args.cuda = not args.no_cuda and torch.cuda.is_available() 348 | run(args) 349 | 350 | 351 | if __name__ == '__main__': 352 | main(sys.argv[1:]) 353 | -------------------------------------------------------------------------------- /recitation-10/mnist_wgangp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import torch 5 | from inferno.trainers.basic import Trainer 6 | from inferno.trainers.callbacks.logging.tensorboard import TensorboardLogger 7 | from torch import nn 8 | from torch.autograd import Variable 9 | from torch.autograd import grad 10 | 11 | from mnist_gan import generate_video, mnist_data_loader, DiscriminatorNetwork, GeneratorNetwork 12 | from mnist_gan import save_args, GANModel, GenerateDataCallback, GeneratorTrainingCallback 13 | 14 | 15 | class WGANDiscriminatorLoss(nn.Module): 16 | def __init__(self, penalty_weight, model): 17 | super(WGANDiscriminatorLoss, self).__init__() 18 | self.model = model 19 | self.penalty_weight = penalty_weight 20 | 21 | # Run discriminator 22 | def discriminate(self, xmix): 23 | return self.model.discriminate(xmix) 24 | 25 | # Loss function for discriminator 26 | def forward(self, input, _): 27 | # Targets are ignored 28 | yreal, yfake = input # unpack inputs 29 | 30 | # Main loss calculation 31 | wgan_loss = yfake.mean() - yreal.mean() 32 | 33 | # Gradient penalty 34 | xreal = self.model._state_hooks['xreal'] 35 | xfake = self.model._state_hooks['xfake'] 36 | # Random linear combination of xreal and xfake 37 | alpha = Variable(torch.rand(xreal.size(0), 1, 1, 1, out=xreal.data.new())) 38 | xmix = (alpha * xreal) + ((1. - alpha) * xfake) 39 | # Run discriminator on the combination 40 | ymix = self.discriminate(xmix) 41 | # Calculate gradient of output w.r.t. 
input 42 | ysum = ymix.sum() 43 | grads = grad(ysum, [xmix], create_graph=True)[0] 44 | gradnorm = torch.sqrt((grads * grads).sum(3).sum(2).sum(1)) 45 | graddiff = gradnorm - 1 46 | gradpenalty = (graddiff * graddiff).mean() * self.penalty_weight 47 | 48 | # Total loss 49 | loss = wgan_loss + gradpenalty 50 | return loss 51 | 52 | 53 | class WGANGeneratorLoss(nn.BCEWithLogitsLoss): 54 | # Loss function for generator 55 | def forward(self, yfake): 56 | loss = -yfake.mean() 57 | return loss 58 | 59 | 60 | def run(args): 61 | save_args(args) # save command line to a file for reference 62 | train_loader = mnist_data_loader(args) # get the data 63 | model = GANModel( 64 | args, 65 | discriminator=DiscriminatorNetwork(args), 66 | generator=GeneratorNetwork(args)) 67 | 68 | # Build trainer 69 | trainer = Trainer(model) 70 | trainer.build_criterion(WGANDiscriminatorLoss(penalty_weight=args.penalty_weight, model=model)) 71 | trainer.build_optimizer('Adam', model.discriminator.parameters(), lr=args.discriminator_lr) 72 | trainer.save_every((1, 'epochs')) 73 | trainer.save_to_directory(args.save_directory) 74 | trainer.set_max_num_epochs(args.epochs) 75 | trainer.register_callback(GenerateDataCallback(args)) 76 | trainer.register_callback(GeneratorTrainingCallback( 77 | args, 78 | parameters=model.generator.parameters(), 79 | criterion=WGANGeneratorLoss())) 80 | trainer.bind_loader('train', train_loader) 81 | # Custom logging configuration so it knows to log our images 82 | logger = TensorboardLogger( 83 | log_scalars_every=(1, 'iteration'), 84 | log_images_every=(args.log_image_frequency, 'iteration')) 85 | trainer.build_logger(logger, log_directory=args.save_directory) 86 | logger.observe_state('generated_images') 87 | logger.observe_state('real_images') 88 | logger._trainer_states_being_observed_while_training.remove('training_inputs') 89 | 90 | if args.cuda: 91 | trainer.cuda() 92 | 93 | # Go! 
94 | trainer.fit() 95 | 96 | # Generate video from saved images 97 | if not args.no_ffmpeg: 98 | generate_video(args.save_directory) 99 | 100 | 101 | def main(argv): 102 | # Training settings 103 | parser = argparse.ArgumentParser(description='PyTorch GAN Example') 104 | 105 | # Output directory 106 | parser.add_argument('--save-directory', type=str, default='output/mnist_wgangp/v1', help='output directory') 107 | 108 | # Configuration 109 | parser.add_argument('--batch-size', type=int, default=128, metavar='N', help='batch size') 110 | parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs') 111 | parser.add_argument('--image-frequency', type=int, default=10, metavar='N', help='frequency to write images') 112 | parser.add_argument('--log-image-frequency', type=int, default=100, metavar='N', help='frequency to log images') 113 | parser.add_argument('--generator-frequency', type=int, default=10, metavar='N', help='frequency to train generator') 114 | 115 | # Hyperparameters 116 | parser.add_argument('--latent-dim', type=int, default=100, metavar='N', help='latent dimension') 117 | parser.add_argument('--discriminator-lr', type=float, default=3e-4, metavar='N', help='discriminator learning rate') 118 | parser.add_argument('--generator-lr', type=float, default=3e-4, metavar='N', help='generator learning rate') 119 | parser.add_argument('--penalty-weight', type=float, default=20., metavar='N', help='gradient penalty weight') 120 | parser.add_argument('--discriminator-batchnorm', type=bool, default=False, metavar='N', help='enable BN') 121 | parser.add_argument('--generator-batchnorm', type=bool, default=True, metavar='N', help='enable BN') 122 | 123 | # Flags 124 | parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') 125 | parser.add_argument('--no-ffmpeg', action='store_true', default=False, help='disables video generation') 126 | 127 | args = parser.parse_args(argv) 128 | args.cuda = not args.no_cuda and torch.cuda.is_available() 129 | run(args) 130 | 131 | 132 | if __name__ == '__main__': 133 | main(sys.argv[1:]) 134 | -------------------------------------------------------------------------------- /recitation-10/train_all.py: -------------------------------------------------------------------------------- 1 | import cifar10_wgangp 2 | import mnist_cwgangp 3 | import mnist_gan 4 | import mnist_wgangp 5 | 6 | if __name__ == '__main__': 7 | mnist_gan.main(['--save-directory=output/mnist_gan/freq5']) 8 | mnist_gan.main(['--save-directory=output/mnist_gan/freq1', '--generator-frequency=1']) 9 | mnist_wgangp.main(['--save-directory=output/mnist_wgangp']) 10 | mnist_cwgangp.main(['--save-directory=output/mnist_cwgangp']) 11 | cifar10_wgangp.main(['--save-directory=output/cifar10_wgangp']) 12 | -------------------------------------------------------------------------------- /recitation-11/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/usr/local/lib/python3.5/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "\n", 20 | "import torch\n", 21 | "import numpy as np\n", 22 | "import rbm_demo_utils\n", 23 | "import rbm_models" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "mnist = rbm_demo_utils.MNIST(max_len=2000)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "5\n" 45 | ] 46 | }, 47 | { 48 | "data": { 49 | "image/png": "<base64-encoded PNG of the displayed MNIST digit omitted>", 50 | "text/plain": [ 51 | "" 52 | ] 53 | }, 54 | "metadata": {}, 55 | "output_type": "display_data" 56 | } 57 | ], 58 | "source": [ 59 | "rbm_demo_utils.display_image(mnist[0][0])" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "net = rbm_models.HopfieldNet(mnist[0][0].size()[0])" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "trainer = rbm_models.HopfieldTrainWrapper(net, 1)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 6, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Loaded checkpoint directly\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "rbm_demo_utils.train([trainer], mnist, rbm_demo_utils.IdentityLoss(), 300, 500, 0.001, 'hopfield')" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 7, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "example = torch.autograd.Variable(mnist[np.random.randint(2000, 60000)][0].unsqueeze(dim=0))\n", 104 | "example[0, -10:] = -1\n", 105 | "if net.weights.data.is_cuda:\n", 106 | " example = example.cuda()\n", 107 | "example_ev = net(example, num_iters=1000)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 8, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "0\n" 120 | ] 121 | }, 122 | { 123 | "data": { 124 | "image/png": "<base64-encoded PNG of the recalled digit omitted>"
/KjY9ltJ5yNiV/EP56qI+FVLZntc0qdN\nH7m5OKDMuv4jS0u6U9Iv1eB9N2Cuu9XA/dbEnn+LpPcj4oOI+EzSi5K2NzBH60XEIUnnL9m8XdLe\n4vxeLf7lmbhlZmuFiDgTEe8U5y9I+vLI0o3edwPmakQT8a+X9FHf5VNq1yG/Q9Jrtt+2Pdv0MEtY\nWxw2XZI+lrS2yWGWMPTIzZN0yZGlW3PfjXPE66rxgt833RwRP5F0h6QHioe3rRSLz9natFwz0pGb\nJ2WJI0t/pcn7btwjXletifhPS9rQd/naYlsrRMTp4uuCpFfUvqMPn/3yIKnF14WG5/lKm47cvNSR\npdWC+65NR7xuIv63JG20fb3tKyXdI2lfA3N8g+2rihdiZPsqSberfUcf3idpZ3F+p6RXG5zla9py\n5Obljiythu+71h3xOiImfpK0TYuv+P9X0q+bmGGZuX4g6V/F6XjTs0l6QYsPA/+nxddG7pP0XUkH\nJZ2U9A9Jq1s0258lHZV0RIuhrWtotpu1+JD+iKTDxWlb0/fdgLkaud94hx+QFC/4AUkRP5AU8QNJ\nET+QFPEDSRE/kBTxA0kRP5DU/wF+1jfp2tE6wgAAAABJRU5ErkJggg==\n", 125 | "text/plain": [ 126 | "" 127 | ] 128 | }, 129 | "metadata": {}, 130 | "output_type": "display_data" 131 | } 132 | ], 133 | "source": [ 134 | "rbm_demo_utils.display_image(example.data[0])" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 9, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "3\n" 147 | ] 148 | }, 149 | { 150 | "data": { 151 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAC41JREFUeJzt3U/IHPUdx/HPp/65qIek2YYQYx8r\noRAKjWUJBaVYrBJziV7EHCQF4fGgoOChYg/1GEpVeihCrMG0WKWgYg6hNQ1CEIr4KGn+mNZYecSE\nmGdDDsaTjX57eEZZ4/PsrjszO7PP9/2CYWd/O8/O95k8n8zs/Gb254gQgHy+03QBAJpB+IGkCD+Q\nFOEHkiL8QFKEH0iK8ANJEX4gKcIPJHX5JFe2Zs2amJmZmeQqgVTm5+d17tw5j7JsqfDb3irp95Iu\nk/THiNg1aPmZmRnNzc2VWSWAAbrd7sjLjn3Yb/sySX+QdIekTZJ22N407vsBmKwyn/m3SHo/Ij6I\niM8kvShpezVlAahbmfCvl/RR3/NTRdvX2J61PWd7rtfrlVgdgCrVfrY/InZHRDciup1Op+7VARhR\nmfCflrSh7/m1RRuAKVAm/G9J2mj7ettXSrpH0r5qygJQt7G7+iLiou0HJf1di119eyLieGWVAahV\nqX7+iNgvaX9FtQCYIC7vBZIi/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJ\nEX4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9IivAD\nSRF+IKlSo/Tanpd0QdLnki5GRLeKotrIdm3vHRGtXTdWrlLhL/w8Is5V8D4AJojDfiCpsuEPSa/Z\nftv2bBUFAZiMsof9N0fEadvfk3TA9r8j4lD/AsV/CrOSdN1115VcHYCqlNrzR8Tp4nFB0iuStiyx\nzO6I6EZEt9PplFkdgAqNHX7bV9m+5st5SbdLOlZVYQDqVeawf62kV4puqMsl/SUi/lZJVQBqN3b4\nI+IDST+usJZS6uwLr9s01z7MoN+t7usbuIZhMLr6gKQIP5AU4QeSIvxAUoQfSIrwA0lVcVdfCoO6\njVZyV90wZX73Jrvyhq07Qzche34gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSGrF9PM32S9b97ozX0dQ\nl7K3E6+E6wDY8wNJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUiumn38lWwl9ytMmwzZnzw8kRfiBpAg/\nkBThB5Ii/EBShB9IivADSQ0Nv+09thdsH+trW237gO2TxeOqestEU2wPnKhteo2y539O0tZL2h6V\ndDAiNko6WDwHMEWGhj8iDkk6f0nzdkl7i/m9ku6suC4ANRv3M//aiDhTzH8saW1F9QCYkNIn/GLx\nIuhlL4S2PWt7zvZcr9cruzoAFRk3/Gdtr5Ok4nFhuQUjYndEdCOi2+l0xlwdgKqNG/59knYW8zsl\nvVpNOQAmZZSuvhck/VPSD22fsn2fpF2SbrN9UtIviucApsjQ+/kjYscyL91acS1owDT3h5f57v0M\n9+sPwxV+QFKEH0iK8ANJEX4gKcIPJEX4gaT46u7CSh2SeSV35dX98ysde34gKcIPJEX4gaQIP5AU\n4QeSIvxAUoQfSIp+/hE1eXvoNPfVo73Y8wNJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUvTzF9p873eZ\nr6ieZiv1Oxbagj0/kBThB5Ii/EBShB9IivADSRF+ICnCDyQ1NPy299hesH2sr+1x26dtHy6mbfWW\niUEiYtmpzvduup/d9tgTRtvzPydp6xLtT0XE5mLaX21ZAOo2NPwRcUjS+QnUAmCCynzmf9D2keJj\nwarKKgIwEeOG/2lJN0jaLOmMpCeWW9D2rO0523O9Xm/M1QGo2ljhj4izEfF5RHwh6RlJWwYsuzsi\nuhHR7XQ649YJoGJjhd/2ur6nd0k6ttyyANpp6C29tl+QdIukNbZPSfqNpFtsb5YUkuYl3V9jjQBq\nMDT8EbFjieZna6hlxWryvvSy7122T3zQ+ulvbxZX+AFJEX4gKcIPJEX4gaQIP5AU4QeS4qu7W6DO\n7rSymnxvugLrxZ4fSIrwA0kRfiApwg8kRfiBpAg/kBThB5Kin78Fmv4K7Izqvs16GoYXZ88PJEX4\ngaQIP5AU4QeSIvxAUoQfSIrwA0nRzz8BbejTnUbTfL//NPybs+cHkiL8QFKEH0iK8ANJEX4gKcIP\nJEX4gaSGht/2Btuv237X9nHbDxXtq20fsH2yeFxVf7krk+1SEzCOUfb8FyU9EhGbJP1U0gO2N0l6\nVNLBiNgo6WDxHMCUGBr+iDgTEe8U
8xcknZC0XtJ2SXuLxfZKurOuIgFU71t95rc9I+lGSW9KWhsR\nZ4qXPpa0ttLKANRq5PDbvlrSS5IejohP+l+LxQuZl7yY2fas7Tnbc71er1SxAKozUvhtX6HF4D8f\nES8XzWdtryteXydpYamfjYjdEdGNiG6n06miZgAVGOVsvyU9K+lERDzZ99I+STuL+Z2SXq2+PAB1\nGeWW3psk3SvpqO3DRdtjknZJ+qvt+yR9KOnuekqcfnTHNWMabqtt0tDwR8Qbkpb767212nIATApX\n+AFJEX4gKcIPJEX4gaQIP5AU4QeS4qu7p0CT/dVlr1Gos3b68cthzw8kRfiBpAg/kBThB5Ii/EBS\nhB9IivADSdHPPwX4PgDUgT0/kBThB5Ii/EBShB9IivADSRF+ICnCDyRFP38FhvXDl73vvM39/NxT\nP73Y8wNJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUkPDb3uD7ddtv2v7uO2HivbHbZ+2fbiYttVfbjtF\nxMCpze8/7L3r/t3QnFEu8rko6ZGIeMf2NZLetn2geO2piPhdfeUBqMvQ8EfEGUlnivkLtk9IWl93\nYQDq9a0+89uekXSjpDeLpgdtH7G9x/aqZX5m1vac7bler1eqWADVGTn8tq+W9JKkhyPiE0lPS7pB\n0mYtHhk8sdTPRcTuiOhGRLfT6VRQMoAqjBR+21doMfjPR8TLkhQRZyPi84j4QtIzkrbUVyaAqo1y\ntt+SnpV0IiKe7Gtf17fYXZKOVV8egLqMcrb/Jkn3Sjpq+3DR9pikHbY3SwpJ85Lur6VCDEWXG8Yx\nytn+NyQtdUP5/urLATApXOEHJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBTh\nB5Ii/EBShB9IypO8F9x2T9KHfU1rJJ2bWAHfTltra2tdErWNq8ravh8RI31f3kTD/42V23MR0W2s\ngAHaWltb65KobVxN1cZhP5AU4QeSajr8uxte/yBtra2tdUnUNq5Gamv0Mz+A5jS95wfQkEbCb3ur\n7f/Yft/2o03UsBzb87aPFiMPzzVcyx7bC7aP9bWttn3A9snicclh0hqqrRUjNw8YWbrRbde2Ea8n\nfthv+zJJ70m6TdIpSW9J2hER7060kGXYnpfUjYjG+4Rt/0zSp5L+FBE/Ktp+K+l8ROwq/uNcFRG/\nakltj0v6tOmRm4sBZdb1jywt6U5Jv1SD225AXXerge3WxJ5/i6T3I+KDiPhM0ouStjdQR+tFxCFJ\n5y9p3i5pbzG/V4t/PBO3TG2tEBFnIuKdYv6CpC9Hlm502w2oqxFNhH+9pI/6np9Su4b8Dkmv2X7b\n9mzTxSxhbTFsuiR9LGltk8UsYejIzZN0ycjSrdl244x4XTVO+H3TzRHxE0l3SHqgOLxtpVj8zNam\n7pqRRm6elCVGlv5Kk9tu3BGvq9ZE+E9L2tD3/NqirRUi4nTxuCDpFbVv9OGzXw6SWjwuNFzPV9o0\ncvNSI0urBduuTSNeNxH+tyRttH297Ssl3SNpXwN1fIPtq4oTMbJ9laTb1b7Rh/dJ2lnM75T0aoO1\nfE1bRm5ebmRpNbztWjfidURMfJK0TYtn/P8r6ddN1LBMXT+Q9K9iOt50bZJe0OJh4P+0eG7kPknf\nlXRQ0klJ/5C0ukW1/VnSUUlHtBi0dQ3VdrMWD+mPSDpcTNua3nYD6mpku3GFH5AUJ/yApAg/kBTh\nB5Ii/EBShB9IivADSRF+ICnCDyT1fxGxXsp5h/5nAAAAAElFTkSuQmCC\n", 152 | "text/plain": [ 153 | "" 154 | ] 155 | }, 156 | "metadata": {}, 157 | "output_type": "display_data" 158 | } 159 | ], 160 | "source": [ 161 | "rbm_demo_utils.display_image(example_ev.data[0])" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.5.2" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 2 186 | } 187 | -------------------------------------------------------------------------------- /recitation-11/__pycache__/rbm_demo_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-11/__pycache__/rbm_demo_utils.cpython-35.pyc -------------------------------------------------------------------------------- /recitation-11/__pycache__/rbm_models.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-11/__pycache__/rbm_models.cpython-35.pyc -------------------------------------------------------------------------------- /recitation-11/data/processed/test.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-11/data/processed/test.pt
--------------------------------------------------------------------------------
/recitation-11/data/raw/t10k-images-idx3-ubyte:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-11/data/raw/t10k-images-idx3-ubyte
--------------------------------------------------------------------------------
/recitation-11/data/raw/t10k-labels-idx1-ubyte:
--------------------------------------------------------------------------------
1 | (unprintable binary contents omitted)
--------------------------------------------------------------------------------
/recitation-11/data/raw/train-labels-idx1-ubyte:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-11/data/raw/train-labels-idx1-ubyte
--------------------------------------------------------------------------------
/recitation-11/net/hopfield/model.pytorch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-11/net/hopfield/model.pytorch
--------------------------------------------------------------------------------
/recitation-11/rbm_demo_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import torch.utils
5 | import torchvision.datasets as datasets
6 | import torchvision.transforms as transforms
7 | from inferno.trainers.basic import Trainer
8 | import matplotlib.pyplot as plt
9 | import rbm_models
10 | 
11 | 
12 | class MNIST(torch.utils.data.Dataset):
13 |     def __init__(self, max_len=-1, include_label=True):
14 |         super().__init__()
15 |         self.mnist = datasets.MNIST(
16 |             root='./data', train=True,
17 |             download=True, transform=transforms.ToTensor())
18 |         self.max_len = max_len
19 |         self.include_label = include_label
20 | 
21 |     def __len__(self):
22 |         if self.max_len < 0:
23 |             return len(self.mnist)
24 |         else:
25 |             return self.max_len
26 | 
27 |     def __getitem__(self, idx):
28 |         (img, label) = self.mnist[idx]
29 |         img = img.view(-1)
30 |         img = rbm_models.discretize(rbm_models.rescale(img))
31 |         if not self.include_label:
32 |             return (img, label)
33 |         label_onehot = img.new(10).fill_(0)
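   |         # These ten label units get appended to the 784 +/-1 image units,
   |         # so each sample stores the image and its class as one joint pattern;
   |         # rescale() maps the one-hot {0,1} coding onto the same +/-1
   |         # alphabet as the pixels.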
34 | label_onehot[label] = 1 35 | label_onehot = rbm_models.rescale(label_onehot) 36 | result = torch.cat([img, label_onehot], dim=0) 37 | return (result, label) 38 | 39 | 40 | class IdentityLoss(torch.nn.Module): 41 | def forward(self, x, _): 42 | return x 43 | 44 | 45 | class LossPrinter(torch.nn.Module): 46 | def __init__(self, criterion): 47 | super().__init__() 48 | self.criterion = criterion 49 | 50 | def forward(self, *args, **kwargs): 51 | loss = self.criterion(*args, **kwargs) 52 | print("Loss: %f" % loss) 53 | return loss 54 | 55 | 56 | def train(net, dataset, criterion, num_epochs, 57 | batch_size, learn_rate, dir_name): 58 | dir_name = os.path.join('net/', dir_name) 59 | trainer = Trainer(net[0]) 60 | 61 | if (os.path.exists(os.path.join(dir_name, 'model.pytorch'))): 62 | net_temp = trainer.load_model(dir_name).model 63 | net[0].load_state_dict(net_temp.state_dict()) 64 | print("Loaded checkpoint directly") 65 | else: 66 | if (not os.path.exists(dir_name)): 67 | os.makedirs(dir_name) 68 | data_loader = torch.utils.data.DataLoader( 69 | dataset, shuffle=True, batch_size=batch_size) 70 | net[0].train() 71 | 72 | trainer \ 73 | .build_criterion(LossPrinter(criterion)) \ 74 | .bind_loader('train', data_loader) \ 75 | .build_optimizer('Adam', lr=learn_rate) \ 76 | .set_max_num_epochs(num_epochs) 77 | 78 | if torch.cuda.is_available(): 79 | trainer.cuda() 80 | 81 | trainer.fit() 82 | trainer.save_model(dir_name) 83 | net[0].cpu() 84 | net[0].eval() 85 | 86 | 87 | def display_image(arr): 88 | width = int(np.sqrt(arr.size()[0])) 89 | label_onehot = arr[-10:] 90 | arr = (arr[:-10] + 1) / 2 91 | arr = arr.cpu().view(width, -1).numpy() 92 | plt.figure() 93 | plt.imshow(1.0 - arr, cmap='gray') 94 | _, pos = torch.max(label_onehot, 0) 95 | print(pos[0]) 96 | 97 | 98 | def display_reconstruction(net, dataset): 99 | (image, _) = dataset[np.random.randint(len(dataset))] 100 | display_image(image) 101 | image = torch.autograd.Variable(image).unsqueeze(dim=0) 102 | reconst = net.decode(net.encode(image)).data[0] 103 | display_image(reconst) 104 | -------------------------------------------------------------------------------- /recitation-11/rbm_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | FLIP_PROB = 0.5 4 | 5 | 6 | def rescale(x): 7 | return 2 * x - 1 8 | 9 | 10 | def gumbel_noise_logits(template): 11 | canvas = torch.zeros_like(template) 12 | canvas.uniform_() 13 | return -torch.log(canvas) 14 | 15 | 16 | def noisy_fields(fields, temperature): 17 | add_noise = gumbel_noise_logits(fields) 18 | sub_noise = gumbel_noise_logits(fields) 19 | return fields + (add_noise - sub_noise) * temperature 20 | 21 | 22 | def discretize(values): 23 | return rescale((values > 0).float()) 24 | 25 | 26 | def _update(values, fields): 27 | products = values * fields 28 | rands = rescale(torch.zeros_like(values).uniform_()) 29 | multipliers = -rescale(((products < 0) * (rands < FLIP_PROB)).float()) 30 | values *= multipliers 31 | 32 | 33 | class AbstractLoopNet(torch.nn.Module): 34 | def __init__(self, num_units): 35 | super().__init__() 36 | self.num_units = num_units 37 | self.weights = None 38 | 39 | def init_weights(self, fix_weights=True): 40 | torch.nn.init.xavier_normal(self.weights) 41 | if fix_weights: 42 | self.fix_weights() 43 | 44 | def fix_weights(self): 45 | weights = self.weights.data.clone() 46 | mask = (1 - torch.ones_like(weights[0]).diag()) 47 | weights = weights + torch.transpose(weights, 0, 1) 48 | weights = mask * (weights / 
2) 49 | self.weights.data.copy_(weights) 50 | 51 | def compute_fields(self, vals): 52 | raise NotImplementedError("Abstract function") 53 | 54 | def compute_energy(self, vals): 55 | fields = self.compute_fields(vals) 56 | return -0.5 * torch.sum(vals.data * fields.data, dim=1) 57 | 58 | def update(self, vals, fields): 59 | raise NotImplementedError("Abstract function") 60 | 61 | def run_iters(self, in_vals, num_iters): 62 | vals = in_vals.clone() 63 | for i in range(num_iters): 64 | fields = self.compute_fields(vals) 65 | self.update(vals.data, fields.data) 66 | return vals 67 | 68 | def forward(self, in_vals, num_iters): 69 | return self.run_iters(in_vals, num_iters) 70 | 71 | def inference(self, in_vals, num_iters, num_inference_iters): 72 | vals = self.run_iters(in_vals, num_iters) 73 | 74 | vals_avg = torch.zeros_like(vals) 75 | for i in range(num_inference_iters): 76 | fields = self.compute_fields(vals) 77 | self.update(vals.data, fields.data) 78 | vals_avg.data += vals.data 79 | vals_avg = vals_avg / num_inference_iters 80 | return discretize(vals_avg) 81 | 82 | 83 | class HopfieldNet(AbstractLoopNet): 84 | def __init__(self, num_units): 85 | super().__init__(num_units) 86 | self.weights = torch.nn.Parameter( 87 | torch.Tensor(num_units, num_units)) 88 | self.init_weights() 89 | 90 | def compute_fields(self, vals): 91 | return torch.matmul( 92 | self.weights.unsqueeze(dim=0), 93 | vals.unsqueeze(dim=2))[:, :, 0] 94 | 95 | def update(self, vals, fields): 96 | _update(vals, fields) 97 | 98 | 99 | class StochasticHopfieldNet(AbstractLoopNet): 100 | def __init__(self, num_units, temperature): 101 | super().__init__(num_units) 102 | self.weights = torch.nn.Parameter( 103 | torch.Tensor(num_units, num_units)) 104 | self.temperature = temperature 105 | self.init_weights() 106 | 107 | def compute_fields(self, vals): 108 | return torch.matmul( 109 | self.weights.unsqueeze(dim=0), 110 | vals.unsqueeze(dim=2))[:, :, 0] 111 | 112 | def update(self, vals, fields): 113 | _update(vals, noisy_fields(fields, self.temperature)) 114 | 115 | 116 | class BoltzmannMachine(AbstractLoopNet): 117 | def __init__(self, num_units, num_hidden, 118 | temperature, hidden_setup_iters): 119 | super().__init__(num_units) 120 | self.num_hidden = num_hidden 121 | self.weights = torch.nn.Parameter( 122 | torch.Tensor(num_units + num_hidden, num_units + num_hidden)) 123 | self.temperature = temperature 124 | self.hidden_setup_iters = hidden_setup_iters 125 | self.init_weights() 126 | 127 | def compute_fields(self, vals): 128 | return torch.matmul( 129 | self.weights.unsqueeze(dim=0), 130 | vals.unsqueeze(dim=2))[:, :, 0] 131 | 132 | def update(self, vals, fields, include_visible=True): 133 | old_vals = vals[:, :self.num_units].clone() 134 | _update(vals, noisy_fields(fields, self.temperature)) 135 | 136 | if not include_visible: 137 | vals[:, :self.num_units] = old_vals 138 | 139 | def expand_data(self, in_visible_vals, num_samples): 140 | batch_size = in_visible_vals.size()[0] 141 | visible_vals = discretize(in_visible_vals) 142 | hidden_vals = torch.autograd.Variable( 143 | visible_vals.data.new(batch_size, self.num_hidden)) 144 | vals_list = [] 145 | 146 | for i in range(num_samples): 147 | hidden_vals.data.uniform_() 148 | hidden_vals.data.copy_(discretize(rescale(hidden_vals.data))) 149 | vals = torch.cat([visible_vals, hidden_vals], dim=1) 150 | for i in range(self.hidden_setup_iters): 151 | fields = self.compute_fields(vals) 152 | self.update(vals.data, fields.data, False) 153 | vals_list.append(vals) 154 | return 
torch.cat(vals_list, dim=0) 155 | 156 | 157 | class RestrictedBoltzmannMachine(AbstractLoopNet): 158 | def __init__(self, num_units, num_hidden, temperature): 159 | super().__init__(num_units) 160 | self.num_hidden = num_hidden 161 | self.weights = torch.nn.Parameter( 162 | torch.Tensor(num_hidden, num_units)) 163 | self.temperature = temperature 164 | self.init_weights(fix_weights=False) 165 | 166 | def compute_fields(self, vals): 167 | hidden = torch.matmul( 168 | self.weights.unsqueeze(dim=0), 169 | vals[:, :self.num_units].unsqueeze(dim=2))[:, :, 0] 170 | visible = torch.matmul( 171 | torch.transpose(self.weights, 0, 1).unsqueeze(dim=0), 172 | vals[:, self.num_units:].unsqueeze(dim=2))[:, :, 0] 173 | return torch.cat([visible, hidden], dim=1) 174 | 175 | def update(self, vals, fields): 176 | _update(vals, noisy_fields(fields, self.temperature)) 177 | 178 | def expand_data(self, in_visible_vals): 179 | batch_size = in_visible_vals.size()[0] 180 | visible_vals = discretize(in_visible_vals) 181 | hidden_vals = torch.autograd.Variable( 182 | visible_vals.data.new(batch_size, self.num_hidden).fill_(1)) 183 | 184 | vals = torch.cat([visible_vals, hidden_vals], dim=1) 185 | fields = self.compute_fields(vals) 186 | self.update(vals.data, fields.data) 187 | vals[:, :self.num_units] = in_visible_vals 188 | return vals 189 | 190 | 191 | class HopfieldTrainWrapper(torch.nn.Module): 192 | def __init__(self, net, num_evolve_iters): 193 | super().__init__() 194 | assert isinstance(net, HopfieldNet) 195 | self.net = net 196 | self.num_evolve_iters = num_evolve_iters 197 | 198 | def forward(self, in_vals): 199 | self.net.fix_weights() 200 | batch_size = in_vals.size()[0] 201 | evolved_vals = self.net(in_vals, self.num_evolve_iters) 202 | 203 | positive_term = 1 / batch_size * torch.matmul( 204 | torch.transpose(in_vals, 0, 1), in_vals) 205 | negative_term = 1 / batch_size * torch.matmul( 206 | torch.transpose(evolved_vals, 0, 1), evolved_vals) 207 | weight_neg_grad = -(positive_term - negative_term) 208 | 209 | result = (self.net.weights * 210 | weight_neg_grad).sum() / self.net.num_units 211 | if (result.data[0] < 0): 212 | return result * 0 213 | else: 214 | return result 215 | 216 | 217 | class StochasticHopfieldTrainWrapper(torch.nn.Module): 218 | def __init__(self, net, num_rand_samples, num_rand_sample_iters): 219 | super().__init__() 220 | assert isinstance(net, StochasticHopfieldNet) 221 | self.net = net 222 | self.num_rand_samples = num_rand_samples 223 | self.num_rand_sample_iters = num_rand_sample_iters 224 | 225 | def forward(self, in_vals): 226 | self.net.fix_weights() 227 | batch_size = in_vals.size()[0] 228 | rand_starts = torch.autograd.Variable(discretize(rescale( 229 | torch.rand(self.num_rand_samples, self.net.num_units)))) 230 | sample_vals = self.net(rand_starts, self.num_rand_sample_iters) 231 | 232 | positive_term = 1 / batch_size * torch.matmul( 233 | torch.transpose(in_vals, 0, 1), in_vals) 234 | negative_term = 1 / self.num_rand_samples * torch.matmul( 235 | torch.transpose(sample_vals, 0, 1), sample_vals) 236 | weight_neg_grad = -(positive_term - negative_term) 237 | 238 | result = (self.net.weights * 239 | weight_neg_grad).sum() / self.net.num_units 240 | if (result.data[0] < 0): 241 | return result * 0 242 | else: 243 | return result 244 | 245 | 246 | class BoltzmannTrainWrapper(torch.nn.Module): 247 | def __init__(self, net, num_train_samples, 248 | num_rand_samples, num_rand_sample_iters): 249 | super().__init__() 250 | assert isinstance(net, BoltzmannMachine) 251 | 
self.net = net 252 | self.num_train_samples = num_train_samples 253 | self.num_rand_samples = num_rand_samples 254 | self.num_rand_sample_iters = num_rand_sample_iters 255 | 256 | def forward(self, in_visible_vals): 257 | self.net.fix_weights() 258 | batch_size = in_visible_vals.size()[0] 259 | batch_size_expand = batch_size * self.num_train_samples 260 | num_total_units = self.net.num_units + self.net.num_hidden 261 | in_vals = self.net.expand_data( 262 | in_visible_vals, self.num_train_samples) 263 | 264 | rand_starts = torch.autograd.Variable(discretize( 265 | rescale(torch.rand(self.num_rand_samples, num_total_units)))) 266 | sample_vals = self.net(rand_starts, self.num_rand_sample_iters) 267 | 268 | positive_term = 1 / batch_size_expand * torch.matmul( 269 | torch.transpose(in_vals, 0, 1), in_vals) 270 | negative_term = 1 / self.num_rand_samples * torch.matmul( 271 | torch.transpose(sample_vals, 0, 1), sample_vals) 272 | weight_neg_grad = -(positive_term - negative_term) 273 | 274 | result = (self.net.weights * 275 | weight_neg_grad).sum() / num_total_units 276 | if (result.data[0] < 0): 277 | return result * 0 278 | else: 279 | return result 280 | 281 | 282 | class RestrictedBoltzmannTrainWrapper(torch.nn.Module): 283 | def __init__(self, net, num_evolve_iters): 284 | super().__init__() 285 | assert isinstance(net, RestrictedBoltzmannMachine) 286 | self.net = net 287 | self.num_evolve_iters = num_evolve_iters 288 | 289 | def forward(self, in_visible_vals): 290 | batch_size = in_visible_vals.size()[0] 291 | in_vals = self.net.expand_data(in_visible_vals) 292 | evolved_vals = self.net(in_vals, self.num_evolve_iters) 293 | 294 | positive_term = 1 / batch_size * torch.matmul( 295 | torch.transpose(in_vals[:, self.net.num_units:], 0, 1), 296 | in_vals[:, :self.net.num_units]) 297 | negative_term = 1 / batch_size * torch.matmul( 298 | torch.transpose(evolved_vals[:, self.net.num_units:], 0, 1), 299 | evolved_vals[:, :self.net.num_units]) 300 | weight_neg_grad = -(positive_term - negative_term) 301 | 302 | avg_units = (self.net.num_units + self.net.num_hidden) / 2 303 | result = (self.net.weights * 304 | weight_neg_grad).sum() / avg_units 305 | if (result.data[0] < 0): 306 | return result * 0 307 | else: 308 | return result 309 | -------------------------------------------------------------------------------- /recitation-2/Tutorial-pytorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pytorch Tutorial" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Pytorch is a python framework for machine learning\n", 15 | "\n", 16 | "- GPU-accelerated computations\n", 17 | "- automatic differentiation\n", 18 | "- modules for neural networks\n", 19 | "\n", 20 | "This tutorial will teach you the fundamentals of operating on pytorch tensors. 
For a worked example of how to build and train a pytorch network, see `pytorch-example.py`.\n", 21 | "\n", 22 | "For additional tutorials, see http://pytorch.org/tutorials/" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 26, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import torch\n", 32 | "import numpy as np\n", 33 | "from torch.autograd import Variable" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Tensors" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "Tensors are the fundamental object for array data. The most common types you will use are `IntTensor` and `FloatTensor`." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 27, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\n", 60 | " 1.4406e+06 4.5734e-41 -6.3731e-27\n", 61 | " 4.5733e-41 1.3011e-37 0.0000e+00\n", 62 | "[torch.FloatTensor of size 2x3]\n", 63 | "\n", 64 | "\n", 65 | " 0 0 0\n", 66 | " 0 0 0\n", 67 | "[torch.FloatTensor of size 2x3]\n", 68 | "\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "# Create uninitialized tensor\n", 74 | "x = torch.FloatTensor(2,3)\n", 75 | "print(x)\n", 76 | "# Initialize to zeros\n", 77 | "x.zero_()\n", 78 | "print(x)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 28, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "\n", 91 | " 0.6965 0.2861 0.2269\n", 92 | " 0.5513 0.7195 0.4231\n", 93 | "[torch.FloatTensor of size 2x3]\n", 94 | "\n", 95 | "\n", 96 | " 0.6965 0.2861 0.2269\n", 97 | " 0.5513 0.7195 0.4231\n", 98 | "[torch.DoubleTensor of size 2x3]\n", 99 | "\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "# Create from numpy array (seed for repeatability)\n", 105 | "np.random.seed(123)\n", 106 | "np_array = np.random.random((2,3))\n", 107 | "print(torch.FloatTensor(np_array))\n", 108 | "print(torch.from_numpy(np_array))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 29, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "\n", 121 | "-0.5214 -1.4914 -0.2381\n", 122 | " 1.0306 0.2221 1.5162\n", 123 | "[torch.FloatTensor of size 2x3]\n", 124 | "\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "# Create random tensor (seed for repeatability)\n", 130 | "torch.manual_seed(123)\n", 131 | "x=torch.randn(2,3)\n", 132 | "print(x)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 30, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "\n", 145 | " 1 0 0\n", 146 | " 0 1 0\n", 147 | " 0 0 1\n", 148 | "[torch.FloatTensor of size 3x3]\n", 149 | "\n", 150 | "\n", 151 | " 1 1 1\n", 152 | " 1 1 1\n", 153 | "[torch.FloatTensor of size 2x3]\n", 154 | "\n", 155 | "\n", 156 | " 0 0 0\n", 157 | " 0 0 0\n", 158 | "[torch.FloatTensor of size 2x3]\n", 159 | "\n", 160 | "\n", 161 | " 0\n", 162 | " 1\n", 163 | " 2\n", 164 | "[torch.FloatTensor of size 3]\n", 165 | "\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "# special tensors (see documentation)\n", 171 | "print(torch.eye(3))\n", 172 | "print(torch.ones(2,3))\n", 173 | "print(torch.zeros(2,3))\n", 174 | "print(torch.arange(0,3))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | 
"source": [ 181 | "All tensors have a `size` and `type`" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 31, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "name": "stdout", 191 | "output_type": "stream", 192 | "text": [ 193 | "torch.Size([3, 4])\n", 194 | "torch.FloatTensor\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "x=torch.FloatTensor(3,4)\n", 200 | "print(x.size())\n", 201 | "print(x.type())" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## CPU and GPU" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "Tensors can be copied between CPU and GPU. It is important that everything involved in a calculation is on the same device. \n", 216 | "\n", 217 | "This portion of the tutorial may not work for you if you do not have a GPU available." 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 32, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "\n", 230 | " 0.5513 0.7192\n", 231 | " 0.7195 0.4911\n", 232 | " 0.4231 0.7800\n", 233 | "[torch.FloatTensor of size 3x2]\n", 234 | "\n", 235 | "\n", 236 | " 0.5513 0.7192\n", 237 | " 0.7195 0.4911\n", 238 | " 0.4231 0.7800\n", 239 | "[torch.cuda.FloatTensor of size 3x2 (GPU 0)]\n", 240 | "\n", 241 | "\n", 242 | " 0.5513 0.7192\n", 243 | " 0.7195 0.4911\n", 244 | " 0.4231 0.7800\n", 245 | "[torch.FloatTensor of size 3x2]\n", 246 | "\n", 247 | "[[ 0.55131477 0.7191503 ]\n", 248 | " [ 0.71946895 0.49111894]\n", 249 | " [ 0.42310646 0.78002775]]\n", 250 | "can't convert CUDA tensor to numpy (it doesn't support GPU arrays). Use .cpu() to move the tensor to host memory first.\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "# create a tensor\n", 256 | "x = torch.rand(3,2)\n", 257 | "print(x)\n", 258 | "# copy to GPU\n", 259 | "y = x.cuda()\n", 260 | "print(y)\n", 261 | "# copy back to CPU\n", 262 | "z = y.cpu()\n", 263 | "print(z)\n", 264 | "# get CPU tensor as numpy array\n", 265 | "print(z.numpy())\n", 266 | "# cannot get GPU tensor as numpy array directly\n", 267 | "try:\n", 268 | " y.numpy()\n", 269 | "except RuntimeError as e:\n", 270 | " print(e)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "Operations between GPU and CPU tensors will fail. Operations require all arguments to be on the same device." 
278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 33, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "torch.mm received an invalid combination of arguments - got (torch.FloatTensor, torch.cuda.FloatTensor), but expected one of:\n", 290 | " * (torch.FloatTensor source, torch.FloatTensor mat2)\n", 291 | " didn't match because some of the arguments have invalid types: (\u001b[32;1mtorch.FloatTensor\u001b[0m, \u001b[31;1mtorch.cuda.FloatTensor\u001b[0m)\n", 292 | " * (torch.SparseFloatTensor source, torch.FloatTensor mat2)\n", 293 | " didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.FloatTensor\u001b[0m, \u001b[31;1mtorch.cuda.FloatTensor\u001b[0m)\n", 294 | "\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "x = torch.rand(3,5) # CPU tensor\n", 300 | "y = torch.rand(5,4).cuda() # GPU tensor\n", 301 | "try:\n", 302 | " torch.mm(x,y) # Operation between CPU and GPU fails\n", 303 | "except TypeError as e:\n", 304 | " print(e)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "Typical code should include `if` statements or utilize helper functions so it can operate with or without the GPU." 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 34, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# Put tensor on CUDA if available\n", 321 | "x = torch.rand(3,2)\n", 322 | "if torch.cuda.is_available():\n", 323 | " x = x.cuda()\n", 324 | "\n", 325 | "# Do some calculations\n", 326 | "y = x ** 2 \n", 327 | "\n", 328 | "# Copy to CPU if on GPU\n", 329 | "if y.is_cuda:\n", 330 | " y = y.cpu()" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "A convenient method is `new`, which creates a new tensor on the same device as another tensor. It should be used for creating tensors whenever possible." 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 35, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "name": "stdout", 347 | "output_type": "stream", 348 | "text": [ 349 | "\n", 350 | " 1.4406e+06 4.5734e-41\n", 351 | "[torch.FloatTensor of size 1x2]\n", 352 | "\n", 353 | "\n", 354 | " 0.1280 0.5219\n", 355 | "[torch.cuda.FloatTensor of size 1x2 (GPU 0)]\n", 356 | "\n" 357 | ] 358 | } 359 | ], 360 | "source": [ 361 | "x1 = torch.rand(3,2)\n", 362 | "x2 = x1.new(1,2) # create cpu tensor\n", 363 | "print(x2)\n", 364 | "x1 = torch.rand(3,2).cuda()\n", 365 | "x2 = x1.new(1,2) # create cuda tensor\n", 366 | "print(x2)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "Calculations executed on the GPU can be many times faster than numpy. However, numpy is still optimized for the CPU and many times faster than python `for` loops. Numpy calculations may be faster than GPU calculations for small arrays due to the cost of interfacing with the GPU." 
374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 36, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "name": "stdout", 383 | "output_type": "stream", 384 | "text": [ 385 | "CPU: 185.08557113818824ms\n", 386 | "GPU: 52.751455921679735ms\n" 387 | ] 388 | } 389 | ], 390 | "source": [ 391 | "from timeit import timeit\n", 392 | "# Create random data\n", 393 | "x = torch.rand(1000,64)\n", 394 | "y = torch.rand(64,32)\n", 395 | "number = 10000 # number of iterations\n", 396 | "\n", 397 | "def square():\n", 398 | " z=torch.mm(x, y) # dot product (mm=matrix multiplication)\n", 399 | "\n", 400 | "# Time CPU\n", 401 | "print('CPU: {}ms'.format(timeit(square, number=number)*1000))\n", 402 | "# Time GPU\n", 403 | "x, y = x.cuda(), y.cuda()\n", 404 | "print('GPU: {}ms'.format(timeit(square, number=number)*1000))" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "## Math, Linear Algebra, and Indexing" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "Pytorch math and linear algebra is similar to numpy. Operators are overridden so you can use standard math operators (`+`,`-`, etc.) and expect a tensor as a result. See pytorch documentation for a complete list of available functions." 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 37, 424 | "metadata": {}, 425 | "outputs": [ 426 | { 427 | "name": "stdout", 428 | "output_type": "stream", 429 | "text": [ 430 | "10.0\n", 431 | "85.79102325439453\n", 432 | "2.0\n" 433 | ] 434 | } 435 | ], 436 | "source": [ 437 | "x = torch.arange(0,5)\n", 438 | "print(torch.sum(x))\n", 439 | "print(torch.sum(torch.exp(x)))\n", 440 | "print(torch.mean(x))" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": {}, 446 | "source": [ 447 | "Pytorch indexing is similar to numpy indexing. See pytorch documentation for details." 
448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 38, 453 | "metadata": {}, 454 | "outputs": [ 455 | { 456 | "name": "stdout", 457 | "output_type": "stream", 458 | "text": [ 459 | "\n", 460 | " 0.7526 0.5557\n", 461 | " 0.6445 0.7588\n", 462 | " 0.4765 0.2728\n", 463 | "[torch.FloatTensor of size 3x2]\n", 464 | "\n", 465 | "\n", 466 | " 0.6445\n", 467 | " 0.7588\n", 468 | "[torch.FloatTensor of size 2]\n", 469 | "\n" 470 | ] 471 | } 472 | ], 473 | "source": [ 474 | "x = torch.rand(3,2)\n", 475 | "print(x)\n", 476 | "print(x[1,:])" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "## Variables and Differentiation" 484 | ] 485 | }, 486 | { 487 | "cell_type": "markdown", 488 | "metadata": {}, 489 | "source": [ 490 | "Variables are used similarly to tensors but actually wrap tensors and provide automatic differentiation.\n", 491 | "\n", 492 | "- Variables you are differentiating with respect to must have `requires_grad=True`\n", 493 | "- Call `.backward()` on variables you are differentiating" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 39, 499 | "metadata": {}, 500 | "outputs": [ 501 | { 502 | "name": "stdout", 503 | "output_type": "stream", 504 | "text": [ 505 | "Variable containing:\n", 506 | " 0\n", 507 | " 1\n", 508 | " 2\n", 509 | " 3\n", 510 | "[torch.FloatTensor of size 4]\n", 511 | "\n", 512 | "Variable containing:\n", 513 | " 14\n", 514 | "[torch.FloatTensor of size 1]\n", 515 | "\n", 516 | "Variable containing:\n", 517 | " 0\n", 518 | " 2\n", 519 | " 4\n", 520 | " 6\n", 521 | "[torch.FloatTensor of size 4]\n", 522 | "\n" 523 | ] 524 | } 525 | ], 526 | "source": [ 527 | "# Create variable\n", 528 | "x = Variable(torch.arange(0,4), requires_grad=True)\n", 529 | "# Calculate y=sum(x**2)\n", 530 | "y = torch.sum(x**2)\n", 531 | "# Calculate gradient (dy/dx=2x)\n", 532 | "y.backward()\n", 533 | "# Print values\n", 534 | "print(x)\n", 535 | "print(y)\n", 536 | "print(x.grad)" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": {}, 542 | "source": [ 543 | "Variables and Tensors cannot be mixed. Wrap all tensors to use them in automatic differentiation." 
544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 40, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "name": "stdout", 553 | "output_type": "stream", 554 | "text": [ 555 | "\n", 556 | " 1.3053 1.4458 0.8122 1.8480\n", 557 | " 2.2392 2.2687 0.8424 2.3457\n", 558 | " 1.9050 2.0709 1.0581 2.4536\n", 559 | "[torch.FloatTensor of size 3x4]\n", 560 | "\n", 561 | "Variable containing:\n", 562 | " 1.3053 1.4458 0.8122 1.8480\n", 563 | " 2.2392 2.2687 0.8424 2.3457\n", 564 | " 1.9050 2.0709 1.0581 2.4536\n", 565 | "[torch.FloatTensor of size 3x4]\n", 566 | "\n", 567 | "torch.mm received an invalid combination of arguments - got (torch.FloatTensor, Variable), but expected one of:\n", 568 | " * (torch.FloatTensor source, torch.FloatTensor mat2)\n", 569 | " didn't match because some of the arguments have invalid types: (\u001b[32;1mtorch.FloatTensor\u001b[0m, \u001b[31;1mVariable\u001b[0m)\n", 570 | " * (torch.SparseFloatTensor source, torch.FloatTensor mat2)\n", 571 | " didn't match because some of the arguments have invalid types: (\u001b[31;1mtorch.FloatTensor\u001b[0m, \u001b[31;1mVariable\u001b[0m)\n", 572 | "\n" 573 | ] 574 | } 575 | ], 576 | "source": [ 577 | "x=torch.rand(3,5) # tensor\n", 578 | "y=torch.rand(5,4) # tensor\n", 579 | "xv=Variable(x) # variable\n", 580 | "yv=Variable(y) # variable\n", 581 | "print(torch.mm(x,y)) # dot between two tensors OK\n", 582 | "print(torch.mm(xv,yv)) # dot between two variables OK\n", 583 | "try:\n", 584 | " fail=torch.mm(x,yv) # dot between tensor and variable FAIL\n", 585 | "except TypeError as e:\n", 586 | " print(e)" 587 | ] 588 | }, 589 | { 590 | "cell_type": "markdown", 591 | "metadata": {}, 592 | "source": [ 593 | "Differentiation accumulates gradients. This is sometimes what you want and sometimes not. 
**Make sure to zero gradients between batches if performing SGD or you will get strange results!**" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 41, 599 | "metadata": {}, 600 | "outputs": [ 601 | { 602 | "name": "stdout", 603 | "output_type": "stream", 604 | "text": [ 605 | "Variable containing:\n", 606 | " 0\n", 607 | " 2\n", 608 | " 4\n", 609 | " 6\n", 610 | "[torch.FloatTensor of size 4]\n", 611 | "\n", 612 | "Variable containing:\n", 613 | " 0\n", 614 | " 4\n", 615 | " 8\n", 616 | " 12\n", 617 | "[torch.FloatTensor of size 4]\n", 618 | "\n", 619 | "Variable containing:\n", 620 | " 0\n", 621 | " 2\n", 622 | " 4\n", 623 | " 6\n", 624 | "[torch.FloatTensor of size 4]\n", 625 | "\n" 626 | ] 627 | } 628 | ], 629 | "source": [ 630 | "# Create a variable\n", 631 | "x=Variable(torch.arange(0,4), requires_grad=True)\n", 632 | "# Differentiate\n", 633 | "torch.sum(x**2).backward()\n", 634 | "print(x.grad)\n", 635 | "# Differentiate again (accumulates gradient)\n", 636 | "torch.sum(x**2).backward()\n", 637 | "print(x.grad)\n", 638 | "# Zero gradient before differentiating\n", 639 | "x.grad.data.zero_()\n", 640 | "torch.sum(x**2).backward()\n", 641 | "print(x.grad)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "## Neural Network Modules" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | "Pytorch provides a framework for developing neural network modules that takes care of things like tracking a list of parameters for you.\n", 656 | "\n", 657 | "- `nn.Module` objects are reusable components such as dense layers and activation functions\n", 658 | "- You can write custom modules for any experimental layers\n", 659 | "- You can combine modules into larger module classes" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": 42, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [ 668 | "# create a simple sequential network (`nn.Module` object) from layers (other `nn.Module` objects)\n", 669 | "net = torch.nn.Sequential(\n", 670 | " torch.nn.Linear(28*28,256),\n", 671 | " torch.nn.Sigmoid(),\n", 672 | " torch.nn.Linear(256,10))" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 43, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "# create a more customizable network module\n", 682 | "class MyNetwork(torch.nn.Module):\n", 683 | " def __init__(self):\n", 684 | " super().__init__()\n", 685 | " self.layer1 = torch.nn.Linear(28*28,256)\n", 686 | " self.layer2 = torch.nn.Sigmoid()\n", 687 | " self.layer3 = torch.nn.Linear(256,10)\n", 688 | "\n", 689 | " def forward(self, input_val):\n", 690 | " h = input_val\n", 691 | " h = self.layer1(h)\n", 692 | " h = self.layer2(h)\n", 693 | " h = self.layer3(h)\n", 694 | " return h\n", 695 | "\n", 696 | "net = MyNetwork()" 697 | ] 698 | }, 699 | { 700 | "cell_type": "markdown", 701 | "metadata": {}, 702 | "source": [ 703 | "## Saving and Loading" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 44, 709 | "metadata": {}, 710 | "outputs": [ 711 | { 712 | "name": "stdout", 713 | "output_type": "stream", 714 | "text": [ 715 | "odict_keys(['0.weight', '0.bias', '2.weight', '2.bias'])\n" 716 | ] 717 | } 718 | ], 719 | "source": [ 720 | "# get dictionary of keys to weights using `state_dict`\n", 721 | "net = torch.nn.Sequential(\n", 722 | " torch.nn.Linear(28*28,256),\n", 723 | " torch.nn.Sigmoid(),\n", 724 | " 
torch.nn.Linear(256,10))\n", 725 | "print(net.state_dict().keys())" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": 45, 731 | "metadata": {}, 732 | "outputs": [], 733 | "source": [ 734 | "# save a dictionary\n", 735 | "torch.save(net.state_dict(),'test.t7')\n", 736 | "# load a dictionary\n", 737 | "net.load_state_dict(torch.load('test.t7'))" 738 | ] 739 | }, 740 | { 741 | "cell_type": "markdown", 742 | "metadata": {}, 743 | "source": [ 744 | "## Building a Neural Network\n", 745 | "\n", 746 | "For a worked example of how to build and train a pytorch network, see `pytorch-example.py`.\n", 747 | "\n", 748 | "Good luck!" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": null, 754 | "metadata": {}, 755 | "outputs": [], 756 | "source": [] 757 | } 758 | ], 759 | "metadata": { 760 | "kernelspec": { 761 | "display_name": "Python 3", 762 | "language": "python", 763 | "name": "python3" 764 | }, 765 | "language_info": { 766 | "codemirror_mode": { 767 | "name": "ipython", 768 | "version": 3 769 | }, 770 | "file_extension": ".py", 771 | "mimetype": "text/x-python", 772 | "name": "python", 773 | "nbconvert_exporter": "python", 774 | "pygments_lexer": "ipython3", 775 | "version": "3.5.2" 776 | } 777 | }, 778 | "nbformat": 4, 779 | "nbformat_minor": 2 780 | } 781 | -------------------------------------------------------------------------------- /recitation-2/pytorch-example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.utils.data 4 | 5 | 6 | class OnesCounter(torch.nn.Module): 7 | def __init__(self, input_size): 8 | super().__init__() 9 | self.input_size = input_size 10 | 11 | count_bitwidth = int(np.ceil(np.log2(input_size + 1))) 12 | self.to_hidden1 = torch.nn.Linear(input_size, 2 * input_size) 13 | self.hidden_sigmoid1 = torch.nn.Sigmoid() 14 | self.to_hidden2 = torch.nn.Linear(2 * input_size, 2 * input_size) 15 | self.hidden_sigmoid2 = torch.nn.Sigmoid() 16 | self.to_binary = torch.nn.Linear(2 * input_size, count_bitwidth) 17 | 18 | def forward(self, input_val): 19 | hidden1 = self.hidden_sigmoid1(self.to_hidden1(input_val)) 20 | hidden2 = self.hidden_sigmoid2(self.to_hidden2(hidden1)) 21 | return self.to_binary(hidden2) 22 | 23 | 24 | def load_data(): 25 | # We'll just make our data on the spot here, but 26 | # we usually load real data sets from a file 27 | 28 | # Create 10000 random 7-bit inputs 29 | data = np.random.binomial(1, 0.5, size=(10000, 7)) 30 | 31 | # Count the number of 1's in each input 32 | labels = data.sum(axis=1) 33 | 34 | # Create the binary encoding of the ground truth labels 35 | # As a bit of practice using Numpy, we're going to do this 36 | # without using a Python loop. 
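   |     # np.unpackbits expands each uint8 into its 8 bits, most significant bit
   |     # first, so a count in 0..7 occupies only the last 3 bits of each
   |     # reshaped 8-bit row.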
37 | labels_binary = np.unpackbits(labels.astype(np.uint8)).reshape((-1,8)) 38 | labels_binary = labels_binary[:,-3:] 39 | 40 | return (data, labels_binary) 41 | 42 | 43 | def to_tensor(numpy_array): 44 | # Numpy array -> Tensor 45 | return torch.from_numpy(numpy_array).float() 46 | 47 | 48 | def to_variable(tensor): 49 | # Tensor -> Variable (on GPU if possible) 50 | if torch.cuda.is_available(): 51 | # Tensor -> GPU Tensor 52 | tensor = tensor.cuda() 53 | return torch.autograd.Variable(tensor) 54 | 55 | 56 | def training_routine(num_epochs, minibatch_size, learn_rate): 57 | (data, labels_binary) = load_data() 58 | 59 | my_net = OnesCounter(7) # Create the network, 60 | loss_fn = torch.nn.BCEWithLogitsLoss() # and choose the loss function / optimizer 61 | optim = torch.optim.SGD(my_net.parameters(), lr=learn_rate) 62 | 63 | if torch.cuda.is_available(): 64 | # Move the network and the optimizer to the GPU 65 | my_net = my_net.cuda() 66 | loss_fn = loss_fn.cuda() 67 | 68 | dataset = torch.utils.data.TensorDataset( 69 | to_tensor(data), to_tensor(labels_binary)) 70 | data_loader = torch.utils.data.DataLoader( 71 | dataset, batch_size=minibatch_size, shuffle=True) 72 | 73 | for epoch in range(num_epochs): 74 | losses = [] 75 | for (input_val, label) in data_loader: 76 | optim.zero_grad() # Reset the gradients 77 | 78 | prediction = my_net(to_variable(input_val)) # Feed forward 79 | loss = loss_fn(prediction, to_variable(label)) # Compute losses 80 | loss.backward() # Backpropagate the gradients 81 | losses.append(loss.data.cpu().numpy()) 82 | optim.step() # Update the network 83 | print("Epoch {} Loss: {:.4f}".format(epoch, np.asscalar(np.mean(losses)))) 84 | return my_net 85 | 86 | if __name__ == '__main__': 87 | net = training_routine(100, 50, 2) 88 | x = to_variable(to_tensor(np.array([[1,0,1,1,0,1,0], [0,1,0,0,0,0,0], [1,1,1,0,0,0,0]]))) 89 | y = net(x) 90 | print('X: {}'.format(x.data.cpu().numpy())) 91 | print('Y (logits): {}'.format(y.data.cpu().numpy())) 92 | print('Y (argmax): {}'.format(y.data.cpu().numpy() > 0)) 93 | -------------------------------------------------------------------------------- /recitation-4/image_folder/face1/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-4/image_folder/face1/image1.jpg -------------------------------------------------------------------------------- /recitation-4/image_folder/face2/image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-4/image_folder/face2/image2.jpg -------------------------------------------------------------------------------- /recitation-4/pytorch-mnist-cnn-example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | 4 | import torch 5 | import torch.nn as nn 6 | from inferno.extensions.layers.reshape import Flatten 7 | from inferno.trainers.basic import Trainer 8 | from inferno.trainers.callbacks.logging.tensorboard import TensorboardLogger 9 | from torchvision import datasets, transforms 10 | 11 | 12 | class MNISTCNNModel(nn.Module): 13 | def __init__(self): 14 | super(MNISTCNNModel, self).__init__() 15 | self.layers = nn.ModuleList([ 16 | nn.Conv2d(in_channels=1, out_channels=32, padding=1, kernel_size=3), # 28*28 17 
| nn.MaxPool2d(kernel_size=3, stride=2, padding=1), # 14*14 18 | nn.Conv2d(in_channels=32, out_channels=64, padding=1, kernel_size=3), # 14*14 19 | nn.MaxPool2d(kernel_size=2, stride=2), # 7*7 20 | Flatten(), 21 | nn.Linear(in_features=64 * 7 * 7, out_features=256), 22 | nn.ReLU(), 23 | nn.Linear(in_features=256, out_features=10) 24 | ]) 25 | self.firstrun = True 26 | 27 | def forward(self, input): 28 | h = input 29 | if self.firstrun: 30 | print("****************************************") 31 | print("input: {}".format(h.size())) 32 | for layer in self.layers: 33 | h = layer(h) 34 | if self.firstrun: 35 | print("{}: {}".format(layer, h.size())) 36 | if self.firstrun: 37 | print("****************************************") 38 | self.firstrun = False 39 | return h 40 | 41 | 42 | def mnist_data_loaders(args): 43 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 44 | train_loader = torch.utils.data.DataLoader( 45 | datasets.MNIST('./data', train=True, download=True, 46 | transform=transforms.Compose([ 47 | transforms.ToTensor(), 48 | transforms.Normalize((0.1307,), (0.3081,)) 49 | ])), 50 | batch_size=args.batch_size, shuffle=True, **kwargs) 51 | test_loader = torch.utils.data.DataLoader( 52 | datasets.MNIST('./data', train=False, transform=transforms.Compose([ 53 | transforms.ToTensor(), 54 | transforms.Normalize((0.1307,), (0.3081,)) 55 | ])), 56 | batch_size=args.test_batch_size, shuffle=True, **kwargs) 57 | return train_loader, test_loader 58 | 59 | 60 | def train_model(args): 61 | model = MNISTCNNModel() 62 | train_loader, validate_loader = mnist_data_loaders(args) 63 | 64 | # Build trainer 65 | trainer = Trainer(model) \ 66 | .build_criterion('CrossEntropyLoss') \ 67 | .build_metric('CategoricalError') \ 68 | .build_optimizer('Adam') \ 69 | .validate_every((2, 'epochs')) \ 70 | .save_every((5, 'epochs')) \ 71 | .save_to_directory(args.save_directory) \ 72 | .set_max_num_epochs(args.epochs) \ 73 | .build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'), 74 | log_images_every='never'), 75 | log_directory=args.save_directory) 76 | 77 | # Bind loaders 78 | trainer \ 79 | .bind_loader('train', train_loader) \ 80 | .bind_loader('validate', validate_loader) 81 | 82 | if args.cuda: 83 | trainer.cuda() 84 | 85 | # Go! 
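   |     # fit() runs the train/validate loop until the epoch cap set above,
   |     # saving checkpoints and TensorBoard scalars under args.save_directory
   |     # (inspect with: tensorboard --logdir output/inferno).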
86 | trainer.fit() 87 | 88 | 89 | def main(argv): 90 | # Training settings 91 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 92 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 93 | help='input batch size for training (default: 64)') 94 | parser.add_argument('--save-directory', type=str, default='output/inferno', 95 | help='output directory') 96 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 97 | help='input batch size for testing (default: 1000)') 98 | parser.add_argument('--epochs', type=int, default=20, metavar='N', 99 | help='number of epochs to train (default: 20)') 100 | parser.add_argument('--no-cuda', action='store_true', default=False, 101 | help='disables CUDA training') 102 | args = parser.parse_args() 103 | args.cuda = not args.no_cuda and torch.cuda.is_available() 104 | train_model(args) 105 | 106 | 107 | if __name__ == '__main__': 108 | main(sys.argv[1:]) 109 | -------------------------------------------------------------------------------- /recitation-5/hamlet.txt: -------------------------------------------------------------------------------- 1 | to be, or not to be: that is the question: whether 'tis nobler in the mind to suffer the slings and arrows of outrageous fortune, or to take arms against a sea of troubles, and by opposing end them? to die: to sleep; no more; and, by a sleep to say we end the heart-ache and the thousand natural shocks that flesh is heir to, 'tis a consummation devoutly to be wish'd. to die, to sleep; to sleep: perchance to dream: ay, there's the rub; for in that sleep of death what dreams may come when we have shuffled off this mortal coil, must give us pause. there's the respect that makes calamity of so long a life; for who would bear the whips and scorns of time, the oppressor's wrong, the proud man's contumely, the pangs of dispriz'd love, the law's delay, the insolence of office, and the spurns that patient merit of the unworthy takes, when he himself might his quietus make with a bare bodkin? Who would fardels bear, to grunt and sweat under a weary life, but that the dread of something after death, the undiscover'd country from whose bourn no traveller returns, puzzles the will, and makes us rather bear those ills we have, than fly to others that we know not of? thus conscience doth make cowards of us all; and thus the native hue of resolution is sicklied o'er with the pale cast of thought, and enterprises of great pith and moment with this regard their currents turn awry, and lose the name of action. -------------------------------------------------------------------------------- /recitation-5/recitation5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Recurrent Neural Networks Tutorial\n", 8 | "\n", 9 | "In HW1 you've worked on building simple MLP models that you used to classify single inputs of MNIST images into ten classes, and in HW2 you will work with CNNs to recognize objects in images. However, real-world data are rarely standalone fixed-size vectors independent of context - speech segments, for instance, have arbitrary lengths and clearly depend on context. In this tutorial, we will take a small step in this direction by building a basic RNN for generating text by learning from some sample text files that we provide."
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# You must random walk before you rnn\n", 17 | "\n", 18 | "Let's begin by loading a choice selection of tweets by the leader of the free world, courtesy of a dataset available on Kaggle (https://www.kaggle.com/kingburrito666/better-donald-trump-tweets), formatted for this recitation*.\n", 19 | "\n", 20 | "\\**11785 staff are not responsible for content, accuracy, or sanity of this dataset*" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np\n", 30 | "from collections import Counter\n", 31 | "\n", 32 | "history_length = 3\n", 33 | "\n", 34 | "def process_input(filename):\n", 35 | " rawtext = open(filename).read()\n", 36 | " sequences = [rawtext[i:i+history_length] for i in range(int(len(rawtext)/history_length))]\n", 37 | " stats = Counter(sequences)\n", 38 | " tokens = []\n", 39 | " counts = []\n", 40 | " for i in stats.most_common():\n", 41 | " tokens.append(i[0])\n", 42 | " counts.append(i[1])\n", 43 | " return stats\n", 44 | " \n", 45 | "def next_char(cur,stats):\n", 46 | " seed = cur[1:]\n", 47 | " candidates = []\n", 48 | " candidatec = []\n", 49 | " for k in stats.keys():\n", 50 | " if seed==k[:-1]:\n", 51 | " candidates.append(k)\n", 52 | " candidatec.append(float(stats[k]))\n", 53 | " candidatep = [x/sum(candidatec) for x in candidatec]\n", 54 | " return candidates[np.random.choice(len(candidatec),p=candidatep)]\n", 55 | "\n", 56 | "def sample(length,running_state, stats):\n", 57 | " output = ''\n", 58 | " for i in range(length):\n", 59 | " output+=running_state[0]\n", 60 | " running_state = next_char(running_state,stats)\n", 61 | " return output" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "'oban an t co bad firs the speco wor busins han h'" 73 | ] 74 | }, 75 | "execution_count": 2, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "tweet_stats = process_input('tweets.txt')\n", 82 | "sample(50,'oba',tweet_stats)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "To convince you that this is a reasonable approach, let's take a more representative example of the English language:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "\"the min to mings fore's to sand be: the them? 
thei\"" 101 | ] 102 | }, 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "hamlet_stats = process_input('hamlet.txt')\n", 110 | "sample(50, 'the',hamlet_stats)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "And if we further increase the running history that this extremely simple model keeps, we see better and better examples:" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 5, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "and vegallowers new https your see t come\n", 130 | "the that fles, and by a sea of outrageousand them?\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "history_length = 4\n", 136 | "tweet_stats = process_input('tweets.txt')\n", 137 | "print(sample(50,'and ',tweet_stats))\n", 138 | "hamlet_stats = process_input('hamlet.txt')\n", 139 | "print(sample(50,'the ',hamlet_stats))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "obamas belief effortunited past the field carolin\n", 152 | "to be, or not to say we end the question: whether \n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "history_length = 5\n", 158 | "tweet_stats = process_input('tweets.txt')\n", 159 | "print(sample(50,'obama',tweet_stats))\n", 160 | "hamlet_stats = process_input('hamlet.txt')\n", 161 | "print(sample(50,'to be',hamlet_stats))" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "However, there are some inherent limitations to this approach. Character-level n-grams do get better with increasing n, but we run into all sorts of limitations in terms of memory (the number of possible n-grams grows exponentially with n) and sparsity (it is HIGHLY unlikely that you will observe all possible n-grams in the data). A large body of literature from the past few decades engineers NLP techniques to get around this (cf. [1]), but even then we are limited by the fact that information from (n+1) steps ago gets washed out.\n", 169 | "\n", 170 | "\n", 171 | "[1] Chen and Goodman. An Empirical Study of Smoothing Techniques for Language Modeling. 1998." 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "# Recurrent Neural Networks\n", 179 | "\n", 180 | "So far, we've been working with networks where the input size was fixed. Today let's take the example of the English language. A typical method of vectorizing words is to convert them into a one-hot representation over the vocabulary.\n", 181 | "\n", 182 | "Let's take one such sample vector representation x, which belongs to the set of all such vectors X (the vocabulary).\n", 183 | "\n", 184 | "In the previous section, we implemented a Markov chain that generates sentences given a seed and statistics over the training data. 
From a high-level standpoint, we were looking at the following method:" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 7, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/plain": [ 195 | "'\\ndef markov_generator(x):\\n current_state = x\\n for i in range(some target length):\\n output += generate_single_step_output(current_state)\\n new_state = random_process(current_state)\\n current_state = new_state\\n return output\\n'" 196 | ] 197 | }, 198 | "execution_count": 7, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "# This code is meant as an overview, not meant to compile\n", 205 | "'''\n", 206 | "def markov_generator(x):\n", 207 | " current_state = x\n", 208 | " for i in range(some target length):\n", 209 | " output += generate_single_step_output(current_state)\n", 210 | " new_state = random_process(current_state)\n", 211 | " current_state = new_state\n", 212 | " return output\n", 213 | "'''" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "Here, a string of length n corresponds to the current state. Evidently, a model that forgets its previous state at every step is not going to capture long term dependencies. We have to stop being Markovian!\n", 221 | "\n", 222 | "Instead, what we need is a method of *storing* what we have observed so far. Consider the following method:" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 8, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "'\\nclass generator_with_memory:\\n def __init__():\\n self.memory_state = self.init_memory()\\n def step(x):\\n self.memory_state = smart_process(x,self.memory_state)\\n y = generate_output(self.memory_state)\\n return y\\n'" 234 | ] 235 | }, 236 | "execution_count": 8, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "# This code is meant as an overview, not meant to compile\n", 243 | "'''\n", 244 | "class generator_with_memory:\n", 245 | " def __init__():\n", 246 | " self.memory_state = self.init_memory()\n", 247 | " def step(x):\n", 248 | " self.memory_state = smart_process(x,self.memory_state)\n", 249 | " y = generate_output(self.memory_state)\n", 250 | " return y\n", 251 | "'''" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "By defining an object with an associated memory_state and a smart choice of smart_process() and generate_output(), we might be able to capture long term dependencies by *storing* them in our memory_state.\n", 259 | "\n", 260 | "How can we create such functions? One choice could be to have a linear transform between all associated vectors. Let's look at our vectors so far. 
Let's refer to memory_state as h for convenience.\n", 261 | "\n", 262 | "x: Input vector\n", 263 | "\n", 264 | "y: Output vector\n", 265 | "\n", 266 | "h: \"Hidden\" memory_state vector" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 9, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "'\\ndef step(x):\\n self.h = np.tanh(np.dot(self.W_hh, self.h) + np.dot(self.W_xh, x))\\n y = np.dot(self.W_hy, self.h)\\n return y\\n'" 278 | ] 279 | }, 280 | "execution_count": 9, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "# This code is meant as an overview, not meant to compile\n", 287 | "'''\n", 288 | "def step(x):\n", 289 | " self.h = np.tanh(np.dot(self.W_hh, self.h) + np.dot(self.W_xh, x))\n", 290 | " y = np.dot(self.W_hy, self.h)\n", 291 | " return y\n", 292 | "'''" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "The update step incorporates both the current input x and the current memory state h. This can replace smart_process above and incorporate memory into our model! We can initialize the model parameters W_hh, W_xh, W_hy randomly and learn them from our data.\n", 300 | "\n", 301 | "It is also important to note that this method is able to work with arbitrary length sequences. We can just step through the entire sequence and rely on h to keep track of long range dependencies. Thus we have moved away from the fixed input size paradigm as well! You can visualize the process of stepping through the sequence in the following image (credits: Wikipedia)\n", 302 | "\n", 303 | "\n", 304 | "\n", 305 | "You can come up with more imaginative ways to step through the sequence. For instance, why should we restrict ourselves to a flat sequence? A step combines the current input x_t with the current hidden state h_t. One can apply this combination step recursively, as the authors of [2] did when using this idea for sentiment analysis. \n", 306 | "\n", 307 | "\n", 308 | "\n", 309 | "\n", 310 | "One can also stack things - instead of having just one recurrent layer you can use\n", 311 | "\n", 312 | "y1 = rnn1.step(x)\n", 313 | "\n", 314 | "y2 = rnn2.step(y1)\n", 315 | "\n", 316 | "and so on.\n", 317 | "\n", 318 | "[2] Socher, Manning, Ng. Recursive deep models for semantic compositionality over a sentiment treebank. EMNLP 2013.\n", 319 | "\n", 320 | "\n", 321 | "# RNNs using PyTorch\n", 322 | "\n", 323 | "Let's work with an example task and implement an RNN using PyTorch. For the purpose of this recitation we will work on a very simple time series prediction problem adapted from PyTorch examples [3]. 
We'll generate a random set of sinusoidal waves and use LSTMCell units to learn these waves and predict the next few steps.\n", 324 | "\n", 325 | "\n", 326 | "[3] https://github.com/pytorch/examples/tree/master/time_sequence_prediction" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 10, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "import torch\n", 336 | "import torch.nn as nn\n", 337 | "from torch.autograd import Variable\n", 338 | "import torch.optim as optim\n", 339 | "import matplotlib\n", 340 | "matplotlib.use('Agg')\n", 341 | "import matplotlib.pyplot as plt\n", 342 | "\n", 343 | "np.random.seed(0)\n", 344 | "torch.manual_seed(0)\n", 345 | "\n", 346 | "T = 80 # range from which ints are sampled\n", 347 | "L = 1000 # Length of generated sequence\n", 348 | "N = 100 # number of examples\n", 349 | "future = 1000 # length of sequence to predict\n", 350 | "# generating a sinusoidal time series\n", 351 | "x = np.empty((N, L), 'int64')\n", 352 | "x[:] = np.array(range(L)) + np.random.randint(- 4 * T, 4 * T, N).reshape(N, 1)\n", 353 | "data = np.sin(x / 1.0 / T).astype('float64')\n", 354 | "\n", 355 | "\n", 356 | "class Sequence(nn.Module):\n", 357 | " def __init__(self):\n", 358 | " super(Sequence, self).__init__()\n", 359 | " self.lstm1 = nn.LSTMCell(1, 32)\n", 360 | " self.lstm2 = nn.LSTMCell(32, 32)\n", 361 | " self.linear = nn.Linear(32, 1)\n", 362 | " def forward(self, input, future = 0):\n", 363 | " outputs = []\n", 364 | " h_t = Variable(torch.zeros(input.size(0), 32).double(), requires_grad=False)\n", 365 | " c_t = Variable(torch.zeros(input.size(0), 32).double(), requires_grad=False)\n", 366 | " h_t2 = Variable(torch.zeros(input.size(0), 32).double(), requires_grad=False)\n", 367 | " c_t2 = Variable(torch.zeros(input.size(0), 32).double(), requires_grad=False)\n", 368 | " for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):\n", 369 | " h_t, c_t = self.lstm1(input_t, (h_t, c_t))\n", 370 | " h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))\n", 371 | " output = self.linear(h_t2)\n", 372 | " outputs += [output]\n", 373 | " for i in range(future):# predicting future\n", 374 | " h_t, c_t = self.lstm1(output, (h_t, c_t))\n", 375 | " h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))\n", 376 | " output = self.linear(h_t2)\n", 377 | " outputs += [output]\n", 378 | " outputs = torch.stack(outputs, 1).squeeze(2)\n", 379 | " return outputs\n", 380 | " \n", 381 | "def save_plot_wave(y_gen):\n", 382 | " plt.figure(figsize=(30,10))\n", 383 | " plt.title('Predict future values for time sequence', fontsize=30)\n", 384 | " plt.xlabel('x', fontsize=20)\n", 385 | " plt.ylabel('y', fontsize=20)\n", 386 | " plt.xticks(fontsize=20)\n", 387 | " plt.yticks(fontsize=20)\n", 388 | " plt.plot(np.arange(input.size(1)), y_gen[0][:input.size(1)], 'b', linewidth = 2.0)\n", 389 | " plt.plot(np.arange(input.size(1), input.size(1) + future), y_gen[0][input.size(1):], 'b' + ':', linewidth = 2.0)\n", 390 | " plt.savefig('predict%d.pdf'%i)\n", 391 | " plt.close()" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": {}, 397 | "source": [ 398 | "Notice the staggered nature of input and target data below:" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 11, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "input = Variable(torch.from_numpy(data[1:, :-1]), requires_grad=False)\n", 408 | "target = Variable(torch.from_numpy(data[1:, 1:]), requires_grad=False)\n", 409 | "test_input = 
Variable(torch.from_numpy(data[:1, :-1]), requires_grad=False)\n", 410 | "test_target = Variable(torch.from_numpy(data[:1, 1:]), requires_grad=False)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "Here we will use the LBFGS optimizer. It is used for parameter estimation by minimizing a smooth f(x) over unconstrained real-valued vector x. It needs to reevaluate the function multiple times, so you have to pass in a closure that allows recomputation. The closure should clear the gradients, compute the loss, and return it [4]. LBFGS also does not support per-parameter options.\n", 418 | "\n", 419 | "\n", 420 | "[4] http://pytorch.org/docs/master/optim.html#optimizer-step-closure" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "name": "stdout", 430 | "output_type": "stream", 431 | "text": [ 432 | "Step: 0\n", 433 | "Loss: 0.534735817744\n", 434 | "Loss: 0.51595643509\n", 435 | "Loss: 0.487655250214\n", 436 | "Loss: 0.476866620552\n", 437 | "Loss: 0.443208983365\n", 438 | "Loss: 0.354411918037\n", 439 | "Loss: 0.230974896977\n", 440 | "Loss: 6.21625536659\n", 441 | "Loss: 0.107385785838\n", 442 | "Loss: 0.0244810777848\n", 443 | "Loss: 0.00949078034371\n", 444 | "Loss: 0.00899265775527\n", 445 | "Loss: 0.00716001025314\n", 446 | "Loss: 0.00574255749538\n", 447 | "Loss: 0.00422940005235\n", 448 | "Loss: 0.00372545641453\n", 449 | "Loss: 0.00351897569446\n", 450 | "Loss: 0.00316297494398\n", 451 | "Loss: 0.00197494376456\n", 452 | "Loss: 0.0012997484383\n", 453 | "Test loss: 0.00116922468676\n", 454 | "Step: 1\n", 455 | "Loss: 0.00145462772409\n", 456 | "Loss: 0.00118182828882\n", 457 | "Loss: 0.00114993069323\n", 458 | "Loss: 0.00111182542918\n", 459 | "Loss: 0.00104719865128\n", 460 | "Loss: 0.000952630861115\n", 461 | "Loss: 0.000879430665862\n", 462 | "Loss: 0.000858234694016\n", 463 | "Loss: 0.000823140827356\n", 464 | "Loss: 0.000815607769945\n", 465 | "Loss: 0.000785582041389\n", 466 | "Loss: 0.000768247239385\n", 467 | "Loss: 0.000739432594294\n", 468 | "Loss: 0.000691610451779\n", 469 | "Loss: 0.000639641483622\n", 470 | "Loss: 0.00056794222483\n", 471 | "Loss: 0.000582874034494\n", 472 | "Loss: 0.000521702656905\n", 473 | "Loss: 0.000527696125628\n", 474 | "Loss: 0.000490990391194\n", 475 | "Test loss: 0.000364178401968\n", 476 | "Step: 2\n", 477 | "Loss: 0.000475000223186\n", 478 | "Loss: 0.000443743208666\n", 479 | "Loss: 0.000405212550299\n", 480 | "Loss: 0.000363450478623\n", 481 | "Loss: 0.000349381608751\n", 482 | "Loss: 0.000329962760153\n", 483 | "Loss: 0.00032021876075\n", 484 | "Loss: 0.00031586417019\n", 485 | "Loss: 0.000313784756038\n", 486 | "Loss: 0.00031245535365\n", 487 | "Loss: 0.000311075650661\n", 488 | "Loss: 0.000307976767579\n", 489 | "Loss: 0.000301441406567\n", 490 | "Loss: 0.000287378695643\n", 491 | "Loss: 0.000260281280745\n", 492 | "Loss: 0.000258994054642\n", 493 | "Loss: 0.000232215869055\n", 494 | "Loss: 0.000223077141369\n", 495 | "Loss: 0.000210962214309\n", 496 | "Loss: 0.000203371911198\n", 497 | "Test loss: 0.000125290996629\n", 498 | "Step: 3\n", 499 | "Loss: 0.000198622762651\n", 500 | "Loss: 0.000193427880996\n", 501 | "Loss: 0.000189139375684\n", 502 | "Loss: 0.000184419686344\n", 503 | "Loss: 0.000179143510437\n", 504 | "Loss: 0.000170780254305\n", 505 | "Loss: 0.000179680505847\n", 506 | "Loss: 0.000164723393531\n", 507 | "Loss: 0.00016305363411\n", 508 | "Loss: 0.000160433839687\n", 509 | "Loss: 
0.000158777652822\n", 510 | "Loss: 0.000157648103219\n", 511 | "Loss: 0.000156508232341\n", 512 | "Loss: 0.000155578153118\n", 513 | "Loss: 0.000150368320592\n", 514 | "Loss: 0.000141645111445\n", 515 | "Loss: 0.000129853579478\n", 516 | "Loss: 0.000146714386213\n", 517 | "Loss: 0.000117126864642\n", 518 | "Loss: 0.000114706457878\n", 519 | "Test loss: 7.45965447817e-05\n", 520 | "Step: 4\n", 521 | "Loss: 0.000111926204751\n", 522 | "Loss: 0.000108899712335\n", 523 | "Loss: 0.000107294608565\n", 524 | "Loss: 0.000106671826036\n", 525 | "Loss: 0.000106522464644\n", 526 | "Loss: 0.000106427519517\n", 527 | "Loss: 0.000106135408676\n", 528 | "Loss: 0.000105073525117\n", 529 | "Loss: 0.000104012128891\n", 530 | "Loss: 0.000103348722002\n", 531 | "Loss: 0.000102839505304\n", 532 | "Loss: 0.000102803400478\n", 533 | "Loss: 0.000102594312476\n", 534 | "Loss: 0.000102549012321\n", 535 | "Loss: 0.000102485443702\n", 536 | "Loss: 0.000102429646339\n", 537 | "Loss: 0.000102082526553\n", 538 | "Loss: 0.00010038107925\n", 539 | "Loss: 9.70681808915e-05\n", 540 | "Loss: 9.29469168545e-05\n", 541 | "Test loss: 3.742124371e-05\n", 542 | "Step: 5\n", 543 | "Loss: 8.55601804036e-05\n", 544 | "Loss: 7.54651407203e-05\n", 545 | "Loss: 7.23616353648e-05\n", 546 | "Loss: 7.19238419828e-05\n", 547 | "Loss: 6.96617404737e-05\n", 548 | "Loss: 6.84061907269e-05\n", 549 | "Loss: 6.55017273389e-05\n", 550 | "Loss: 6.21217438185e-05\n", 551 | "Loss: 6.0200384921e-05\n", 552 | "Loss: 5.94776952751e-05\n", 553 | "Loss: 5.86855054211e-05\n", 554 | "Loss: 5.81859965346e-05\n", 555 | "Loss: 5.76433216865e-05\n", 556 | "Loss: 5.72618077182e-05\n", 557 | "Loss: 5.6936382239e-05\n", 558 | "Loss: 5.66102452019e-05\n", 559 | "Loss: 5.63132603047e-05\n", 560 | "Loss: 5.55504245353e-05\n", 561 | "Loss: 5.41494236844e-05\n", 562 | "Loss: 5.13797730005e-05\n", 563 | "Test loss: 2.21711801414e-05\n", 564 | "Step: 6\n", 565 | "Loss: 5.01737084731e-05\n", 566 | "Loss: 4.96079793364e-05\n", 567 | "Loss: 4.69376299575e-05\n", 568 | "Loss: 4.56767975966e-05\n", 569 | "Loss: 4.28155281916e-05\n", 570 | "Loss: 4.14869889281e-05\n", 571 | "Loss: 4.07336433813e-05\n", 572 | "Loss: 3.94422540212e-05\n", 573 | "Loss: 3.88026449018e-05\n", 574 | "Loss: 3.7584348081e-05\n", 575 | "Loss: 3.77610014954e-05\n", 576 | "Loss: 3.66763214962e-05\n", 577 | "Loss: 3.64133434337e-05\n", 578 | "Loss: 3.59646375262e-05\n", 579 | "Loss: 3.58120481536e-05\n", 580 | "Loss: 3.56361182482e-05\n", 581 | "Loss: 3.53170405431e-05\n", 582 | "Loss: 3.49339371354e-05\n", 583 | "Loss: 3.45714361391e-05\n" 584 | ] 585 | } 586 | ], 587 | "source": [ 588 | "# build the model\n", 589 | "seq = Sequence()\n", 590 | "seq.double()\n", 591 | "criterion = nn.MSELoss()\n", 592 | "optimizer = optim.LBFGS(seq.parameters(), lr=0.8)\n", 593 | "#begin to train\n", 594 | "for i in range(11):\n", 595 | " print('Step: ', i)\n", 596 | " def closure():\n", 597 | " optimizer.zero_grad()\n", 598 | " out = seq(input)\n", 599 | " loss = criterion(out, target)\n", 600 | " print('Loss:', loss.data.numpy()[0])\n", 601 | " loss.backward()\n", 602 | " return loss\n", 603 | " optimizer.step(closure)\n", 604 | " # begin to predict\n", 605 | " pred = seq(test_input, future = future)\n", 606 | " loss = criterion(pred[:, :-future], test_target)\n", 607 | " print('Test loss:', loss.data.numpy()[0])\n", 608 | " y = pred.data.numpy()\n", 609 | " save_plot_wave(y)" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "metadata": { 616 | "collapsed": true 617 | }, 618 | 
"outputs": [], 619 | "source": [] 620 | } 621 | ], 622 | "metadata": { 623 | "kernelspec": { 624 | "display_name": "Python 3", 625 | "language": "python", 626 | "name": "python3" 627 | }, 628 | "language_info": { 629 | "codemirror_mode": { 630 | "name": "ipython", 631 | "version": 3 632 | }, 633 | "file_extension": ".py", 634 | "mimetype": "text/x-python", 635 | "name": "python", 636 | "nbconvert_exporter": "python", 637 | "pygments_lexer": "ipython3", 638 | "version": "3.6.3" 639 | } 640 | }, 641 | "nbformat": 4, 642 | "nbformat_minor": 2 643 | } 644 | -------------------------------------------------------------------------------- /recitation-5/recursive1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-5/recursive1.png -------------------------------------------------------------------------------- /recitation-6/output/shakespeare/checkpoint.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-6/output/shakespeare/checkpoint.pytorch -------------------------------------------------------------------------------- /recitation-6/shakespeare.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | import numpy as np 6 | import torch 7 | from inferno.extensions.metrics.categorical import CategoricalError 8 | from inferno.trainers.basic import Trainer 9 | from inferno.trainers.callbacks.base import Callback 10 | from torch import nn 11 | from torch.autograd import Variable 12 | from torch.utils.data.dataloader import DataLoader 13 | from torch.utils.data.dataset import TensorDataset 14 | 15 | 16 | def read_corpus(): 17 | filename = 't8.shakespeare.txt' 18 | lines = [] 19 | with open(filename, 'r') as f: 20 | for pos, line in enumerate(f): 21 | if 243 < pos < 124440: 22 | if len(line.strip()) > 0: 23 | lines.append(line) 24 | corpus = " ".join(lines) 25 | return corpus 26 | 27 | 28 | def get_charmap(corpus): 29 | chars = list(set(corpus)) 30 | chars.sort() 31 | charmap = {c: i for i, c in enumerate(chars)} 32 | return chars, charmap 33 | 34 | 35 | def map_corpus(corpus, charmap): 36 | return np.array([charmap[c] for c in corpus], dtype=np.int64) 37 | 38 | 39 | def batchify(array, args): 40 | batch_len = args.batch_len 41 | batches = array.shape[0] // batch_len 42 | array = array[:batches * batch_len] 43 | return array.reshape((batches, batch_len)) 44 | 45 | 46 | def make_inputs(targets): 47 | # batches: (n, batch_len) 48 | return np.pad(targets[:, :-1] + 1, [(0, 0), (1, 0)], mode='constant') 49 | 50 | 51 | def sample_gumbel(shape, eps=1e-10, out=None): 52 | """ 53 | Sample from Gumbel(0, 1) 54 | based on 55 | https://github.com/ericjang/gumbel-softmax/blob/3c8584924603869e90ca74ac20a6a03d99a91ef9/Categorical%20VAE.ipynb , 56 | (MIT license) 57 | """ 58 | U = out.resize_(shape).uniform_() if out is not None else torch.rand(shape) 59 | return - torch.log(eps - torch.log(U + eps)) 60 | 61 | 62 | class TextModel(nn.Module): 63 | def __init__(self, charcount, args): 64 | super(TextModel, self).__init__() 65 | self.charcount = charcount 66 | self.embedding = nn.Embedding(num_embeddings=charcount + 1, embedding_dim=args.embedding_dim) 67 | self.rnns = nn.ModuleList([ 68 | 
nn.LSTM(input_size=args.embedding_dim, hidden_size=args.hidden_dim, batch_first=True), 69 | nn.LSTM(input_size=args.hidden_dim, hidden_size=args.hidden_dim, batch_first=True), 70 | nn.LSTM(input_size=args.hidden_dim, hidden_size=args.embedding_dim, batch_first=True)]) 71 | self.projection = nn.Linear(in_features=args.embedding_dim, out_features=charcount) 72 | 73 | def forward(self, input, forward=0, stochastic=False): 74 | h = input # (n, t) 75 | h = self.embedding(h) # (n, t, c) 76 | states = [] 77 | for rnn in self.rnns: 78 | h, state = rnn(h) 79 | states.append(state) 80 | h = self.projection(h) 81 | if stochastic: 82 | gumbel = Variable(sample_gumbel(shape=h.size(), out=h.data.new())) 83 | h += gumbel 84 | logits = h 85 | if forward > 0: 86 | outputs = [] 87 | h = torch.max(logits[:, -1:, :], dim=2)[1] + 1 88 | for i in range(forward): 89 | h = self.embedding(h) 90 | for j, rnn in enumerate(self.rnns): 91 | h, state = rnn(h, states[j]) 92 | states[j] = state 93 | h = self.projection(h) 94 | if stochastic: 95 | gumbel = Variable(sample_gumbel(shape=h.size(), out=h.data.new())) 96 | h += gumbel 97 | outputs.append(h) 98 | h = torch.max(h, dim=2)[1] + 1 99 | logits = torch.cat([logits] + outputs, dim=1) 100 | return logits 101 | 102 | 103 | def generate(model, sequence_length, batch_size, args, stochastic=False, inp=None): 104 | if inp is None: 105 | inp = Variable(torch.zeros(batch_size, 1)).long() 106 | if args.cuda: 107 | inp = inp.cuda() 108 | model.eval() 109 | logits = model(inp, forward=sequence_length, stochastic=stochastic) 110 | classes = torch.max(logits, dim=2)[1] 111 | return classes 112 | 113 | 114 | class CrossEntropyLoss3D(nn.CrossEntropyLoss): 115 | def forward(self, input, target): 116 | return super(CrossEntropyLoss3D, self).forward(input.view(-1, input.size()[2]), target.view(-1)) 117 | 118 | 119 | class CategoricalError3D(CategoricalError): 120 | def forward(self, prediction, target): 121 | return super(CategoricalError3D, self).forward(prediction.view(-1, prediction.size()[2]), target.view(-1)) 122 | 123 | 124 | class CustomLogger(Callback): 125 | def end_of_training_iteration(self, **_): 126 | training_loss = self.trainer.get_state('training_loss', default=0) 127 | training_error = self.trainer.get_state('training_error', default=0) 128 | print("Training loss: {} error: {}".format(training_loss.numpy()[0], training_error)) 129 | 130 | 131 | def to_text(preds, charset): 132 | return ["".join(charset[c] for c in line) for line in preds] 133 | 134 | 135 | def print_generated(lines): 136 | for i, line in enumerate(lines): 137 | print("Generated text {}: {}".format(i, line)) 138 | 139 | 140 | def train_model(model, dataset, args): 141 | kw = {'num_workers': 2, 'pin_memory': True} if args.cuda else {} 142 | loader = DataLoader(dataset=dataset, batch_size=args.batch_size, shuffle=True, **kw) 143 | trainer = Trainer(model) \ 144 | .build_criterion(CrossEntropyLoss3D) \ 145 | .build_metric(CategoricalError3D) \ 146 | .build_optimizer('Adam', weight_decay=1e-6) \ 147 | .save_every((1, 'epochs')) \ 148 | .save_to_directory(args.save_directory) \ 149 | .set_max_num_epochs(args.epochs) \ 150 | .register_callback(CustomLogger) \ 151 | .bind_loader('train', loader) 152 | 153 | if args.cuda: 154 | trainer.cuda() 155 | 156 | # Go! 
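    # Aside, a small worked example rather than part of the original script:
    # CrossEntropyLoss3D above flattens the (batch, time, classes) logits so the
    # stock cross-entropy loss scores every timestep as an independent sample,
    # e.g. with batch_size=64 and batch_len=200 over a charcount-way vocabulary:
    #
    #     input:  (64, 200, charcount) -> view(-1, charcount) -> (12800, charcount)
    #     target: (64, 200)            -> view(-1)            -> (12800,)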
157 | trainer.fit() 158 | 159 | 160 | def main(argv): 161 | # Argparse 162 | parser = argparse.ArgumentParser(description='PyTorch Shakespeare Example') 163 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 164 | help='input batch size for training (default: 64)') 165 | parser.add_argument('--save-directory', type=str, default='output/shakespeare', 166 | help='output directory') 167 | parser.add_argument('--epochs', type=int, default=20, metavar='N', 168 | help='number of epochs to train') 169 | parser.add_argument('--batch-len', type=int, default=200, metavar='N', 170 | help='Batch length') 171 | parser.add_argument('--hidden-dim', type=int, default=256, metavar='N', 172 | help='Hidden dim') 173 | parser.add_argument('--embedding-dim', type=int, default=128, metavar='N', 174 | help='Embedding dim') 175 | parser.add_argument('--no-cuda', action='store_true', default=False, 176 | help='disables CUDA training') 177 | args = parser.parse_args(argv) 178 | args.cuda = not args.no_cuda and torch.cuda.is_available() 179 | 180 | # Read and process data 181 | corpus = read_corpus() 182 | print("Corpus: {}...{}".format(corpus[:50], corpus[-50:])) 183 | print("Total character count: {}".format(len(corpus))) 184 | chars, charmap = get_charmap(corpus) 185 | charcount = len(chars) 186 | print("Unique character count: {}".format(len(chars))) 187 | array = map_corpus(corpus, charmap) 188 | targets = batchify(array, args=args) 189 | inputs = make_inputs(targets) 190 | dataset = TensorDataset(torch.from_numpy(inputs), torch.from_numpy(targets)) 191 | 192 | # Train or load a model 193 | checkpoint_path = os.path.join(args.save_directory, 'checkpoint.pytorch') 194 | if not os.path.exists(checkpoint_path): 195 | model = TextModel(charcount=charcount, args=args) 196 | train_model(model=model, dataset=dataset, args=args) 197 | else: 198 | trainer = Trainer().load(from_directory=args.save_directory) 199 | model = TextModel(charcount=charcount, args=args) 200 | model.load_state_dict(trainer.model.state_dict()) 201 | if args.cuda: 202 | model = model.cuda() 203 | 204 | # Generate deterministic text 205 | 206 | print("Deterministic") 207 | generated = generate(model, sequence_length=1000, batch_size=2, stochastic=False, args=args).data.cpu().numpy() 208 | print_generated(to_text(preds=generated, charset=chars)) 209 | 210 | # Seed deterministic text 211 | seeds = ['KING RICHARD', 'KING RICHARD', 'Enter Falsta', 'SHAKESPEARE '] 212 | assert len(set(len(s) for s in seeds)) == 1 213 | inp = np.array([[charmap[c] for c in l] for l in seeds], dtype=np.int64) 214 | inp = np.pad(inp + 1, [(0, 0), (1, 0)], mode='constant') 215 | inp = Variable(torch.from_numpy(inp)) 216 | if args.cuda: 217 | inp = inp.cuda() 218 | # Generate deterministic text from the seeds 219 | generated = generate(model, sequence_length=2000, batch_size=5, stochastic=False, inp=inp, 220 | args=args).data.cpu().numpy() 221 | text = to_text(preds=generated, charset=chars) 222 | for i, (s, t) in enumerate(zip(seeds, text)): 223 | print("Deterministic #{} (seed={}): {}".format(i, s, t)) 224 | 225 | # Generate stochastic text 226 | print("Stochastic") 227 | generated = generate(model, sequence_length=1000, batch_size=5, stochastic=True, args=args).data.cpu().numpy() 228 | print_generated(to_text(preds=generated, charset=chars)) 229 | 230 | 231 | if __name__ == '__main__': 232 | main(sys.argv[1:]) 233 | -------------------------------------------------------------------------------- /recitation-8/data/0.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/0.gif -------------------------------------------------------------------------------- /recitation-8/data/1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/1.gif -------------------------------------------------------------------------------- /recitation-8/data/2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/2.gif -------------------------------------------------------------------------------- /recitation-8/data/3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/3.gif -------------------------------------------------------------------------------- /recitation-8/data/4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/4.gif -------------------------------------------------------------------------------- /recitation-8/data/5.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/5.gif -------------------------------------------------------------------------------- /recitation-8/data/6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/6.gif -------------------------------------------------------------------------------- /recitation-8/data/7.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/7.gif -------------------------------------------------------------------------------- /recitation-8/data/8.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/8.gif -------------------------------------------------------------------------------- /recitation-8/data/9.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/9.gif -------------------------------------------------------------------------------- /recitation-8/data/numbers.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-8/data/numbers.npy 
-------------------------------------------------------------------------------- /recitation-8/number_colors.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import numpy as np 3 | import torch 4 | import torch.utils.data 5 | import torch.nn.functional as F 6 | import matplotlib.pyplot 7 | 8 | from inferno.trainers.basic import Trainer 9 | 10 | 11 | def make_numeral_arrs(path): 12 | arrs = [1 - np.array( 13 | Image.open(path + '/' + str(v) + '.gif').convert('L')) // 255 14 | for v in range(10)] 15 | np.save("data/numbers.npy", arrs) 16 | 17 | 18 | def display_image(arr): 19 | arr = np.transpose(np.array( 20 | arr[:3, :, :].cpu().numpy() * 255, dtype='uint8'), [1, 2, 0]) 21 | matplotlib.pyplot.imshow(arr) 22 | 23 | 24 | def display_attention(var): 25 | arr = np.log(var.data.numpy() + 1e-9) 26 | matplotlib.pyplot.imshow(arr) 27 | 28 | 29 | word_dict = {"zero": 0, "one": 1, "two": 2, "three": 3, 30 | "four": 4, "five": 5, "six": 6, 31 | "seven": 7, "eight": 8, "nine": 9, 32 | "red": 10, "green": 11, "blue": 12, 33 | "left": 13, "middle": 14, "right": 15} 34 | word_inv_dict = {v: k for (k, v) in word_dict.items()} 35 | 36 | 37 | def to_words(labels): 38 | return ' '.join([word_inv_dict[l] for l in labels]) 39 | 40 | 41 | class NumberColorsDataset(torch.utils.data.Dataset): 42 | 43 | def __init__(self): 44 | torch.utils.data.Dataset.__init__(self) 45 | self.numeral_arrs = np.load("data/numbers.npy") 46 | self.color_ord_list = np.array([ 47 | [0, 1, 2], 48 | [0, 2, 1], 49 | [1, 0, 2], 50 | [1, 2, 0], 51 | [2, 0, 1], 52 | [2, 1, 0]]) 53 | 54 | def __len__(self): 55 | return 6000 56 | 57 | def __getitem__(self, idx): 58 | number = idx % 1000 59 | color_ord_idx = idx // 1000 60 | 61 | digits = np.zeros(3, dtype='i') 62 | digits[2] = number % 10 63 | number //= 10 64 | digits[1] = number % 10 65 | number //= 10 66 | digits[0] = number % 10 67 | color_ord = self.color_ord_list[color_ord_idx] 68 | 69 | imgs = np.array([ 70 | self.numeral_arrs[digits[0]], 71 | self.numeral_arrs[digits[1]], 72 | self.numeral_arrs[digits[2]]]) 73 | 74 | (_, h, w) = imgs.shape 75 | colored = np.zeros((3, 4, h, w)) 76 | colored[0, color_ord[0], :, :] = imgs[0] 77 | colored[1, color_ord[1], :, :] = imgs[1] 78 | colored[2, color_ord[2], :, :] = imgs[2] 79 | 80 | combined = np.concatenate(colored, axis=2) 81 | combined[3, :, :] = np.linspace(0.0, 1.0, 3 * w).reshape(1, 1, 3 * w) 82 | 83 | color_inv_ord = np.argsort(color_ord) 84 | 85 | labels = [] 86 | labels.append(word_dict["red"]) 87 | labels.append(digits[color_inv_ord[0]]) 88 | labels.append(color_inv_ord[0] + word_dict["left"]) 89 | labels.append(word_dict["green"]) 90 | labels.append(digits[color_inv_ord[1]]) 91 | labels.append(color_inv_ord[1] + word_dict["left"]) 92 | labels.append(word_dict["blue"]) 93 | labels.append(digits[color_inv_ord[2]]) 94 | labels.append(color_inv_ord[2] + word_dict["left"]) 95 | # labels = [digits[0]] 96 | 97 | label_vec = torch.from_numpy(np.array(labels)).long() 98 | 99 | return torch.from_numpy(combined).float(), len(labels), \ 100 | label_vec, label_vec 101 | 102 | 103 | class NumberColorsNet(torch.nn.Module): 104 | def __init__(self, num_queries=4, query_size=16, val_size=64): 105 | super().__init__() 106 | 107 | # The convolutional layers encode the input 108 | self.convs = torch.nn.ModuleList() 109 | self.convs.append(torch.nn.Conv2d( 110 | 4, 64, kernel_size=3, padding=0, stride=2)) 111 | self.convs.append(torch.nn.Conv2d( 112 | 64, 128, kernel_size=3, padding=0, 
stride=2)) 113 | self.convs.append(torch.nn.Conv2d( 114 | 128, 256, kernel_size=3, padding=0)) 115 | self.convs.append(torch.nn.Conv2d( 116 | 256, 256, kernel_size=3, padding=0)) 117 | self.key_conv = torch.nn.Conv2d( 118 | 256, query_size, kernel_size=3, padding=1) 119 | self.val_conv = torch.nn.Conv2d( 120 | 256, val_size, kernel_size=3, padding=1) 121 | 122 | self.test = torch.nn.Linear(query_size, len(word_dict)) 123 | 124 | # Embedding to convert output labels to RNN input 125 | self.embedding = torch.nn.Embedding(len(word_dict), len(word_dict)) 126 | 127 | # The hidden state of the RNN layer is used as the query 128 | self.rnns = torch.nn.ModuleList() 129 | self.rnn_inith = torch.nn.ParameterList() 130 | self.rnn_initc = torch.nn.ParameterList() 131 | 132 | queries_total = num_queries * query_size 133 | 134 | self.rnns.append(torch.nn.LSTMCell( 135 | val_size * num_queries + len(word_dict), 128)) 136 | self.rnn_inith.append(torch.nn.Parameter(torch.rand(1, 128))) 137 | self.rnn_initc.append(torch.nn.Parameter(torch.rand(1, 128))) 138 | 139 | self.rnns.append(torch.nn.LSTMCell(128, 64)) 140 | self.rnn_inith.append(torch.nn.Parameter(torch.rand(1, 64))) 141 | self.rnn_initc.append(torch.nn.Parameter(torch.rand(1, 64))) 142 | 143 | # Linear layers convert the hidden state to the query 144 | self.query_linears = torch.nn.ModuleList() 145 | self.query_linears.append(torch.nn.Linear(64, 64)) 146 | self.query_linears.append(torch.nn.Linear(64, queries_total)) 147 | 148 | # Linear layers convert the hidden state to the output 149 | self.output_linears = torch.nn.ModuleList() 150 | self.output_linears.append(torch.nn.Linear(64, 64)) 151 | self.output_linears.append(torch.nn.Linear(64, len(word_dict))) 152 | 153 | # Leaky relu activation 154 | self.leaky_relu = torch.nn.LeakyReLU(negative_slope=0.2) 155 | 156 | for param in self.parameters(): 157 | if param.ndimension() >= 2: 158 | torch.nn.init.xavier_uniform(param) 159 | else: 160 | param.data.zero_() 161 | 162 | self.attentions = [] 163 | self.num_queries = num_queries 164 | self.query_size = query_size 165 | 166 | def forward(self, img, num_iters, label_batch): 167 | for conv in self.convs: 168 | img = self.leaky_relu(conv(img)) 169 | key = self.key_conv(img) 170 | val = self.val_conv(img) 171 | (b, c, w, h) = val.size() 172 | 173 | # return [self.test(img.view(b, c, w * h).mean(dim=2))], num_iters 174 | 175 | outputs = [] 176 | self.attentions = [] 177 | 178 | output_embed = img.data.new(b).long() 179 | output_embed[:] = 2 180 | output_embed = self.embedding( 181 | torch.autograd.Variable(output_embed)) 182 | hidden = [h.repeat(b, 1) for h in self.rnn_inith] 183 | cell = [c.repeat(b, 1) for c in self.rnn_initc] 184 | 185 | for i in range(num_iters.max().data[0]): 186 | queries = hidden[-1] 187 | for (li, linear) in enumerate(self.query_linears):  # li, not i: i is the timestep 188 | if li == len(self.query_linears) - 1: 189 | queries = linear(queries) 190 | else: 191 | queries = self.leaky_relu(linear(queries)) 192 | queries = queries.view((b, self.query_size, self.num_queries)) 193 | 194 | attention = torch.matmul(torch.transpose( 195 | key.view((b, self.query_size, w * h)), 1, 2), queries) 196 | attention = F.elu(attention) + 1.0 197 | attention /= (1e-6 + attention.sum(dim=1, keepdim=True)) 198 | self.attentions.append(torch.transpose( 199 | attention, 1, 2).contiguous().view( 200 | (b, self.num_queries, w, h))) 201 | 202 | fused = torch.matmul(val.view((b, c, w * h)), attention).view( 203 | (b, c * self.num_queries)) 204 | 205 | rnn_input = torch.cat([fused, 
output_embed], dim=1) 206 | 207 | for (j, rnn) in enumerate(self.rnns): 208 | hidden[j], cell[j] = rnn(rnn_input, (hidden[j], cell[j])) 209 | rnn_input = hidden[j] 210 | 211 | output = hidden[-1] 212 | for (li, linear) in enumerate(self.output_linears): 213 | if li == len(self.output_linears) - 1: 214 | output = linear(output) 215 | else: 216 | output = self.leaky_relu(linear(output)) 217 | outputs.append(output) 218 | 219 | if self.training: 220 | output_embed = self.embedding(label_batch[:, i])  # teacher forcing: use the label at timestep i 221 | else: 222 | output_embed = self.embedding(output.max(dim=1)[1]) 223 | 224 | return torch.stack(outputs, dim=1), num_iters 225 | 226 | def get_attentions(self): 227 | return self.attentions 228 | 229 | 230 | class SeqCrossEntropyLoss(torch.nn.Module): 231 | def __init__(self): 232 | super().__init__() 233 | self.losses = 0.0 234 | self.count = 0 235 | 236 | def forward(self, val, label_batch): 237 | (outputs, num_iters) = val 238 | # loss = F.cross_entropy(outputs[0], label_batch[:, 0]) 239 | # print(loss) 240 | 241 | output_list = [] 242 | label_list = [] 243 | for (output, labels, num_iter) in zip( 244 | outputs, label_batch, num_iters): 245 | num_iter = num_iter.data[0] 246 | output_list.append(output[:num_iter]) 247 | label_list.append(labels[:num_iter]) 248 | 249 | outputs = torch.cat(output_list) 250 | labels = torch.cat(label_list) 251 | loss = torch.nn.functional.cross_entropy( 252 | outputs, labels) 253 | 254 | self.losses += loss.data[0] 255 | self.count += 1 256 | if self.count % 10 == 0: 257 | print(self.losses / self.count) 258 | self.losses = 0.0 259 | self.count = 0 260 | return loss 261 | 262 | 263 | def train(net, num_epochs, dataset): 264 | data_loader = torch.utils.data.DataLoader( 265 | dataset, shuffle=True, batch_size=100) 266 | net.train(mode=True) 267 | 268 | for i in range(num_epochs): 269 | lr = 0.002 / (1 + 0.2 * i) 270 | trainer = Trainer(net) \ 271 | .build_criterion(SeqCrossEntropyLoss) \ 272 | .build_optimizer('Adam', lr=lr) \ 273 | .set_max_num_epochs(1) \ 274 | .save_every((10, 'iterations')) \ 275 | .save_to_directory('net/') 276 | 277 | trainer.bind_loader('train', data_loader, num_inputs=3) 278 | 279 | if torch.cuda.is_available(): 280 | trainer.cuda() 281 | 282 | trainer.fit() 283 | 284 | 285 | def main(): 286 | net = NumberColorsNet() 287 | dataset = NumberColorsDataset() 288 | train(net, 200, dataset) 289 | -------------------------------------------------------------------------------- /recitation-9/autoencoder_demo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.utils 5 | import torchvision.datasets as datasets 6 | import torchvision.transforms as transforms 7 | from inferno.trainers.basic import Trainer 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | class MNIST(torch.utils.data.Dataset): 12 | def __init__(self): 13 | super().__init__() 14 | self.mnist = datasets.MNIST( 15 | root='./data', train=True, 16 | download=True, transform=transforms.ToTensor()) 17 | self.labeled = True 18 | 19 | def __len__(self): 20 | return len(self.mnist) 21 | 22 | def __getitem__(self, idx): 23 | (img, labels) = self.mnist[idx] 24 | img = img.view(-1) 25 | if self.labeled: 26 | return (img, labels) 27 | else: 28 | return (img, img) 29 | 30 | 31 | class LossPrinter(torch.nn.Module): 32 | def __init__(self, criterion): 33 | super().__init__() 34 | self.criterion = criterion 35 | 36 | def forward(self, *args, **kwargs): 37 | loss = self.criterion(*args, **kwargs) 
38 | print("Loss: %f" % loss) 39 | return loss 40 | 41 | 42 | def train(net, dataset, criterion, num_epochs, 43 | batch_size, learn_rate, dir_name): 44 | dir_name = os.path.join('net/', dir_name) 45 | trainer = Trainer(net[0]) 46 | 47 | if (os.path.exists(os.path.join(dir_name, 'model.pytorch'))): 48 | net_temp = trainer.load_model(dir_name).model 49 | net[0].load_state_dict(net_temp.state_dict()) 50 | print("Loaded checkpoint directly") 51 | else: 52 | if (not os.path.exists(dir_name)): 53 | os.makedirs(dir_name) 54 | data_loader = torch.utils.data.DataLoader( 55 | dataset, shuffle=True, batch_size=batch_size) 56 | net[0].train() 57 | 58 | trainer \ 59 | .build_criterion(LossPrinter(criterion)) \ 60 | .bind_loader('train', data_loader) \ 61 | .build_optimizer('Adam', lr=learn_rate) \ 62 | .set_max_num_epochs(num_epochs) 63 | 64 | if torch.cuda.is_available(): 65 | trainer.cuda() 66 | 67 | trainer.fit() 68 | trainer.save_model(dir_name) 69 | net[0].cpu() 70 | net[0].eval() 71 | 72 | 73 | def display_image(arr): 74 | width = int(np.sqrt(arr.size()[0])) 75 | arr = arr.cpu().view(width, -1).numpy() 76 | plt.figure() 77 | plt.imshow(1.0 - arr, cmap='gray') 78 | 79 | 80 | def display_reconstruction(net, dataset): 81 | (image, _) = dataset[np.random.randint(len(dataset))] 82 | display_image(image) 83 | image = torch.autograd.Variable(image).unsqueeze(dim=0) 84 | reconst = net.decode(net.encode(image)).data[0] 85 | display_image(reconst) 86 | 87 | 88 | def display_encodings(net, dataset, limits): 89 | data_loader = torch.utils.data.DataLoader( 90 | dataset, shuffle=True, batch_size=1000) 91 | (images, labels) = next(iter(data_loader)) 92 | images = torch.autograd.Variable(images) 93 | labels = labels.numpy() 94 | 95 | encoded = net.encode(images).data.cpu().numpy() 96 | encoded = np.sign(encoded) * np.abs(encoded) ** (1.0 / 3.0) 97 | plt.figure(figsize=(10, 10)) 98 | plt.scatter(encoded[:, 0], encoded[:, 1], c=labels) 99 | plt.xlim(limits) 100 | plt.ylim(limits) 101 | plt.colorbar() 102 | 103 | 104 | def display_encoding_variation(net, dataset, limits): 105 | (image, _) = dataset[np.random.randint(len(dataset))] 106 | display_image(image) 107 | 108 | images = torch.autograd.Variable(image).clone().unsqueeze(dim=0) 109 | images = images.repeat(20, 1) 110 | encoded = net.encode(images).data.cpu().numpy() 111 | encoded = np.sign(encoded) * np.abs(encoded) ** (1.0 / 3.0) 112 | plt.figure(figsize=(10, 10)) 113 | plt.scatter(encoded[:, 0], encoded[:, 1]) 114 | plt.xlim(limits) 115 | plt.ylim(limits) 116 | 117 | 118 | def display_decoding(net, dataset, point): 119 | point = point ** 3 120 | point = torch.autograd.Variable(point).unsqueeze(dim=0) 121 | decoded = net.decode(point).data[0] 122 | display_image(decoded) 123 | -------------------------------------------------------------------------------- /recitation-9/data/processed/test.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/data/processed/test.pt -------------------------------------------------------------------------------- /recitation-9/data/processed/training.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/data/processed/training.pt -------------------------------------------------------------------------------- 
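Aside on autoencoder_demo_utils.py above, a hedged sketch that is not part of the repository: display_encodings compresses 2-D latent codes with a signed cube root before scattering them, and display_decoding inverts that with point ** 3, so the two helpers agree on plot coordinates while keeping dense clusters near the origin visible. The round trip, for a made-up code z:

    import numpy as np
    z = np.array([-8.0, 0.125])
    plotted = np.sign(z) * np.abs(z) ** (1.0 / 3.0)  # -> [-2.0, 0.5], what display_encodings scatters
    restored = plotted ** 3                          # -> [-8.0, 0.125], what display_decoding feeds the decoder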
/recitation-9/data/raw/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/data/raw/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /recitation-9/data/raw/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/data/raw/t10k-labels-idx1-ubyte -------------------------------------------------------------------------------- /recitation-9/data/raw/train-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/data/raw/train-images-idx3-ubyte -------------------------------------------------------------------------------- /recitation-9/data/raw/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/data/raw/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /recitation-9/net/hidden_sampling_autoencoder/model.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/net/hidden_sampling_autoencoder/model.pytorch -------------------------------------------------------------------------------- /recitation-9/net/regular_autoencoder/model.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/net/regular_autoencoder/model.pytorch -------------------------------------------------------------------------------- /recitation-9/net/reparameterized_variational_autoencoder/model.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/recitation-9/net/reparameterized_variational_autoencoder/model.pytorch -------------------------------------------------------------------------------- /tensorflow/simple_mnist.py: -------------------------------------------------------------------------------- 
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.keras.api.keras.datasets import mnist
from tensorflow.contrib.learn import Experiment, RunConfig
from tensorflow.contrib.learn.python.learn.learn_runner import run
from tensorflow.contrib.training import HParams
from tensorflow.python.estimator.estimator import Estimator

"""
This simple program provides command-line parsing, resumable training, validation-loss graphs on TensorBoard,
and lots of other freebies.

**Overview**
- `model_fn`: defines the model, losses, and metrics
- `make_input_fns`: defines the training and validation inputs
- `experiment_fn`: links the model and the inputs
- `main`: starts the experiment
"""


# This is the most important section.
# Define your model, losses, and metrics.
def model_fn(features, labels, mode, params):
    # Simple MLP model
    # Hyperparameters are stored in `params`
    h = features['x']
    h = slim.flatten(h)
    with slim.arg_scope([slim.fully_connected],
                        weights_regularizer=slim.l2_regularizer(params.l2),
                        num_outputs=params.hidden_units,
                        activation_fn=tf.nn.leaky_relu):
        for i in range(params.hidden_layers):
            h = slim.fully_connected(h, scope='my_hidden_layer_{}'.format(i))
        logits = slim.fully_connected(h, num_outputs=10, activation_fn=None, scope='my_output_layer')

    # Predictions
    classes = tf.argmax(logits, axis=1)
    predictions = {
        'classes': classes
    }
    # Any `tf.summary` you create will automatically show up on TensorBoard
    accuracy = tf.reduce_mean(tf.cast(tf.equal(classes, labels), tf.float32))
    tf.summary.scalar('accuracy', accuracy)  # add a scalar graph
    tf.summary.histogram('logits', logits)  # add a histogram
    # Loss
    softmax_loss = tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=tf.one_hot(labels, 10, axis=1)
    )
    tf.summary.scalar('softmax_loss', softmax_loss)  # add another graph
    regularization_loss = tf.losses.get_regularization_loss()
    tf.summary.scalar('regularization_loss', regularization_loss)  # add another graph
    loss = softmax_loss + regularization_loss
    # Training
    optimizer = tf.train.AdamOptimizer(learning_rate=params.lr)
    train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
    # Evaluation
    eval_metric_ops = {'eval_accuracy': tf.metrics.accuracy(labels=labels, predictions=classes)}
    # Return everything wrapped in an EstimatorSpec
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, train_op=train_op,
        eval_metric_ops=eval_metric_ops, predictions=predictions)


# Data loading and massaging (int to float, rescale to [-1, 1])
def make_input_fns():
    train, test = mnist.load_data()
    fns = []
    for data in (train, test):
        x, y = data
        x = (x.astype(np.float32) * 2. / 255.) - 1.
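        # cast labels to int64 so they match the dtype of tf.argmax's output in model_fn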
        y = y.astype(np.int64)
        fns.append(tf.estimator.inputs.numpy_input_fn(
            {'x': x}, y,
            batch_size=tf.flags.FLAGS.batch_size,
            num_epochs=None,
            shuffle=True
        ))
    return fns


# This function links your input function and your model
def experiment_fn(run_config, hparams):
    train_input_fn, eval_input_fn = make_input_fns()
    estimator = Estimator(
        model_fn=model_fn,
        config=run_config,
        params=hparams)
    experiment = Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn
    )
    return experiment


# Ordinary main function
def main(_argv):
    # Pass command-line arguments to RunConfig
    run_config = RunConfig(
        model_dir=tf.flags.FLAGS.model_dir,
        save_checkpoints_steps=tf.flags.FLAGS.save_checkpoints_steps)
    # Default hyperparameters
    hparams = HParams(l2=1e-3, lr=1e-3, hidden_layers=3, hidden_units=200)
    # Parse the `hparams` command-line argument
    hparams.parse(tf.flags.FLAGS.hparams)
    # Run the experiment
    run(
        experiment_fn=experiment_fn,
        run_config=run_config,
        schedule=tf.flags.FLAGS.schedule,
        hparams=hparams)


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    # Define command-line arguments
    tf.flags.DEFINE_string('model_dir', 'demo/simple_mnist', 'Output directory')
    tf.flags.DEFINE_string('schedule', 'train_and_evaluate', 'Schedule')
    tf.flags.DEFINE_integer('batch_size', 64, 'Batch size')
    tf.flags.DEFINE_integer('save_checkpoints_steps', 2000, 'How often to save and validate')
    tf.flags.DEFINE_string('hparams', '', 'Hyperparameters')
    # This line will call main()
    tf.app.run()
--------------------------------------------------------------------------------
/visualization/.gitignore:
--------------------------------------------------------------------------------
/data
/output
--------------------------------------------------------------------------------
/visualization/README.md:
--------------------------------------------------------------------------------
# Visualization Tutorials

Here are two examples of how to visualize PyTorch training metrics. They are not the only
frameworks or the only ways to visualize your data, but they should give you a good starting point.

- TensorBoard
- Visdom

## Inferno and TensorBoard

Inferno provides a convenient wrapper for training, logging, and other boilerplate. It supports logging
to TensorBoard out of the box.

TensorBoard serves log files that are stored on disk. TensorBoard does not need to be running while you
train; you can start it before or after training.
You can also copy the log files to a different machine and run TensorBoard there. If you have
multiple log files in different subdirectories, run TensorBoard in the parent directory and it will let you
compare graphs between the different log files.
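
If you are curious what the logger is writing, you can also emit event files yourself, without Inferno.
Below is a minimal, hypothetical sketch using the third-party `tensorboardX` package (an assumption on
our part; the example scripts in this directory only use Inferno's built-in `TensorboardLogger`):

```python
# Minimal sketch: writing TensorBoard event files directly from a PyTorch script.
# Assumes `pip install tensorboardX`; the log directory name is made up for illustration.
from tensorboardX import SummaryWriter

writer = SummaryWriter('output/manual_demo')
for step in range(100):
    fake_loss = 1.0 / (step + 1)  # stand-in for a real training loss
    writer.add_scalar('train/loss', fake_loss, step)  # tag, value, global step
writer.close()
```

Running `tensorboard --logdir output` would then pick up `output/manual_demo` alongside any Inferno logs.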

- Start the TensorBoard server: `tensorboard --logdir .`
- Run the script: `python pytorch_mnist_inferno_tensorboard_example.py`
- Browse to TensorBoard to view live results: `http://localhost:6006`

![TensorBoard CMD](https://github.com/cmudeeplearning11785/deep-learning-tutorials/raw/master/visualization/tensorboard-cmd.png)

![TensorBoard](https://github.com/cmudeeplearning11785/deep-learning-tutorials/raw/master/visualization/tensorboard.png)


```python
trainer = Trainer(model) \
    .build_criterion('CrossEntropyLoss') \
    .build_metric('CategoricalError') \
    .build_optimizer('Adam') \
    .validate_every((2, 'epochs')) \
    .save_every((5, 'epochs')) \
    .save_to_directory(args.save_directory) \
    .set_max_num_epochs(10) \
    .build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                    log_images_every='never'),
                  log_directory=args.save_directory)
```

## TNT and Visdom

TNT provides some lightweight wrappers for training and logging. It also supports logging to Visdom,
but that requires a fair amount of your own code.

Visdom is a server that receives data from your script, so a Visdom server must already be running
before you start your script. You also need to save your Visdom workspace if you want to look at it later.

- Start the Visdom server: `python -m visdom.server`
- Run the script: `python pytorch_mnist_tnt_visdom_example.py`
- Browse to Visdom (the page will be blank until results start arriving): `http://localhost:8097`

![Visdom CMD](https://github.com/cmudeeplearning11785/deep-learning-tutorials/raw/master/visualization/visdom-cmd.png)

![Visdom](https://github.com/cmudeeplearning11785/deep-learning-tutorials/raw/master/visualization/visdom.png)

```python
train_loss_logger = VisdomPlotLogger(
    'line', port=port, opts={'title': 'Train Loss'})
train_err_logger = VisdomPlotLogger(
    'line', port=port, opts={'title': 'Train Class Error'})
test_loss_logger = VisdomPlotLogger(
    'line', port=port, opts={'title': 'Test Loss'})
test_err_logger = VisdomPlotLogger(
    'line', port=port, opts={'title': 'Test Class Error'})
...
def on_end_epoch(state):
    print('Training loss: %.4f, class error: %.2f%%' % (meter_loss.value()[0], classerr.value()[0]))
    train_loss_logger.log(state['epoch'], meter_loss.value()[0])
    train_err_logger.log(state['epoch'], classerr.value()[0])

    # do validation at the end of each epoch
    reset_meters()
    engine.test(train_fn, validate_loader)
    test_loss_logger.log(state['epoch'], meter_loss.value()[0])
    test_err_logger.log(state['epoch'], classerr.value()[0])
    print('Testing loss: %.4f, class error: %.2f%%' % (meter_loss.value()[0], classerr.value()[0]))
...
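# `VisdomPlotLogger.log(x, y)` pushes a single (epoch, value) point to the named
# line plot on the running Visdom server, which is how the curves above fill in.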
engine.hooks['on_end_epoch'] = on_end_epoch
engine.train(train_fn, train_loader, maxepoch=args.epochs, optimizer=optimizer)
```
--------------------------------------------------------------------------------
/visualization/pytorch_mnist_inferno_tensorboard_example.py:
--------------------------------------------------------------------------------
import argparse
import sys

import torch
import torch.nn as nn
from inferno.extensions.layers.reshape import Flatten
from inferno.trainers.basic import Trainer
from inferno.trainers.callbacks.logging.tensorboard import TensorboardLogger
from torchvision import datasets, transforms


def model_fn():
    return nn.Sequential(
        Flatten(),
        nn.Linear(in_features=784, out_features=256),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=128),
        nn.ReLU(),
        nn.Linear(in_features=128, out_features=10)
    )


def mnist_data_loaders(args):
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    return train_loader, test_loader


def train_model(args):
    model = model_fn()
    train_loader, validate_loader = mnist_data_loaders(args)

    # Build trainer
    trainer = Trainer(model) \
        .build_criterion('CrossEntropyLoss') \
        .build_metric('CategoricalError') \
        .build_optimizer('Adam') \
        .validate_every((2, 'epochs')) \
        .save_every((5, 'epochs')) \
        .save_to_directory(args.save_directory) \
        .set_max_num_epochs(args.epochs) \
        .build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                        log_images_every='never'),
                      log_directory=args.save_directory)

    # Bind loaders
    trainer \
        .bind_loader('train', train_loader) \
        .bind_loader('validate', validate_loader)

    if args.cuda:
        trainer.cuda()

    # Go!
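    # fit() trains until the configured max epochs, validating and checkpointing on the schedule above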
    trainer.fit()


def main(argv):
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--save-directory', type=str, default='output/inferno',
                        help='output directory')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='number of epochs to train (default: 20)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    args = parser.parse_args(argv)
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    train_model(args)


if __name__ == '__main__':
    main(sys.argv[1:])
--------------------------------------------------------------------------------
/visualization/pytorch_mnist_tnt_visdom_example.py:
--------------------------------------------------------------------------------
import argparse
import sys

import torch
import torch.nn as nn
import torchnet as tnt
from torch.autograd import Variable
from torchnet.engine import Engine
from torchnet.logger.visdomlogger import VisdomPlotLogger, VisdomLogger
from torchvision import datasets, transforms
from tqdm import tqdm


class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size()[0], -1)


def model_fn():
    return nn.Sequential(
        Flatten(),
        nn.Linear(in_features=784, out_features=256),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=128),
        nn.ReLU(),
        nn.Linear(in_features=128, out_features=10)
    )


def mnist_data_loaders(args):
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    return train_loader, test_loader


def train_model(args):
    model = model_fn()
    if args.cuda:
        model = model.cuda()
    train_loader, validate_loader = mnist_data_loaders(args)

    optimizer = torch.optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
    criterion = torch.nn.CrossEntropyLoss()

    engine = Engine()
    meter_loss = tnt.meter.AverageValueMeter()
    classerr = tnt.meter.ClassErrorMeter()
    confusion_meter = tnt.meter.ConfusionMeter(10, normalized=True)

    port = 8097
    train_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Train Loss'})
    train_err_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Train Class Error'})
    test_loss_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Test Loss'})
    test_err_logger = VisdomPlotLogger(
        'line', port=port, opts={'title': 'Test Class Error'})
    confusion_logger = VisdomLogger('heatmap', port=port, opts={'title': 'Confusion matrix',
                                                                'columnnames': list(range(10)),
                                                                'rownames': list(range(10))})

    def train_fn(sample):
        inputs = sample[0]
        targets = sample[1]
        if args.cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
        # the data loader already rescales and normalizes the images, so no further scaling here
        inputs = Variable(inputs)
        targets = Variable(targets)
        output = model(inputs)
        loss = criterion(output, targets)
        return loss, output

    def reset_meters():
        classerr.reset()
        meter_loss.reset()
        confusion_meter.reset()

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        classerr.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].data[0])
        confusion_meter.add(state['output'].data,
                            torch.LongTensor(state['sample'][1]))

    def on_start_epoch(state):
        reset_meters()
        state['iterator'] = tqdm(state['iterator'])

    def on_end_epoch(state):
        print('Training loss: %.4f, class error: %.2f%%' % (meter_loss.value()[0], classerr.value()[0]))
        train_loss_logger.log(state['epoch'], meter_loss.value()[0])
        train_err_logger.log(state['epoch'], classerr.value()[0])

        # do validation at the end of each epoch
        reset_meters()
        engine.test(train_fn, validate_loader)
        test_loss_logger.log(state['epoch'], meter_loss.value()[0])
        test_err_logger.log(state['epoch'], classerr.value()[0])
        confusion_logger.log(confusion_meter.value())
        print('Testing loss: %.4f, class error: %.2f%%' % (meter_loss.value()[0], classerr.value()[0]))

    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.train(train_fn, train_loader, maxepoch=args.epochs, optimizer=optimizer)


def main(argv):
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='number of epochs to train (default: 20)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    args = parser.parse_args(argv)  # parse the argv passed in from __main__
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    train_model(args)


if __name__ == '__main__':
    main(sys.argv[1:])
--------------------------------------------------------------------------------
/visualization/tensorboard-cmd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/visualization/tensorboard-cmd.png
--------------------------------------------------------------------------------
/visualization/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/visualization/tensorboard.png
--------------------------------------------------------------------------------
/visualization/visdom-cmd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/visualization/visdom-cmd.png
--------------------------------------------------------------------------------
/visualization/visdom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmudeeplearning11785/Spring2018-tutorials/44a0f9805e1b6891682198c584d66e0577865c6c/visualization/visdom.png
--------------------------------------------------------------------------------