├── vae ├── results │ └── .gitignore ├── requirements.txt ├── README.md └── main.py ├── fast_neural_style ├── neural_style │ ├── __init__.py │ ├── utils.py │ ├── vgg.py │ ├── transformer_net.py │ └── neural_style.py ├── images │ ├── style-images │ │ ├── candy.jpg │ │ ├── mosaic.jpg │ │ ├── udnie.jpg │ │ ├── rain-princess.jpg │ │ └── rain-princess-cropped.jpg │ ├── content-images │ │ └── amber.jpg │ └── output-images │ │ ├── amber-candy.jpg │ │ ├── amber-mosaic.jpg │ │ ├── amber-udnie.jpg │ │ └── amber-rain-princess.jpg ├── download_saved_models.py └── README.md ├── mnist ├── requirements.txt ├── README.md └── main.py ├── snli ├── requirements.txt ├── util.py ├── model.py └── train.py ├── word_language_model ├── requirements.txt ├── data │ └── wikitext-2 │ │ └── README ├── data.py ├── model.py ├── generate.py ├── README.md ├── main.py └── notebooks │ └── 02_Inference.ipynb ├── imagenet ├── requirements.txt ├── README.md └── main.py ├── dcgan ├── requirements.txt ├── README.md └── main.py ├── mnist_hogwild ├── requirements.txt ├── train.py └── main.py ├── .gitignore ├── reinforcement_learning ├── requirements.txt ├── README.md ├── reinforce.py └── actor_critic.py ├── regression ├── README.md └── main.py ├── time_sequence_prediction ├── generate_sine_wave.py ├── README.md └── train.py ├── README.md ├── super_resolution ├── model.py ├── dataset.py ├── super_resolve.py ├── README.md ├── data.py └── main.py └── LICENSE /vae/results/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mnist/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /snli/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchtext 3 | -------------------------------------------------------------------------------- /word_language_model/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | -------------------------------------------------------------------------------- /imagenet/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /dcgan/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | lmdb 4 | -------------------------------------------------------------------------------- /mnist_hogwild/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dcgan/data 2 | data 3 | *.pyc 4 | OpenNMT/data 5 | -------------------------------------------------------------------------------- /vae/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | tqdm 4 | six 5 | 
-------------------------------------------------------------------------------- /reinforcement_learning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | gym 4 | -------------------------------------------------------------------------------- /regression/README.md: -------------------------------------------------------------------------------- 1 | # Linear regression example 2 | 3 | Trains a single fully-connected layer to fit a 4th degree polynomial. 4 | -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/candy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/candy.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/mosaic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/mosaic.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/udnie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/udnie.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/content-images/amber.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/content-images/amber.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-candy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-candy.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-mosaic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-mosaic.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-udnie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-udnie.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/rain-princess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/rain-princess.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-rain-princess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-rain-princess.jpg 
-------------------------------------------------------------------------------- /fast_neural_style/images/style-images/rain-princess-cropped.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/rain-princess-cropped.jpg -------------------------------------------------------------------------------- /mnist/README.md: -------------------------------------------------------------------------------- 1 | # Basic MNIST Example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | python main.py 6 | # CUDA_VISIBLE_DEVICES=2 python main.py # to run on a specific GPU, e.g. GPU 2 7 | ``` 8 | -------------------------------------------------------------------------------- /word_language_model/data/wikitext-2/README: -------------------------------------------------------------------------------- 1 | This is raw data from the wikitext-2 dataset. 2 | 3 | See https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/ 4 | -------------------------------------------------------------------------------- /reinforcement_learning/README.md: -------------------------------------------------------------------------------- 1 | # Reinforcement learning training example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | # For REINFORCE: 6 | python reinforce.py 7 | # For actor-critic: 8 | python actor_critic.py 9 | ``` 10 | -------------------------------------------------------------------------------- /time_sequence_prediction/generate_sine_wave.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | np.random.seed(2) 5 | 6 | T = 20 7 | L = 1000 8 | N = 100 9 | 10 | x = np.empty((N, L), 'int64') 11 | x[:] = np.array(range(L)) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1) 12 | data = np.sin(x / 1.0 / T).astype('float64') 13 | torch.save(data, open('traindata.pt', 'wb')) 14 | -------------------------------------------------------------------------------- /vae/README.md: -------------------------------------------------------------------------------- 1 | # Basic VAE Example 2 | 3 | This is an improved implementation of the paper [Stochastic Gradient VB and the 4 | Variational Auto-Encoder](http://arxiv.org/abs/1312.6114) by Kingma and Welling. 5 | It uses ReLUs and the Adam optimizer instead of sigmoids and Adagrad. These changes make the network converge much faster.
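For readers new to VAEs, the objective this kind of example optimizes is a reconstruction term plus the KL divergence between the approximate posterior and a unit Gaussian. The sketch below shows the standard closed-form loss for flattened 28×28 MNIST inputs; it is illustrative only and is not copied from this example's `main.py`.

```python
import torch
import torch.nn.functional as F

def vae_loss(recon_x, x, mu, logvar):
    # Reconstruction term: binary cross-entropy over the flattened 28x28 image.
    bce = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')
    # KL divergence between N(mu, sigma^2) and N(0, 1), in closed form
    # (Appendix B of Kingma & Welling, 2013).
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return bce + kld
```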
6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | python main.py 10 | ``` 11 | -------------------------------------------------------------------------------- /fast_neural_style/download_saved_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | from torch.utils.model_zoo import _download_url_to_file 5 | 6 | 7 | def unzip(source_filename, dest_dir): 8 | with zipfile.ZipFile(source_filename) as zf: 9 | zf.extractall(path=dest_dir) 10 | 11 | 12 | if __name__ == '__main__': 13 | _download_url_to_file('https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1', 'saved_models.zip', None, True) 14 | unzip('saved_models.zip', '.') 15 | -------------------------------------------------------------------------------- /time_sequence_prediction/README.md: -------------------------------------------------------------------------------- 1 | # Time Sequence Prediction 2 | This is a toy example for beginners to start with. It is helpful for learning both PyTorch and time sequence prediction. Two LSTMCell units are used in this example to learn some sine wave signals starting at different phases. After learning the sine waves, the network tries to predict the signal values in the future. The results are shown in the picture below. 3 | 4 | ## Usage 5 | 6 | ``` 7 | python generate_sine_wave.py 8 | python train.py 9 | ``` 10 | 11 | ## Result 12 | The initial signal and the predicted results are shown in the image. We first give some initial signals (solid lines). The network then gives some predicted results (dashed lines). It can be concluded that the network can generate new sine waves. 13 | ![image](https://cloud.githubusercontent.com/assets/1419566/24184438/e24f5280-0f08-11e7-8f8b-4d972b527a81.png) 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Examples 2 | 3 | A repository showcasing examples of using [PyTorch](https://github.com/pytorch/pytorch). 4 | 5 | - MNIST Convnets 6 | - Word-level Language Modeling using LSTM RNNs 7 | - Training ImageNet Classifiers with Residual Networks 8 | - Generative Adversarial Networks (DCGAN) 9 | - Variational Auto-Encoders 10 | - Superresolution using an efficient sub-pixel convolutional neural network 11 | - Hogwild training of shared ConvNets across multiple processes on MNIST 12 | - Training a CartPole to balance in OpenAI Gym with actor-critic 13 | - Natural Language Inference (SNLI) with GloVe vectors, LSTMs, and torchtext 14 | - Time sequence prediction - create an LSTM to learn sine waves 15 | 16 | Additionally, a list of good examples hosted in their own repositories: 17 | 18 | - [Neural Machine Translation using sequence-to-sequence RNN with attention (OpenNMT)](https://github.com/OpenNMT/OpenNMT-py) 19 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | 4 | 5 | def load_image(filename, size=None, scale=None): 6 | img = Image.open(filename) 7 | if size is not None: 8 | img = img.resize((size, size), Image.ANTIALIAS) 9 | elif scale is not None: 10 | img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS) 11 | return img 12 | 13 | 14 | def save_image(filename, data): 15 | img =
data.clone().clamp(0, 255).numpy() 16 | img = img.transpose(1, 2, 0).astype("uint8") 17 | img = Image.fromarray(img) 18 | img.save(filename) 19 | 20 | 21 | def gram_matrix(y): 22 | (b, ch, h, w) = y.size() 23 | features = y.view(b, ch, w * h) 24 | features_t = features.transpose(1, 2) 25 | gram = features.bmm(features_t) / (ch * h * w) 26 | return gram 27 | 28 | 29 | def normalize_batch(batch): 30 | # normalize using imagenet mean and std 31 | mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1) 32 | std = batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1) 33 | batch = batch.div_(255.0) 34 | return (batch - mean) / std 35 | -------------------------------------------------------------------------------- /super_resolution/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | 5 | 6 | class Net(nn.Module): 7 | def __init__(self, upscale_factor): 8 | super(Net, self).__init__() 9 | 10 | self.relu = nn.ReLU() 11 | self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2)) 12 | self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)) 13 | self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1)) 14 | self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1)) 15 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 16 | 17 | self._initialize_weights() 18 | 19 | def forward(self, x): 20 | x = self.relu(self.conv1(x)) 21 | x = self.relu(self.conv2(x)) 22 | x = self.relu(self.conv3(x)) 23 | x = self.pixel_shuffle(self.conv4(x)) 24 | return x 25 | 26 | def _initialize_weights(self): 27 | init.orthogonal_(self.conv1.weight, init.calculate_gain('relu')) 28 | init.orthogonal_(self.conv2.weight, init.calculate_gain('relu')) 29 | init.orthogonal_(self.conv3.weight, init.calculate_gain('relu')) 30 | init.orthogonal_(self.conv4.weight) 31 | -------------------------------------------------------------------------------- /super_resolution/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from os import listdir 4 | from os.path import join 5 | from PIL import Image 6 | 7 | 8 | def is_image_file(filename): 9 | return any(filename.endswith(extension) for extension in [".png", ".jpg", ".jpeg"]) 10 | 11 | 12 | def load_img(filepath): 13 | img = Image.open(filepath).convert('YCbCr') 14 | y, _, _ = img.split() 15 | return y 16 | 17 | 18 | class DatasetFromFolder(data.Dataset): 19 | def __init__(self, image_dir, input_transform=None, target_transform=None): 20 | super(DatasetFromFolder, self).__init__() 21 | self.image_filenames = [join(image_dir, x) for x in listdir(image_dir) if is_image_file(x)] 22 | 23 | self.input_transform = input_transform 24 | self.target_transform = target_transform 25 | 26 | def __getitem__(self, index): 27 | input = load_img(self.image_filenames[index]) 28 | target = input.copy() 29 | if self.input_transform: 30 | input = self.input_transform(input) 31 | if self.target_transform: 32 | target = self.target_transform(target) 33 | 34 | return input, target 35 | 36 | def __len__(self): 37 | return len(self.image_filenames) 38 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/vgg.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | import torch 4 | from torchvision import models 5 | 6 | 7 | class Vgg16(torch.nn.Module): 8 | def 
__init__(self, requires_grad=False): 9 | super(Vgg16, self).__init__() 10 | vgg_pretrained_features = models.vgg16(pretrained=True).features 11 | self.slice1 = torch.nn.Sequential() 12 | self.slice2 = torch.nn.Sequential() 13 | self.slice3 = torch.nn.Sequential() 14 | self.slice4 = torch.nn.Sequential() 15 | for x in range(4): 16 | self.slice1.add_module(str(x), vgg_pretrained_features[x]) 17 | for x in range(4, 9): 18 | self.slice2.add_module(str(x), vgg_pretrained_features[x]) 19 | for x in range(9, 16): 20 | self.slice3.add_module(str(x), vgg_pretrained_features[x]) 21 | for x in range(16, 23): 22 | self.slice4.add_module(str(x), vgg_pretrained_features[x]) 23 | if not requires_grad: 24 | for param in self.parameters(): 25 | param.requires_grad = False 26 | 27 | def forward(self, X): 28 | h = self.slice1(X) 29 | h_relu1_2 = h 30 | h = self.slice2(h) 31 | h_relu2_2 = h 32 | h = self.slice3(h) 33 | h_relu3_3 = h 34 | h = self.slice4(h) 35 | h_relu4_3 = h 36 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']) 37 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3) 38 | return out 39 | -------------------------------------------------------------------------------- /super_resolution/super_resolve.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | from torch.autograd import Variable 5 | from PIL import Image 6 | from torchvision.transforms import ToTensor 7 | 8 | import numpy as np 9 | 10 | # Training settings 11 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example') 12 | parser.add_argument('--input_image', type=str, required=True, help='input image to use') 13 | parser.add_argument('--model', type=str, required=True, help='model file to use') 14 | parser.add_argument('--output_filename', type=str, help='where to save the output image') 15 | parser.add_argument('--cuda', action='store_true', help='use cuda') 16 | opt = parser.parse_args() 17 | 18 | print(opt) 19 | img = Image.open(opt.input_image).convert('YCbCr') 20 | y, cb, cr = img.split() 21 | 22 | model = torch.load(opt.model) 23 | img_to_tensor = ToTensor() 24 | input = img_to_tensor(y).view(1, -1, y.size[1], y.size[0]) 25 | 26 | if opt.cuda: 27 | model = model.cuda() 28 | input = input.cuda() 29 | 30 | out = model(input) 31 | out = out.cpu() 32 | out_img_y = out[0].detach().numpy() 33 | out_img_y *= 255.0 34 | out_img_y = out_img_y.clip(0, 255) 35 | out_img_y = Image.fromarray(np.uint8(out_img_y[0]), mode='L') 36 | 37 | out_img_cb = cb.resize(out_img_y.size, Image.BICUBIC) 38 | out_img_cr = cr.resize(out_img_y.size, Image.BICUBIC) 39 | out_img = Image.merge('YCbCr', [out_img_y, out_img_cb, out_img_cr]).convert('RGB') 40 | 41 | out_img.save(opt.output_filename) 42 | print('output image saved to ', opt.output_filename) 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /word_language_model/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | class Dictionary(object): 5 | def __init__(self): 6 | self.word2idx = {} 7 | self.idx2word = [] 8 | 9 | def add_word(self, word): 10 | if word not in self.word2idx: 11 | self.idx2word.append(word) 12 | self.word2idx[word] = len(self.idx2word) - 1 13 | return self.word2idx[word] 14 | 15 | def __len__(self): 16 | return len(self.idx2word) 17 | 18 | 19 | class Corpus(object): 20 | def __init__(self, path): 21 | self.dictionary = Dictionary() 22 | self.train = self.tokenize(os.path.join(path, 'train.txt')) 23 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 24 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 25 | 26 | def tokenize(self, path): 27 | """Tokenizes a text file.""" 28 | assert os.path.exists(path) 29 | # Add words to the dictionary 30 | with open(path, 'r', encoding="utf8") as f: 31 | tokens = 0 32 | for line in f: 33 | if len(line.strip()) == 0: 34 | continue 35 | words = line.strip().split() + [''] 36 | tokens += len(words) 37 | for word in words: 38 | self.dictionary.add_word(word) 39 | 40 | # Tokenize file content 41 | with open(path, 'r', encoding="utf8") as f: 42 | ids = torch.LongTensor(tokens) 43 | token = 0 44 | for line in f: 45 | if len(line.strip()) == 0: 46 | continue 47 | words = line.strip().split() + [''] 48 | for word in words: 49 | ids[token] = self.dictionary.word2idx[word] 50 | token += 1 51 | 52 | return ids 53 | -------------------------------------------------------------------------------- /super_resolution/README.md: -------------------------------------------------------------------------------- 1 | # Superresolution using an efficient sub-pixel convolutional neural network 2 | 3 | This example illustrates how to use the efficient sub-pixel convolution layer described in ["Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network" - Shi et al.](https://arxiv.org/abs/1609.05158) for increasing spatial resolution within your network for tasks such as superresolution. 
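For intuition, the sub-pixel (pixel shuffle) layer takes a feature map with `upscale_factor**2` channels per output channel and rearranges it into a spatially larger map. A minimal illustration of the rearrangement (not part of this example's scripts):

```python
import torch
import torch.nn as nn

upscale_factor = 3
pixel_shuffle = nn.PixelShuffle(upscale_factor)
# 9 low-resolution channels are rearranged into one channel at 3x the resolution.
x = torch.randn(1, upscale_factor ** 2, 8, 8)
y = pixel_shuffle(x)
print(x.shape, '->', y.shape)  # torch.Size([1, 9, 8, 8]) -> torch.Size([1, 1, 24, 24])
```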
4 | 5 | ``` 6 | usage: main.py [-h] --upscale_factor UPSCALE_FACTOR [--batchSize BATCHSIZE] 7 | [--testBatchSize TESTBATCHSIZE] [--nEpochs NEPOCHS] [--lr LR] 8 | [--cuda] [--threads THREADS] [--seed SEED] 9 | 10 | PyTorch Super Res Example 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | --upscale_factor super resolution upscale factor 15 | --batchSize training batch size 16 | --testBatchSize testing batch size 17 | --nEpochs number of epochs to train for 18 | --lr Learning Rate. Default=0.01 19 | --cuda use cuda 20 | --threads number of threads for data loader to use Default=4 21 | --seed random seed to use. Default=123 22 | ``` 23 | This example trains a super-resolution network on the [BSD300 dataset](https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/), using crops from the 200 training images, and evaluating on crops of the 100 test images. A snapshot of the model is saved after every epoch with the filename `model_epoch_<epoch_number>.pth`. 24 | 25 | ## Example Usage: 26 | 27 | ### Train 28 | 29 | `python main.py --upscale_factor 3 --batchSize 4 --testBatchSize 100 --nEpochs 30 --lr 0.001` 30 | 31 | ### Super Resolve 32 | `python super_resolve.py --input_image dataset/BSDS300/images/test/16077.jpg --model model_epoch_30.pth --output_filename out.png` 33 | -------------------------------------------------------------------------------- /regression/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from itertools import count 4 | 5 | import torch 6 | import torch.autograd 7 | import torch.nn.functional as F 8 | 9 | POLY_DEGREE = 4 10 | W_target = torch.randn(POLY_DEGREE, 1) * 5 11 | b_target = torch.randn(1) * 5 12 | 13 | 14 | def make_features(x): 15 | """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4].""" 16 | x = x.unsqueeze(1) 17 | return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1) 18 | 19 | 20 | def f(x): 21 | """Approximated function.""" 22 | return x.mm(W_target) + b_target.item() 23 | 24 | 25 | def poly_desc(W, b): 26 | """Creates a string description of a polynomial.""" 27 | result = 'y = ' 28 | for i, w in enumerate(W): 29 | result += '{:+.2f} x^{} '.format(w, len(W) - i) 30 | result += '{:+.2f}'.format(b[0]) 31 | return result 32 | 33 | 34 | def get_batch(batch_size=32): 35 | """Builds a batch i.e.
(x, f(x)) pair.""" 36 | random = torch.randn(batch_size) 37 | x = make_features(random) 38 | y = f(x) 39 | return x, y 40 | 41 | 42 | # Define model 43 | fc = torch.nn.Linear(W_target.size(0), 1) 44 | 45 | for batch_idx in count(1): 46 | # Get data 47 | batch_x, batch_y = get_batch() 48 | 49 | # Reset gradients 50 | fc.zero_grad() 51 | 52 | # Forward pass 53 | output = F.smooth_l1_loss(fc(batch_x), batch_y) 54 | loss = output.item() 55 | 56 | # Backward pass 57 | output.backward() 58 | 59 | # Apply gradients 60 | for param in fc.parameters(): 61 | param.data.add_(-0.1 * param.grad.data) 62 | 63 | # Stop criterion 64 | if loss < 1e-3: 65 | break 66 | 67 | print('Loss: {:.6f} after {} batches'.format(loss, batch_idx)) 68 | print('==> Learned function:\t' + poly_desc(fc.weight.view(-1), fc.bias)) 69 | print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target)) 70 | -------------------------------------------------------------------------------- /dcgan/README.md: -------------------------------------------------------------------------------- 1 | # Deep Convolution Generative Adversarial Networks 2 | 3 | This example implements the paper [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](http://arxiv.org/abs/1511.06434) 4 | 5 | The implementation is very close to the Torch implementation [dcgan.torch](https://github.com/soumith/dcgan.torch) 6 | 7 | After every 100 training iterations, the files `real_samples.png` and `fake_samples.png` are written to disk 8 | with the samples from the generative model. 9 | 10 | After every epoch, models are saved to: `netG_epoch_%d.pth` and `netD_epoch_%d.pth` 11 | 12 | ## Downloading the dataset 13 | You can download the LSUN dataset by cloning [this repo](https://github.com/fyu/lsun) and running 14 | ``` 15 | python download.py -c bedroom 16 | ``` 17 | 18 | ## Usage 19 | ``` 20 | usage: main.py [-h] --dataset DATASET --dataroot DATAROOT [--workers WORKERS] 21 | [--batchSize BATCHSIZE] [--imageSize IMAGESIZE] [--nz NZ] 22 | [--ngf NGF] [--ndf NDF] [--niter NITER] [--lr LR] 23 | [--beta1 BETA1] [--cuda] [--ngpu NGPU] [--netG NETG] 24 | [--netD NETD] 25 | 26 | optional arguments: 27 | -h, --help show this help message and exit 28 | --dataset DATASET cifar10 | lsun | imagenet | folder | lfw 29 | --dataroot DATAROOT path to dataset 30 | --workers WORKERS number of data loading workers 31 | --batchSize BATCHSIZE 32 | input batch size 33 | --imageSize IMAGESIZE 34 | the height / width of the input image to network 35 | --nz NZ size of the latent z vector 36 | --ngf NGF 37 | --ndf NDF 38 | --niter NITER number of epochs to train for 39 | --lr LR learning rate, default=0.0002 40 | --beta1 BETA1 beta1 for adam. 
default=0.5 41 | --cuda enables cuda 42 | --ngpu NGPU number of GPUs to use 43 | --netG NETG path to netG (to continue training) 44 | --netD NETD path to netD (to continue training) 45 | ``` 46 | -------------------------------------------------------------------------------- /snli/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser 3 | 4 | def makedirs(name): 5 | """helper function for python 2 and 3 to call os.makedirs() 6 | avoiding an error if the directory to be created already exists""" 7 | 8 | import os, errno 9 | 10 | try: 11 | os.makedirs(name) 12 | except OSError as ex: 13 | if ex.errno == errno.EEXIST and os.path.isdir(name): 14 | # ignore existing directory 15 | pass 16 | else: 17 | # a different error happened 18 | raise 19 | 20 | 21 | def get_args(): 22 | parser = ArgumentParser(description='PyTorch/torchtext SNLI example') 23 | parser.add_argument('--epochs', type=int, default=50) 24 | parser.add_argument('--batch_size', type=int, default=128) 25 | parser.add_argument('--d_embed', type=int, default=100) 26 | parser.add_argument('--d_proj', type=int, default=300) 27 | parser.add_argument('--d_hidden', type=int, default=300) 28 | parser.add_argument('--n_layers', type=int, default=1) 29 | parser.add_argument('--log_every', type=int, default=50) 30 | parser.add_argument('--lr', type=float, default=.001) 31 | parser.add_argument('--dev_every', type=int, default=1000) 32 | parser.add_argument('--save_every', type=int, default=1000) 33 | parser.add_argument('--dp_ratio', type=int, default=0.2) 34 | parser.add_argument('--no-bidirectional', action='store_false', dest='birnn') 35 | parser.add_argument('--preserve-case', action='store_false', dest='lower') 36 | parser.add_argument('--no-projection', action='store_false', dest='projection') 37 | parser.add_argument('--train_embed', action='store_false', dest='fix_emb') 38 | parser.add_argument('--gpu', type=int, default=0) 39 | parser.add_argument('--save_path', type=str, default='results') 40 | parser.add_argument('--vector_cache', type=str, default=os.path.join(os.getcwd(), '.vector_cache/input_vectors.pt')) 41 | parser.add_argument('--word_vectors', type=str, default='glove.6B.100d') 42 | parser.add_argument('--resume_snapshot', type=str, default='') 43 | args = parser.parse_args() 44 | return args 45 | -------------------------------------------------------------------------------- /super_resolution/data.py: -------------------------------------------------------------------------------- 1 | from os.path import exists, join, basename 2 | from os import makedirs, remove 3 | from six.moves import urllib 4 | import tarfile 5 | from torchvision.transforms import Compose, CenterCrop, ToTensor, Resize 6 | 7 | from dataset import DatasetFromFolder 8 | 9 | 10 | def download_bsd300(dest="dataset"): 11 | output_image_dir = join(dest, "BSDS300/images") 12 | 13 | if not exists(output_image_dir): 14 | makedirs(dest) 15 | url = "http://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/BSDS300-images.tgz" 16 | print("downloading url ", url) 17 | 18 | data = urllib.request.urlopen(url) 19 | 20 | file_path = join(dest, basename(url)) 21 | with open(file_path, 'wb') as f: 22 | f.write(data.read()) 23 | 24 | print("Extracting data") 25 | with tarfile.open(file_path) as tar: 26 | for item in tar: 27 | tar.extract(item, dest) 28 | 29 | remove(file_path) 30 | 31 | return output_image_dir 32 | 33 | 34 | def calculate_valid_crop_size(crop_size, 
upscale_factor): 35 | return crop_size - (crop_size % upscale_factor) 36 | 37 | 38 | def input_transform(crop_size, upscale_factor): 39 | return Compose([ 40 | CenterCrop(crop_size), 41 | Resize(crop_size // upscale_factor), 42 | ToTensor(), 43 | ]) 44 | 45 | 46 | def target_transform(crop_size): 47 | return Compose([ 48 | CenterCrop(crop_size), 49 | ToTensor(), 50 | ]) 51 | 52 | 53 | def get_training_set(upscale_factor): 54 | root_dir = download_bsd300() 55 | train_dir = join(root_dir, "train") 56 | crop_size = calculate_valid_crop_size(256, upscale_factor) 57 | 58 | return DatasetFromFolder(train_dir, 59 | input_transform=input_transform(crop_size, upscale_factor), 60 | target_transform=target_transform(crop_size)) 61 | 62 | 63 | def get_test_set(upscale_factor): 64 | root_dir = download_bsd300() 65 | test_dir = join(root_dir, "test") 66 | crop_size = calculate_valid_crop_size(256, upscale_factor) 67 | 68 | return DatasetFromFolder(test_dir, 69 | input_transform=input_transform(crop_size, upscale_factor), 70 | target_transform=target_transform(crop_size)) 71 | -------------------------------------------------------------------------------- /mnist_hogwild/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | from torchvision import datasets, transforms 6 | 7 | def train(rank, args, model): 8 | torch.manual_seed(args.seed + rank) 9 | 10 | train_loader = torch.utils.data.DataLoader( 11 | datasets.MNIST('../data', train=True, download=True, 12 | transform=transforms.Compose([ 13 | transforms.ToTensor(), 14 | transforms.Normalize((0.1307,), (0.3081,)) 15 | ])), 16 | batch_size=args.batch_size, shuffle=True, num_workers=1) 17 | test_loader = torch.utils.data.DataLoader( 18 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 19 | transforms.ToTensor(), 20 | transforms.Normalize((0.1307,), (0.3081,)) 21 | ])), 22 | batch_size=args.batch_size, shuffle=True, num_workers=1) 23 | 24 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 25 | for epoch in range(1, args.epochs + 1): 26 | train_epoch(epoch, args, model, train_loader, optimizer) 27 | test_epoch(model, test_loader) 28 | 29 | 30 | def train_epoch(epoch, args, model, data_loader, optimizer): 31 | model.train() 32 | pid = os.getpid() 33 | for batch_idx, (data, target) in enumerate(data_loader): 34 | optimizer.zero_grad() 35 | output = model(data) 36 | loss = F.nll_loss(output, target) 37 | loss.backward() 38 | optimizer.step() 39 | if batch_idx % args.log_interval == 0: 40 | print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 41 | pid, epoch, batch_idx * len(data), len(data_loader.dataset), 42 | 100. * batch_idx / len(data_loader), loss.item())) 43 | 44 | 45 | def test_epoch(model, data_loader): 46 | model.eval() 47 | test_loss = 0 48 | correct = 0 49 | with torch.no_grad(): 50 | for data, target in data_loader: 51 | output = model(data) 52 | test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss 53 | pred = output.max(1)[1] # get the index of the max log-probability 54 | correct += pred.eq(target).sum().item() 55 | 56 | test_loss /= len(data_loader.dataset) 57 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 58 | test_loss, correct, len(data_loader.dataset), 59 | 100. 
* correct / len(data_loader.dataset))) 60 | -------------------------------------------------------------------------------- /imagenet/README.md: -------------------------------------------------------------------------------- 1 | # ImageNet training in PyTorch 2 | 3 | This implements training of popular model architectures, such as ResNet, AlexNet, and VGG on the ImageNet dataset. 4 | 5 | ## Requirements 6 | 7 | - Install PyTorch ([pytorch.org](http://pytorch.org)) 8 | - `pip install -r requirements.txt` 9 | - Download the ImageNet dataset and move validation images to labeled subfolders 10 | - To do this, you can use the following script: https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 11 | 12 | ## Training 13 | 14 | To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: 15 | 16 | ```bash 17 | python main.py -a resnet18 [imagenet-folder with train and val folders] 18 | ``` 19 | 20 | The default learning rate schedule starts at 0.1 and decays by a factor of 10 every 30 epochs. This is appropriate for ResNet and models with batch normalization, but too high for AlexNet and VGG. Use 0.01 as the initial learning rate for AlexNet or VGG: 21 | 22 | ```bash 23 | python main.py -a alexnet --lr 0.01 [imagenet-folder with train and val folders] 24 | ``` 25 | 26 | ## Usage 27 | 28 | ``` 29 | usage: main.py [-h] [--arch ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N] 30 | [--lr LR] [--momentum M] [--weight-decay W] [--print-freq N] 31 | [--resume PATH] [-e] [--pretrained] 32 | DIR 33 | 34 | PyTorch ImageNet Training 35 | 36 | positional arguments: 37 | DIR path to dataset 38 | 39 | optional arguments: 40 | -h, --help show this help message and exit 41 | --arch ARCH, -a ARCH model architecture: alexnet | resnet | resnet101 | 42 | resnet152 | resnet18 | resnet34 | resnet50 | vgg | 43 | vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn 44 | | vgg19 | vgg19_bn (default: resnet18) 45 | -j N, --workers N number of data loading workers (default: 4) 46 | --epochs N number of total epochs to run 47 | --start-epoch N manual epoch number (useful on restarts) 48 | -b N, --batch-size N mini-batch size (default: 256) 49 | --lr LR, --learning-rate LR 50 | initial learning rate 51 | --momentum M momentum 52 | --weight-decay W, --wd W 53 | weight decay (default: 1e-4) 54 | --print-freq N, -p N print frequency (default: 10) 55 | --resume PATH path to latest checkpoint (default: none) 56 | -e, --evaluate evaluate model on validation set 57 | --pretrained use pre-trained model 58 | ``` 59 | -------------------------------------------------------------------------------- /mnist_hogwild/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.multiprocessing as mp 7 | 8 | from train import train 9 | 10 | # Training settings 11 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 12 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 13 | help='input batch size for training (default: 64)') 14 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 15 | help='input batch size for testing (default: 1000)') 16 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 17 | help='number of epochs to train (default: 10)') 18 | parser.add_argument('--lr', type=float, 
default=0.01, metavar='LR', 19 | help='learning rate (default: 0.01)') 20 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 21 | help='SGD momentum (default: 0.5)') 22 | parser.add_argument('--seed', type=int, default=1, metavar='S', 23 | help='random seed (default: 1)') 24 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 25 | help='how many batches to wait before logging training status') 26 | parser.add_argument('--num-processes', type=int, default=2, metavar='N', 27 | help='how many training processes to use (default: 2)') 28 | 29 | class Net(nn.Module): 30 | def __init__(self): 31 | super(Net, self).__init__() 32 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 33 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 34 | self.conv2_drop = nn.Dropout2d() 35 | self.fc1 = nn.Linear(320, 50) 36 | self.fc2 = nn.Linear(50, 10) 37 | 38 | def forward(self, x): 39 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 40 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 41 | x = x.view(-1, 320) 42 | x = F.relu(self.fc1(x)) 43 | x = F.dropout(x, training=self.training) 44 | x = self.fc2(x) 45 | return F.log_softmax(x, dim=1) 46 | 47 | if __name__ == '__main__': 48 | args = parser.parse_args() 49 | 50 | torch.manual_seed(args.seed) 51 | 52 | model = Net() 53 | model.share_memory() # gradients are allocated lazily, so they are not shared here 54 | 55 | processes = [] 56 | for rank in range(args.num_processes): 57 | p = mp.Process(target=train, args=(rank, args, model)) 58 | p.start() 59 | processes.append(p) 60 | for p in processes: 61 | p.join() 62 | -------------------------------------------------------------------------------- /word_language_model/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class RNNModel(nn.Module): 4 | """Container module with an encoder, a recurrent module, and a decoder.""" 5 | 6 | def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False): 7 | super(RNNModel, self).__init__() 8 | self.drop = nn.Dropout(dropout) 9 | self.encoder = nn.Embedding(ntoken, ninp) 10 | if rnn_type in ['LSTM', 'GRU']: 11 | self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout) 12 | else: 13 | try: 14 | nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type] 15 | except KeyError: 16 | raise ValueError( """An invalid option for `--model` was supplied, 17 | options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""") 18 | self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout) 19 | self.decoder = nn.Linear(nhid, ntoken) 20 | 21 | # Optionally tie weights as in: 22 | # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016) 23 | # https://arxiv.org/abs/1608.05859 24 | # and 25 | # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 
2016) 26 | # https://arxiv.org/abs/1611.01462 27 | if tie_weights: 28 | if nhid != ninp: 29 | raise ValueError('When using the tied flag, nhid must be equal to emsize') 30 | self.decoder.weight = self.encoder.weight 31 | 32 | self.init_weights() 33 | 34 | self.rnn_type = rnn_type 35 | self.nhid = nhid 36 | self.nlayers = nlayers 37 | 38 | def init_weights(self): 39 | initrange = 0.1 40 | self.encoder.weight.data.uniform_(-initrange, initrange) 41 | self.decoder.bias.data.zero_() 42 | self.decoder.weight.data.uniform_(-initrange, initrange) 43 | 44 | def forward(self, input, hidden): 45 | emb = self.drop(self.encoder(input)) 46 | output, hidden = self.rnn(emb, hidden) 47 | output = self.drop(output) 48 | decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2))) 49 | return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden 50 | 51 | def init_hidden(self, bsz): 52 | weight = next(self.parameters()) 53 | if self.rnn_type == 'LSTM': 54 | return (weight.new_zeros(self.nlayers, bsz, self.nhid), 55 | weight.new_zeros(self.nlayers, bsz, self.nhid)) 56 | else: 57 | return weight.new_zeros(self.nlayers, bsz, self.nhid) 58 | -------------------------------------------------------------------------------- /word_language_model/generate.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Language Modeling on Penn Tree Bank 3 | # 4 | # This file generates new sentences sampled from the language model 5 | # 6 | ############################################################################### 7 | 8 | import argparse 9 | 10 | import torch 11 | from torch.autograd import Variable 12 | 13 | import data 14 | 15 | parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 Language Model') 16 | 17 | # Model parameters. 18 | parser.add_argument('--data', type=str, default='./data/wikitext-2', 19 | help='location of the data corpus') 20 | parser.add_argument('--checkpoint', type=str, default='./model.pt', 21 | help='model checkpoint to use') 22 | parser.add_argument('--outf', type=str, default='generated.txt', 23 | help='output file for generated text') 24 | parser.add_argument('--words', type=int, default='1000', 25 | help='number of words to generate') 26 | parser.add_argument('--seed', type=int, default=1111, 27 | help='random seed') 28 | parser.add_argument('--cuda', action='store_true', 29 | help='use CUDA') 30 | parser.add_argument('--temperature', type=float, default=1.0, 31 | help='temperature - higher will increase diversity') 32 | parser.add_argument('--log-interval', type=int, default=100, 33 | help='reporting interval') 34 | args = parser.parse_args() 35 | 36 | # Set the random seed manually for reproducibility. 
37 | torch.manual_seed(args.seed) 38 | if torch.cuda.is_available(): 39 | if not args.cuda: 40 | print("WARNING: You have a CUDA device, so you should probably run with --cuda") 41 | 42 | device = torch.device("cuda" if args.cuda else "cpu") 43 | 44 | if args.temperature < 1e-3: 45 | parser.error("--temperature has to be greater or equal 1e-3") 46 | 47 | with open(args.checkpoint, 'rb') as f: 48 | model = torch.load(f).to(device) 49 | model.eval() 50 | 51 | corpus = data.Corpus(args.data) 52 | ntokens = len(corpus.dictionary) 53 | hidden = model.init_hidden(1) 54 | input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device) 55 | 56 | with open(args.outf, 'w') as outf: 57 | with torch.no_grad(): # no tracking history 58 | for i in range(args.words): 59 | output, hidden = model(input, hidden) 60 | word_weights = output.squeeze().div(args.temperature).exp().cpu() 61 | word_idx = torch.multinomial(word_weights, 1)[0] 62 | input.fill_(word_idx) 63 | word = corpus.dictionary.idx2word[word_idx] 64 | 65 | outf.write(word + ('\n' if i % 20 == 19 else ' ')) 66 | 67 | if i % args.log_interval == 0: 68 | print('| Generated {}/{} words'.format(i, args.words)) 69 | -------------------------------------------------------------------------------- /snli/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Bottle(nn.Module): 6 | 7 | def forward(self, input): 8 | if len(input.size()) <= 2: 9 | return super(Bottle, self).forward(input) 10 | size = input.size()[:2] 11 | out = super(Bottle, self).forward(input.view(size[0]*size[1], -1)) 12 | return out.view(size[0], size[1], -1) 13 | 14 | 15 | class Linear(Bottle, nn.Linear): 16 | pass 17 | 18 | 19 | class Encoder(nn.Module): 20 | 21 | def __init__(self, config): 22 | super(Encoder, self).__init__() 23 | self.config = config 24 | input_size = config.d_proj if config.projection else config.d_embed 25 | dropout = 0 if config.n_layers == 1 else config.dp_ratio 26 | self.rnn = nn.LSTM(input_size=input_size, hidden_size=config.d_hidden, 27 | num_layers=config.n_layers, dropout=dropout, 28 | bidirectional=config.birnn) 29 | 30 | def forward(self, inputs): 31 | batch_size = inputs.size()[1] 32 | state_shape = self.config.n_cells, batch_size, self.config.d_hidden 33 | h0 = c0 = inputs.new_zeros(state_shape) 34 | outputs, (ht, ct) = self.rnn(inputs, (h0, c0)) 35 | return ht[-1] if not self.config.birnn else ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1) 36 | 37 | 38 | class SNLIClassifier(nn.Module): 39 | 40 | def __init__(self, config): 41 | super(SNLIClassifier, self).__init__() 42 | self.config = config 43 | self.embed = nn.Embedding(config.n_embed, config.d_embed) 44 | self.projection = Linear(config.d_embed, config.d_proj) 45 | self.encoder = Encoder(config) 46 | self.dropout = nn.Dropout(p=config.dp_ratio) 47 | self.relu = nn.ReLU() 48 | seq_in_size = 2*config.d_hidden 49 | if self.config.birnn: 50 | seq_in_size *= 2 51 | lin_config = [seq_in_size]*2 52 | self.out = nn.Sequential( 53 | Linear(*lin_config), 54 | self.relu, 55 | self.dropout, 56 | Linear(*lin_config), 57 | self.relu, 58 | self.dropout, 59 | Linear(*lin_config), 60 | self.relu, 61 | self.dropout, 62 | Linear(seq_in_size, config.d_out)) 63 | 64 | def forward(self, batch): 65 | prem_embed = self.embed(batch.premise) 66 | hypo_embed = self.embed(batch.hypothesis) 67 | if self.config.fix_emb: 68 | prem_embed =prem_embed.detach() 69 | hypo_embed =hypo_embed.detach() 70 | if 
self.config.projection: 71 | prem_embed = self.relu(self.projection(prem_embed)) 72 | hypo_embed = self.relu(self.projection(hypo_embed)) 73 | premise = self.encoder(prem_embed) 74 | hypothesis = self.encoder(hypo_embed) 75 | scores = self.out(torch.cat([premise, hypothesis], 1)) 76 | return scores 77 | -------------------------------------------------------------------------------- /word_language_model/README.md: -------------------------------------------------------------------------------- 1 | # Word-level language modeling RNN 2 | 3 | This example trains a multi-layer RNN (Elman, GRU, or LSTM) on a language modeling task. 4 | By default, the training script uses the Wikitext-2 dataset, which is provided with the example. 5 | The trained model can then be used by the generate script to generate new text. 6 | 7 | ```bash 8 | python main.py --cuda --epochs 6 # Train an LSTM on Wikitext-2 with CUDA, reaching perplexity of 117.61 9 | python main.py --cuda --epochs 6 --tied # Train a tied LSTM on Wikitext-2 with CUDA, reaching perplexity of 110.44 10 | python main.py --cuda --tied # Train a tied LSTM on Wikitext-2 with CUDA for 40 epochs, reaching perplexity of 87.17 11 | python generate.py # Generate samples from the trained LSTM model. 12 | ``` 13 | 14 | The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`) 15 | which will automatically use the cuDNN backend if run on CUDA with cuDNN installed. 16 | 17 | During training, if a keyboard interrupt (Ctrl-C) is received, 18 | training is stopped and the current model is evaluated against the test dataset. 19 | 20 | The `main.py` script accepts the following arguments: 21 | 22 | ```bash 23 | optional arguments: 24 | -h, --help show this help message and exit 25 | --data DATA location of the data corpus 26 | --model MODEL type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU) 27 | --emsize EMSIZE size of word embeddings 28 | --nhid NHID number of hidden units per layer 29 | --nlayers NLAYERS number of layers 30 | --lr LR initial learning rate 31 | --clip CLIP gradient clipping 32 | --epochs EPOCHS upper epoch limit 33 | --batch-size N batch size 34 | --bptt BPTT sequence length 35 | --dropout DROPOUT dropout applied to layers (0 = no dropout) 36 | --decay DECAY learning rate decay per epoch 37 | --tied tie the word embedding and softmax weights 38 | --seed SEED random seed 39 | --cuda use CUDA 40 | --log-interval N report interval 41 | --save SAVE path to save the final model 42 | ``` 43 | 44 | With these arguments, a variety of models can be tested. 45 | As an example, the following arguments produce slower but better models: 46 | 47 | ```bash 48 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 # Test perplexity of 80.97 49 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 --tied # Test perplexity of 75.96 50 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 # Test perplexity of 77.42 51 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 --tied # Test perplexity of 72.30 52 | ``` 53 | 54 | Perplexities on PTB are equal to or better than 55 | [Recurrent Neural Network Regularization (Zaremba et al. 2014)](https://arxiv.org/pdf/1409.2329.pdf) 56 | and are similar to [Using the Output Embedding to Improve Language Models (Press & Wolf 2016)](https://arxiv.org/abs/1608.05859) and [Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling (Inan et al.
2016)](https://arxiv.org/pdf/1611.01462.pdf), though both of these papers have improved perplexities by using a form of recurrent dropout [(variational dropout)](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks). 57 | -------------------------------------------------------------------------------- /super_resolution/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | from math import log10 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from model import Net 10 | from data import get_training_set, get_test_set 11 | 12 | # Training settings 13 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example') 14 | parser.add_argument('--upscale_factor', type=int, required=True, help="super resolution upscale factor") 15 | parser.add_argument('--batchSize', type=int, default=64, help='training batch size') 16 | parser.add_argument('--testBatchSize', type=int, default=10, help='testing batch size') 17 | parser.add_argument('--nEpochs', type=int, default=2, help='number of epochs to train for') 18 | parser.add_argument('--lr', type=float, default=0.01, help='Learning Rate. Default=0.01') 19 | parser.add_argument('--cuda', action='store_true', help='use cuda?') 20 | parser.add_argument('--threads', type=int, default=4, help='number of threads for data loader to use') 21 | parser.add_argument('--seed', type=int, default=123, help='random seed to use. Default=123') 22 | opt = parser.parse_args() 23 | 24 | print(opt) 25 | 26 | if opt.cuda and not torch.cuda.is_available(): 27 | raise Exception("No GPU found, please run without --cuda") 28 | 29 | torch.manual_seed(opt.seed) 30 | 31 | device = torch.device("cuda" if opt.cuda else "cpu") 32 | 33 | print('===> Loading datasets') 34 | train_set = get_training_set(opt.upscale_factor) 35 | test_set = get_test_set(opt.upscale_factor) 36 | training_data_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True) 37 | testing_data_loader = DataLoader(dataset=test_set, num_workers=opt.threads, batch_size=opt.testBatchSize, shuffle=False) 38 | 39 | print('===> Building model') 40 | model = Net(upscale_factor=opt.upscale_factor).to(device) 41 | criterion = nn.MSELoss() 42 | 43 | optimizer = optim.Adam(model.parameters(), lr=opt.lr) 44 | 45 | 46 | def train(epoch): 47 | epoch_loss = 0 48 | for iteration, batch in enumerate(training_data_loader, 1): 49 | input, target = batch[0].to(device), batch[1].to(device) 50 | 51 | optimizer.zero_grad() 52 | loss = criterion(model(input), target) 53 | epoch_loss += loss.item() 54 | loss.backward() 55 | optimizer.step() 56 | 57 | print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, len(training_data_loader), loss.item())) 58 | 59 | print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader))) 60 | 61 | 62 | def test(): 63 | avg_psnr = 0 64 | with torch.no_grad(): 65 | for batch in testing_data_loader: 66 | input, target = batch[0].to(device), batch[1].to(device) 67 | 68 | prediction = model(input) 69 | mse = criterion(prediction, target) 70 | psnr = 10 * log10(1 / mse.item()) 71 | avg_psnr += psnr 72 | print("===> Avg. 
PSNR: {:.4f} dB".format(avg_psnr / len(testing_data_loader))) 73 | 74 | 75 | def checkpoint(epoch): 76 | model_out_path = "model_epoch_{}.pth".format(epoch) 77 | torch.save(model, model_out_path) 78 | print("Checkpoint saved to {}".format(model_out_path)) 79 | 80 | for epoch in range(1, opt.nEpochs + 1): 81 | train(epoch) 82 | test() 83 | checkpoint(epoch) 84 | -------------------------------------------------------------------------------- /time_sequence_prediction/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import numpy as np 6 | import matplotlib 7 | matplotlib.use('Agg') 8 | import matplotlib.pyplot as plt 9 | 10 | class Sequence(nn.Module): 11 | def __init__(self): 12 | super(Sequence, self).__init__() 13 | self.lstm1 = nn.LSTMCell(1, 51) 14 | self.lstm2 = nn.LSTMCell(51, 51) 15 | self.linear = nn.Linear(51, 1) 16 | 17 | def forward(self, input, future = 0): 18 | outputs = [] 19 | h_t = torch.zeros(input.size(0), 51, dtype=torch.double) 20 | c_t = torch.zeros(input.size(0), 51, dtype=torch.double) 21 | h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double) 22 | c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double) 23 | 24 | for i, input_t in enumerate(input.chunk(input.size(1), dim=1)): 25 | h_t, c_t = self.lstm1(input_t, (h_t, c_t)) 26 | h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2)) 27 | output = self.linear(h_t2) 28 | outputs += [output] 29 | for i in range(future):# if we should predict the future 30 | h_t, c_t = self.lstm1(output, (h_t, c_t)) 31 | h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2)) 32 | output = self.linear(h_t2) 33 | outputs += [output] 34 | outputs = torch.stack(outputs, 1).squeeze(2) 35 | return outputs 36 | 37 | 38 | if __name__ == '__main__': 39 | # set random seed to 0 40 | np.random.seed(0) 41 | torch.manual_seed(0) 42 | # load data and make training set 43 | data = torch.load('traindata.pt') 44 | input = torch.from_numpy(data[3:, :-1]) 45 | target = torch.from_numpy(data[3:, 1:]) 46 | test_input = torch.from_numpy(data[:3, :-1]) 47 | test_target = torch.from_numpy(data[:3, 1:]) 48 | # build the model 49 | seq = Sequence() 50 | seq.double() 51 | criterion = nn.MSELoss() 52 | # use LBFGS as optimizer since we can load the whole data to train 53 | optimizer = optim.LBFGS(seq.parameters(), lr=0.8) 54 | #begin to train 55 | for i in range(15): 56 | print('STEP: ', i) 57 | def closure(): 58 | optimizer.zero_grad() 59 | out = seq(input) 60 | loss = criterion(out, target) 61 | print('loss:', loss.item()) 62 | loss.backward() 63 | return loss 64 | optimizer.step(closure) 65 | # begin to predict, no need to track gradient here 66 | with torch.no_grad(): 67 | future = 1000 68 | pred = seq(test_input, future=future) 69 | loss = criterion(pred[:, :-future], test_target) 70 | print('test loss:', loss.item()) 71 | y = pred.detach().numpy() 72 | # draw the result 73 | plt.figure(figsize=(30,10)) 74 | plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30) 75 | plt.xlabel('x', fontsize=20) 76 | plt.ylabel('y', fontsize=20) 77 | plt.xticks(fontsize=20) 78 | plt.yticks(fontsize=20) 79 | def draw(yi, color): 80 | plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0) 81 | plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0) 82 | draw(y[0], 'r') 83 | draw(y[1], 'g') 84 | draw(y[2], 'b') 85 | 
plt.savefig('predict%d.pdf'%i) 86 | plt.close() 87 | -------------------------------------------------------------------------------- /reinforcement_learning/reinforce.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | import numpy as np 4 | from itertools import count 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | from torch.distributions import Categorical 11 | 12 | 13 | parser = argparse.ArgumentParser(description='PyTorch REINFORCE example') 14 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 15 | help='discount factor (default: 0.99)') 16 | parser.add_argument('--seed', type=int, default=543, metavar='N', 17 | help='random seed (default: 543)') 18 | parser.add_argument('--render', action='store_true', 19 | help='render the environment') 20 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 21 | help='interval between training status logs (default: 10)') 22 | args = parser.parse_args() 23 | 24 | 25 | env = gym.make('CartPole-v0') 26 | env.seed(args.seed) 27 | torch.manual_seed(args.seed) 28 | 29 | 30 | class Policy(nn.Module): 31 | def __init__(self): 32 | super(Policy, self).__init__() 33 | self.affine1 = nn.Linear(4, 128) 34 | self.affine2 = nn.Linear(128, 2) 35 | 36 | self.saved_log_probs = [] 37 | self.rewards = [] 38 | 39 | def forward(self, x): 40 | x = F.relu(self.affine1(x)) 41 | action_scores = self.affine2(x) 42 | return F.softmax(action_scores, dim=1) 43 | 44 | 45 | policy = Policy() 46 | optimizer = optim.Adam(policy.parameters(), lr=1e-2) 47 | eps = np.finfo(np.float32).eps.item() 48 | 49 | 50 | def select_action(state): 51 | state = torch.from_numpy(state).float().unsqueeze(0) 52 | probs = policy(state) 53 | m = Categorical(probs) 54 | action = m.sample() 55 | policy.saved_log_probs.append(m.log_prob(action)) 56 | return action.item() 57 | 58 | 59 | def finish_episode(): 60 | R = 0 61 | policy_loss = [] 62 | rewards = [] 63 | for r in policy.rewards[::-1]: 64 | R = r + args.gamma * R 65 | rewards.insert(0, R) 66 | rewards = torch.tensor(rewards) 67 | rewards = (rewards - rewards.mean()) / (rewards.std() + eps) 68 | for log_prob, reward in zip(policy.saved_log_probs, rewards): 69 | policy_loss.append(-log_prob * reward) 70 | optimizer.zero_grad() 71 | policy_loss = torch.cat(policy_loss).sum() 72 | policy_loss.backward() 73 | optimizer.step() 74 | del policy.rewards[:] 75 | del policy.saved_log_probs[:] 76 | 77 | 78 | def main(): 79 | running_reward = 10 80 | for i_episode in count(1): 81 | state = env.reset() 82 | for t in range(10000): # Don't infinite loop while learning 83 | action = select_action(state) 84 | state, reward, done, _ = env.step(action) 85 | if args.render: 86 | env.render() 87 | policy.rewards.append(reward) 88 | if done: 89 | break 90 | 91 | running_reward = running_reward * 0.99 + t * 0.01 92 | finish_episode() 93 | if i_episode % args.log_interval == 0: 94 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format( 95 | i_episode, t, running_reward)) 96 | if running_reward > env.spec.reward_threshold: 97 | print("Solved! 
Running reward is now {} and " 98 | "the last episode runs to {} time steps!".format(running_reward, t)) 99 | break 100 | 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /fast_neural_style/README.md: -------------------------------------------------------------------------------- 1 | # fast-neural-style :city_sunrise: :rocket: 2 | This repository contains a pytorch implementation of an algorithm for artistic style transfer. The algorithm can be used to mix the content of an image with the style of another image. For example, here is a photograph of a door arch rendered in the style of a stained glass painting. 3 | 4 | The model uses the method described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155) along with [Instance Normalization](https://arxiv.org/pdf/1607.08022.pdf). The saved-models for examples shown in the README can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0). 5 | 6 |

7 | *(example images omitted: a photograph of a door arch rendered in a stained-glass style)* 8 | 9 | 10 |
11 | 12 | ## Requirements 13 | The program is written in Python and uses [pytorch](http://pytorch.org/) and [scipy](https://www.scipy.org). A GPU is not necessary, but it can provide a significant speed-up, especially when training a new model. Regular-sized images can be styled on a laptop or desktop using the saved models. 14 | 15 | ## Usage 16 | Stylize an image 17 | ``` 18 | python neural_style/neural_style.py eval --content-image </path/to/content/image> --model </path/to/saved/model> --output-image </path/to/output/image> --cuda 0 19 | ``` 20 | * `--content-image`: path to the content image you want to stylize. 21 | * `--model`: saved model to be used for stylizing the image (e.g. `mosaic.pth`). 22 | * `--output-image`: path for saving the output image. 23 | * `--content-scale`: factor for scaling down the content image if memory is an issue (e.g. a value of 2 will halve the height and width of the content image). 24 | * `--cuda`: set it to 1 for running on GPU, 0 for CPU. 25 | 26 | Train a model 27 | ```bash 28 | python neural_style/neural_style.py train --dataset </path/to/train-dataset> --style-image </path/to/style/image> --save-model-dir </path/to/save-model/folder> --epochs 2 --cuda 1 29 | ``` 30 | 31 | There are several command-line arguments; the important ones are listed below: 32 | * `--dataset`: path to the training dataset; the path should point to a folder containing another folder with all the training images. I used the COCO 2014 Training images dataset [80K/13GB] [(download)](http://mscoco.org/dataset/#download). 33 | * `--style-image`: path to the style image. 34 | * `--save-model-dir`: path to the folder where the trained model will be saved. 35 | * `--cuda`: set it to 1 for running on GPU, 0 for CPU. 36 | 37 | Refer to ``neural_style/neural_style.py`` for the other command-line arguments. For training new models you might have to tune the values of `--content-weight` and `--style-weight`. The mosaic style model shown above was trained with `--content-weight 1e5` and `--style-weight 1e10`. The remaining three models were trained with weight parameters of a similar order of magnitude, with slight variations in `--style-weight` (`5e10` or `1e11`). A full example invocation is shown at the end of this README. 38 | 39 | ## Models 40 | 41 | Models for the examples shown below can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0) or by running the script ``download_saved_models.py``. 42 | 43 |
44 | *(style images and the corresponding stylized output images for the downloadable models omitted)*
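For a concrete end-to-end illustration of the training and stylization commands above, a session might look like the sketch below. The dataset location, the model output directory, and the output image name are placeholders, and `saved_models/mosaic.pth` assumes the saved-models archive has been extracted into a `saved_models` folder; only the bundled `images/...` paths are part of the repository.

```bash
# Hypothetical example; paths marked as placeholders are not part of the repository.

# Train a new style model with the weights reported for the mosaic model.
# The --dataset folder must contain a sub-folder holding the training images.
python neural_style/neural_style.py train \
  --dataset /data/coco2014 \
  --style-image images/style-images/mosaic.jpg \
  --save-model-dir /tmp/style-models \
  --content-weight 1e5 --style-weight 1e10 \
  --epochs 2 --cuda 1

# Stylize the bundled content image with a downloaded (or freshly trained) model.
python neural_style/neural_style.py eval \
  --content-image images/content-images/amber.jpg \
  --model saved_models/mosaic.pth \
  --output-image amber-mosaic-output.jpg \
  --cuda 0
```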
58 | -------------------------------------------------------------------------------- /reinforcement_learning/actor_critic.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | import numpy as np 4 | from itertools import count 5 | from collections import namedtuple 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.distributions import Categorical 12 | 13 | 14 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example') 15 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 16 | help='discount factor (default: 0.99)') 17 | parser.add_argument('--seed', type=int, default=543, metavar='N', 18 | help='random seed (default: 1)') 19 | parser.add_argument('--render', action='store_true', 20 | help='render the environment') 21 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 22 | help='interval between training status logs (default: 10)') 23 | args = parser.parse_args() 24 | 25 | 26 | env = gym.make('CartPole-v0') 27 | env.seed(args.seed) 28 | torch.manual_seed(args.seed) 29 | 30 | 31 | SavedAction = namedtuple('SavedAction', ['log_prob', 'value']) 32 | 33 | 34 | class Policy(nn.Module): 35 | def __init__(self): 36 | super(Policy, self).__init__() 37 | self.affine1 = nn.Linear(4, 128) 38 | self.action_head = nn.Linear(128, 2) 39 | self.value_head = nn.Linear(128, 1) 40 | 41 | self.saved_actions = [] 42 | self.rewards = [] 43 | 44 | def forward(self, x): 45 | x = F.relu(self.affine1(x)) 46 | action_scores = self.action_head(x) 47 | state_values = self.value_head(x) 48 | return F.softmax(action_scores, dim=-1), state_values 49 | 50 | 51 | model = Policy() 52 | optimizer = optim.Adam(model.parameters(), lr=3e-2) 53 | eps = np.finfo(np.float32).eps.item() 54 | 55 | 56 | def select_action(state): 57 | state = torch.from_numpy(state).float() 58 | probs, state_value = model(state) 59 | m = Categorical(probs) 60 | action = m.sample() 61 | model.saved_actions.append(SavedAction(m.log_prob(action), state_value)) 62 | return action.item() 63 | 64 | 65 | def finish_episode(): 66 | R = 0 67 | saved_actions = model.saved_actions 68 | policy_losses = [] 69 | value_losses = [] 70 | rewards = [] 71 | for r in model.rewards[::-1]: 72 | R = r + args.gamma * R 73 | rewards.insert(0, R) 74 | rewards = torch.tensor(rewards) 75 | rewards = (rewards - rewards.mean()) / (rewards.std() + eps) 76 | for (log_prob, value), r in zip(saved_actions, rewards): 77 | reward = r - value.item() 78 | policy_losses.append(-log_prob * reward) 79 | value_losses.append(F.smooth_l1_loss(value, torch.tensor([r]))) 80 | optimizer.zero_grad() 81 | loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum() 82 | loss.backward() 83 | optimizer.step() 84 | del model.rewards[:] 85 | del model.saved_actions[:] 86 | 87 | 88 | def main(): 89 | running_reward = 10 90 | for i_episode in count(1): 91 | state = env.reset() 92 | for t in range(10000): # Don't infinite loop while learning 93 | action = select_action(state) 94 | state, reward, done, _ = env.step(action) 95 | if args.render: 96 | env.render() 97 | model.rewards.append(reward) 98 | if done: 99 | break 100 | 101 | running_reward = running_reward * 0.99 + t * 0.01 102 | finish_episode() 103 | if i_episode % args.log_interval == 0: 104 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format( 105 | i_episode, t, running_reward)) 106 | if running_reward 
> env.spec.reward_threshold: 107 | print("Solved! Running reward is now {} and " 108 | "the last episode runs to {} time steps!".format(running_reward, t)) 109 | break 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/transformer_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TransformerNet(torch.nn.Module): 5 | def __init__(self): 6 | super(TransformerNet, self).__init__() 7 | # Initial convolution layers 8 | self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1) 9 | self.in1 = torch.nn.InstanceNorm2d(32, affine=True) 10 | self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2) 11 | self.in2 = torch.nn.InstanceNorm2d(64, affine=True) 12 | self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2) 13 | self.in3 = torch.nn.InstanceNorm2d(128, affine=True) 14 | # Residual layers 15 | self.res1 = ResidualBlock(128) 16 | self.res2 = ResidualBlock(128) 17 | self.res3 = ResidualBlock(128) 18 | self.res4 = ResidualBlock(128) 19 | self.res5 = ResidualBlock(128) 20 | # Upsampling Layers 21 | self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2) 22 | self.in4 = torch.nn.InstanceNorm2d(64, affine=True) 23 | self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2) 24 | self.in5 = torch.nn.InstanceNorm2d(32, affine=True) 25 | self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1) 26 | # Non-linearities 27 | self.relu = torch.nn.ReLU() 28 | 29 | def forward(self, X): 30 | y = self.relu(self.in1(self.conv1(X))) 31 | y = self.relu(self.in2(self.conv2(y))) 32 | y = self.relu(self.in3(self.conv3(y))) 33 | y = self.res1(y) 34 | y = self.res2(y) 35 | y = self.res3(y) 36 | y = self.res4(y) 37 | y = self.res5(y) 38 | y = self.relu(self.in4(self.deconv1(y))) 39 | y = self.relu(self.in5(self.deconv2(y))) 40 | y = self.deconv3(y) 41 | return y 42 | 43 | 44 | class ConvLayer(torch.nn.Module): 45 | def __init__(self, in_channels, out_channels, kernel_size, stride): 46 | super(ConvLayer, self).__init__() 47 | reflection_padding = kernel_size // 2 48 | self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) 49 | self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) 50 | 51 | def forward(self, x): 52 | out = self.reflection_pad(x) 53 | out = self.conv2d(out) 54 | return out 55 | 56 | 57 | class ResidualBlock(torch.nn.Module): 58 | """ResidualBlock 59 | introduced in: https://arxiv.org/abs/1512.03385 60 | recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html 61 | """ 62 | 63 | def __init__(self, channels): 64 | super(ResidualBlock, self).__init__() 65 | self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1) 66 | self.in1 = torch.nn.InstanceNorm2d(channels, affine=True) 67 | self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1) 68 | self.in2 = torch.nn.InstanceNorm2d(channels, affine=True) 69 | self.relu = torch.nn.ReLU() 70 | 71 | def forward(self, x): 72 | residual = x 73 | out = self.relu(self.in1(self.conv1(x))) 74 | out = self.in2(self.conv2(out)) 75 | out = out + residual 76 | return out 77 | 78 | 79 | class UpsampleConvLayer(torch.nn.Module): 80 | """UpsampleConvLayer 81 | Upsamples the input and then does a convolution. This method gives better results 82 | compared to ConvTranspose2d. 
83 | ref: http://distill.pub/2016/deconv-checkerboard/ 84 | """ 85 | 86 | def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None): 87 | super(UpsampleConvLayer, self).__init__() 88 | self.upsample = upsample 89 | if upsample: 90 | self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample) 91 | reflection_padding = kernel_size // 2 92 | self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) 93 | self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) 94 | 95 | def forward(self, x): 96 | x_in = x 97 | if self.upsample: 98 | x_in = self.upsample_layer(x_in) 99 | out = self.reflection_pad(x_in) 100 | out = self.conv2d(out) 101 | return out 102 | -------------------------------------------------------------------------------- /mnist/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.optim as optim 7 | from torchvision import datasets, transforms 8 | 9 | class Net(nn.Module): 10 | def __init__(self): 11 | super(Net, self).__init__() 12 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 13 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 14 | self.conv2_drop = nn.Dropout2d() 15 | self.fc1 = nn.Linear(320, 50) 16 | self.fc2 = nn.Linear(50, 10) 17 | 18 | def forward(self, x): 19 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 20 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 21 | x = x.view(-1, 320) 22 | x = F.relu(self.fc1(x)) 23 | x = F.dropout(x, training=self.training) 24 | x = self.fc2(x) 25 | return F.log_softmax(x, dim=1) 26 | 27 | def train(args, model, device, train_loader, optimizer, epoch): 28 | model.train() 29 | for batch_idx, (data, target) in enumerate(train_loader): 30 | data, target = data.to(device), target.to(device) 31 | optimizer.zero_grad() 32 | output = model(data) 33 | loss = F.nll_loss(output, target) 34 | loss.backward() 35 | optimizer.step() 36 | if batch_idx % args.log_interval == 0: 37 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 38 | epoch, batch_idx * len(data), len(train_loader.dataset), 39 | 100. * batch_idx / len(train_loader), loss.item())) 40 | 41 | def test(args, model, device, test_loader): 42 | model.eval() 43 | test_loss = 0 44 | correct = 0 45 | with torch.no_grad(): 46 | for data, target in test_loader: 47 | data, target = data.to(device), target.to(device) 48 | output = model(data) 49 | test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss 50 | pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability 51 | correct += pred.eq(target.view_as(pred)).sum().item() 52 | 53 | test_loss /= len(test_loader.dataset) 54 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 55 | test_loss, correct, len(test_loader.dataset), 56 | 100. 
* correct / len(test_loader.dataset))) 57 | 58 | def main(): 59 | # Training settings 60 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 61 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 62 | help='input batch size for training (default: 64)') 63 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 64 | help='input batch size for testing (default: 1000)') 65 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 66 | help='number of epochs to train (default: 10)') 67 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 68 | help='learning rate (default: 0.01)') 69 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 70 | help='SGD momentum (default: 0.5)') 71 | parser.add_argument('--no-cuda', action='store_true', default=False, 72 | help='disables CUDA training') 73 | parser.add_argument('--seed', type=int, default=1, metavar='S', 74 | help='random seed (default: 1)') 75 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 76 | help='how many batches to wait before logging training status') 77 | args = parser.parse_args() 78 | use_cuda = not args.no_cuda and torch.cuda.is_available() 79 | 80 | torch.manual_seed(args.seed) 81 | 82 | device = torch.device("cuda" if use_cuda else "cpu") 83 | 84 | kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} 85 | train_loader = torch.utils.data.DataLoader( 86 | datasets.MNIST('../data', train=True, download=True, 87 | transform=transforms.Compose([ 88 | transforms.ToTensor(), 89 | transforms.Normalize((0.1307,), (0.3081,)) 90 | ])), 91 | batch_size=args.batch_size, shuffle=True, **kwargs) 92 | test_loader = torch.utils.data.DataLoader( 93 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 94 | transforms.ToTensor(), 95 | transforms.Normalize((0.1307,), (0.3081,)) 96 | ])), 97 | batch_size=args.test_batch_size, shuffle=True, **kwargs) 98 | 99 | 100 | model = Net().to(device) 101 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 102 | 103 | for epoch in range(1, args.epochs + 1): 104 | train(args, model, device, train_loader, optimizer, epoch) 105 | test(args, model, device, test_loader) 106 | 107 | 108 | if __name__ == '__main__': 109 | main() -------------------------------------------------------------------------------- /vae/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.utils.data 5 | from torch import nn, optim 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | 11 | parser = argparse.ArgumentParser(description='VAE MNIST Example') 12 | parser.add_argument('--batch-size', type=int, default=128, metavar='N', 13 | help='input batch size for training (default: 128)') 14 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 15 | help='number of epochs to train (default: 10)') 16 | parser.add_argument('--no-cuda', action='store_true', default=False, 17 | help='enables CUDA training') 18 | parser.add_argument('--seed', type=int, default=1, metavar='S', 19 | help='random seed (default: 1)') 20 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 21 | help='how many batches to wait before logging training status') 22 | args = parser.parse_args() 23 | args.cuda = not args.no_cuda 
and torch.cuda.is_available() 24 | 25 | torch.manual_seed(args.seed) 26 | 27 | device = torch.device("cuda" if args.cuda else "cpu") 28 | 29 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 30 | train_loader = torch.utils.data.DataLoader( 31 | datasets.MNIST('../data', train=True, download=True, 32 | transform=transforms.ToTensor()), 33 | batch_size=args.batch_size, shuffle=True, **kwargs) 34 | test_loader = torch.utils.data.DataLoader( 35 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()), 36 | batch_size=args.batch_size, shuffle=True, **kwargs) 37 | 38 | 39 | class VAE(nn.Module): 40 | def __init__(self): 41 | super(VAE, self).__init__() 42 | 43 | self.fc1 = nn.Linear(784, 400) 44 | self.fc21 = nn.Linear(400, 20) 45 | self.fc22 = nn.Linear(400, 20) 46 | self.fc3 = nn.Linear(20, 400) 47 | self.fc4 = nn.Linear(400, 784) 48 | 49 | def encode(self, x): 50 | h1 = F.relu(self.fc1(x)) 51 | return self.fc21(h1), self.fc22(h1) 52 | 53 | def reparameterize(self, mu, logvar): 54 | std = torch.exp(0.5*logvar) 55 | eps = torch.randn_like(std) 56 | return eps.mul(std).add_(mu) 57 | 58 | def decode(self, z): 59 | h3 = F.relu(self.fc3(z)) 60 | return torch.sigmoid(self.fc4(h3)) 61 | 62 | def forward(self, x): 63 | mu, logvar = self.encode(x.view(-1, 784)) 64 | z = self.reparameterize(mu, logvar) 65 | return self.decode(z), mu, logvar 66 | 67 | 68 | model = VAE().to(device) 69 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 70 | 71 | 72 | # Reconstruction + KL divergence losses summed over all elements and batch 73 | def loss_function(recon_x, x, mu, logvar): 74 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum') 75 | 76 | # see Appendix B from VAE paper: 77 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 78 | # https://arxiv.org/abs/1312.6114 79 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 80 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 81 | 82 | return BCE + KLD 83 | 84 | 85 | def train(epoch): 86 | model.train() 87 | train_loss = 0 88 | for batch_idx, (data, _) in enumerate(train_loader): 89 | data = data.to(device) 90 | optimizer.zero_grad() 91 | recon_batch, mu, logvar = model(data) 92 | loss = loss_function(recon_batch, data, mu, logvar) 93 | loss.backward() 94 | train_loss += loss.item() 95 | optimizer.step() 96 | if batch_idx % args.log_interval == 0: 97 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 98 | epoch, batch_idx * len(data), len(train_loader.dataset), 99 | 100. 
* batch_idx / len(train_loader), 100 | loss.item() / len(data))) 101 | 102 | print('====> Epoch: {} Average loss: {:.4f}'.format( 103 | epoch, train_loss / len(train_loader.dataset))) 104 | 105 | 106 | def test(epoch): 107 | model.eval() 108 | test_loss = 0 109 | with torch.no_grad(): 110 | for i, (data, _) in enumerate(test_loader): 111 | data = data.to(device) 112 | recon_batch, mu, logvar = model(data) 113 | test_loss += loss_function(recon_batch, data, mu, logvar).item() 114 | if i == 0: 115 | n = min(data.size(0), 8) 116 | comparison = torch.cat([data[:n], 117 | recon_batch.view(args.batch_size, 1, 28, 28)[:n]]) 118 | save_image(comparison.cpu(), 119 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 120 | 121 | test_loss /= len(test_loader.dataset) 122 | print('====> Test set loss: {:.4f}'.format(test_loss)) 123 | 124 | if __name__ == "__main__": 125 | for epoch in range(1, args.epochs + 1): 126 | train(epoch) 127 | test(epoch) 128 | with torch.no_grad(): 129 | sample = torch.randn(64, 20).to(device) 130 | sample = model.decode(sample).cpu() 131 | save_image(sample.view(64, 1, 28, 28), 132 | 'results/sample_' + str(epoch) + '.png') 133 | -------------------------------------------------------------------------------- /snli/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | 5 | import torch 6 | import torch.optim as O 7 | import torch.nn as nn 8 | 9 | from torchtext import data 10 | from torchtext import datasets 11 | 12 | from model import SNLIClassifier 13 | from util import get_args, makedirs 14 | 15 | 16 | args = get_args() 17 | torch.cuda.set_device(args.gpu) 18 | device = torch.device('cuda:{}'.format(args.gpu)) 19 | 20 | inputs = data.Field(lower=args.lower, tokenize='spacy') 21 | answers = data.Field(sequential=False) 22 | 23 | train, dev, test = datasets.SNLI.splits(inputs, answers) 24 | 25 | inputs.build_vocab(train, dev, test) 26 | if args.word_vectors: 27 | if os.path.isfile(args.vector_cache): 28 | inputs.vocab.vectors = torch.load(args.vector_cache) 29 | else: 30 | inputs.vocab.load_vectors(args.word_vectors) 31 | makedirs(os.path.dirname(args.vector_cache)) 32 | torch.save(inputs.vocab.vectors, args.vector_cache) 33 | answers.build_vocab(train) 34 | 35 | train_iter, dev_iter, test_iter = data.BucketIterator.splits( 36 | (train, dev, test), batch_size=args.batch_size, device=device) 37 | 38 | config = args 39 | config.n_embed = len(inputs.vocab) 40 | config.d_out = len(answers.vocab) 41 | config.n_cells = config.n_layers 42 | 43 | # double the number of cells for bidirectional networks 44 | if config.birnn: 45 | config.n_cells *= 2 46 | 47 | if args.resume_snapshot: 48 | model = torch.load(args.resume_snapshot, map_location=device) 49 | else: 50 | model = SNLIClassifier(config) 51 | if args.word_vectors: 52 | model.embed.weight.data.copy_(inputs.vocab.vectors) 53 | model.to(device) 54 | 55 | criterion = nn.CrossEntropyLoss() 56 | opt = O.Adam(model.parameters(), lr=args.lr) 57 | 58 | iterations = 0 59 | start = time.time() 60 | best_dev_acc = -1 61 | train_iter.repeat = False 62 | header = ' Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss Accuracy Dev/Accuracy' 63 | dev_log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'.split(',')) 64 | log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'.split(',')) 65 | makedirs(args.save_path) 66 | print(header) 67 | 
68 | for epoch in range(args.epochs): 69 | train_iter.init_epoch() 70 | n_correct, n_total = 0, 0 71 | for batch_idx, batch in enumerate(train_iter): 72 | 73 | # switch model to training mode, clear gradient accumulators 74 | model.train(); opt.zero_grad() 75 | 76 | iterations += 1 77 | 78 | # forward pass 79 | answer = model(batch) 80 | 81 | # calculate accuracy of predictions in the current batch 82 | n_correct += (torch.max(answer, 1)[1].view(batch.label.size()) == batch.label).sum().item() 83 | n_total += batch.batch_size 84 | train_acc = 100. * n_correct/n_total 85 | 86 | # calculate loss of the network output with respect to training labels 87 | loss = criterion(answer, batch.label) 88 | 89 | # backpropagate and update optimizer learning rate 90 | loss.backward(); opt.step() 91 | 92 | # checkpoint model periodically 93 | if iterations % args.save_every == 0: 94 | snapshot_prefix = os.path.join(args.save_path, 'snapshot') 95 | snapshot_path = snapshot_prefix + '_acc_{:.4f}_loss_{:.6f}_iter_{}_model.pt'.format(train_acc, loss.item(), iterations) 96 | torch.save(model, snapshot_path) 97 | for f in glob.glob(snapshot_prefix + '*'): 98 | if f != snapshot_path: 99 | os.remove(f) 100 | 101 | # evaluate performance on validation set periodically 102 | if iterations % args.dev_every == 0: 103 | 104 | # switch model to evaluation mode 105 | model.eval(); dev_iter.init_epoch() 106 | 107 | # calculate accuracy on validation set 108 | n_dev_correct, dev_loss = 0, 0 109 | with torch.no_grad(): 110 | for dev_batch_idx, dev_batch in enumerate(dev_iter): 111 | answer = model(dev_batch) 112 | n_dev_correct += (torch.max(answer, 1)[1].view(dev_batch.label.size()) == dev_batch.label).sum().item() 113 | dev_loss = criterion(answer, dev_batch.label) 114 | dev_acc = 100. * n_dev_correct / len(dev) 115 | 116 | print(dev_log_template.format(time.time()-start, 117 | epoch, iterations, 1+batch_idx, len(train_iter), 118 | 100. * (1+batch_idx) / len(train_iter), loss.item(), dev_loss.item(), train_acc, dev_acc)) 119 | 120 | # update best valiation set accuracy 121 | if dev_acc > best_dev_acc: 122 | 123 | # found a model with better validation set accuracy 124 | 125 | best_dev_acc = dev_acc 126 | snapshot_prefix = os.path.join(args.save_path, 'best_snapshot') 127 | snapshot_path = snapshot_prefix + '_devacc_{}_devloss_{}__iter_{}_model.pt'.format(dev_acc, dev_loss.item(), iterations) 128 | 129 | # save model, delete previous 'best_snapshot' files 130 | torch.save(model, snapshot_path) 131 | for f in glob.glob(snapshot_prefix + '*'): 132 | if f != snapshot_path: 133 | os.remove(f) 134 | 135 | elif iterations % args.log_every == 0: 136 | 137 | # print progress message 138 | print(log_template.format(time.time()-start, 139 | epoch, iterations, 1+batch_idx, len(train_iter), 140 | 100. 
* (1+batch_idx) / len(train_iter), loss.item(), ' '*8, n_correct/n_total*100, ' '*12)) 141 | 142 | 143 | -------------------------------------------------------------------------------- /word_language_model/main.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import argparse 3 | import time 4 | import math 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | import torch.onnx 9 | 10 | import data 11 | import model 12 | 13 | parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 RNN/LSTM Language Model') 14 | parser.add_argument('--data', type=str, default='./data/wikitext-2', 15 | help='location of the data corpus') 16 | parser.add_argument('--model', type=str, default='LSTM', 17 | help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)') 18 | parser.add_argument('--emsize', type=int, default=200, 19 | help='size of word embeddings') 20 | parser.add_argument('--nhid', type=int, default=200, 21 | help='number of hidden units per layer') 22 | parser.add_argument('--nlayers', type=int, default=2, 23 | help='number of layers') 24 | parser.add_argument('--lr', type=float, default=20, 25 | help='initial learning rate') 26 | parser.add_argument('--clip', type=float, default=0.25, 27 | help='gradient clipping') 28 | parser.add_argument('--epochs', type=int, default=40, 29 | help='upper epoch limit') 30 | parser.add_argument('--batch_size', type=int, default=20, metavar='N', 31 | help='batch size') 32 | parser.add_argument('--bptt', type=int, default=35, 33 | help='sequence length') 34 | parser.add_argument('--dropout', type=float, default=0.2, 35 | help='dropout applied to layers (0 = no dropout)') 36 | parser.add_argument('--tied', action='store_true', 37 | help='tie the word embedding and softmax weights') 38 | parser.add_argument('--seed', type=int, default=1111, 39 | help='random seed') 40 | parser.add_argument('--cuda', action='store_true', 41 | help='use CUDA') 42 | parser.add_argument('--log-interval', type=int, default=200, metavar='N', 43 | help='report interval') 44 | parser.add_argument('--save', type=str, default='model.pt', 45 | help='path to save the final model') 46 | parser.add_argument('--onnx-export', type=str, default='', 47 | help='path to export the final model in onnx format') 48 | args = parser.parse_args() 49 | 50 | # Set the random seed manually for reproducibility. 51 | torch.manual_seed(args.seed) 52 | if torch.cuda.is_available(): 53 | if not args.cuda: 54 | print("WARNING: You have a CUDA device, so you should probably run with --cuda") 55 | 56 | device = torch.device("cuda" if args.cuda else "cpu") 57 | 58 | ############################################################################### 59 | # Load data 60 | ############################################################################### 61 | 62 | corpus = data.Corpus(args.data) 63 | 64 | print("Number of tokens:") 65 | print("Train: ", len(corpus.train)) 66 | print("Valid: ", len(corpus.valid)) 67 | print("Test: ", len(corpus.test)) 68 | 69 | # Starting from sequential data, batchify arranges the dataset into columns. 70 | # For instance, with the alphabet as the sequence and batch size 4, we'd get 71 | # ┌ a g m s ┐ 72 | # │ b h n t │ 73 | # │ c i o u │ 74 | # │ d j p v │ 75 | # │ e k q w │ 76 | # └ f l r x ┘. 77 | # These columns are treated as independent by the model, which means that the 78 | # dependence of e. g. 'g' on 'f' can not be learned, but allows more efficient 79 | # batch processing. 
80 | 81 | def batchify(data, bsz): 82 | # Work out how cleanly we can divide the dataset into bsz parts. 83 | nbatch = data.size(0) // bsz 84 | # Trim off any extra elements that wouldn't cleanly fit (remainders). 85 | data = data.narrow(0, 0, nbatch * bsz) 86 | # Evenly divide the data across the bsz batches. 87 | data = data.view(bsz, -1).t().contiguous() 88 | return data.to(device) 89 | 90 | eval_batch_size = 10 91 | train_data = batchify(corpus.train, args.batch_size) 92 | val_data = batchify(corpus.valid, eval_batch_size) 93 | test_data = batchify(corpus.test, eval_batch_size) 94 | 95 | ############################################################################### 96 | # Build the model 97 | ############################################################################### 98 | 99 | ntokens = len(corpus.dictionary) 100 | model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device) 101 | 102 | criterion = nn.CrossEntropyLoss() 103 | 104 | ############################################################################### 105 | # Training code 106 | ############################################################################### 107 | 108 | def repackage_hidden(h): 109 | """Wraps hidden states in new Tensors, to detach them from their history.""" 110 | if isinstance(h, torch.Tensor): 111 | return h.detach() 112 | else: 113 | return tuple(repackage_hidden(v) for v in h) 114 | 115 | 116 | # get_batch subdivides the source data into chunks of length args.bptt. 117 | # If source is equal to the example output of the batchify function, with 118 | # a bptt-limit of 2, we'd get the following two Variables for i = 0: 119 | # ┌ a g m s ┐ ┌ b h n t ┐ 120 | # └ b h n t ┘ └ c i o u ┘ 121 | # Note that despite the name of the function, the subdivison of data is not 122 | # done along the batch dimension (i.e. dimension 1), since that was handled 123 | # by the batchify function. The chunks are along dimension 0, corresponding 124 | # to the seq_len dimension in the LSTM. 125 | 126 | def get_batch(source, i): 127 | seq_len = min(args.bptt, len(source) - 1 - i) 128 | data = source[i:i+seq_len] 129 | target = source[i+1:i+1+seq_len].view(-1) 130 | return data, target 131 | 132 | 133 | def evaluate(data_source): 134 | # Turn on evaluation mode which disables dropout. 135 | model.eval() 136 | total_loss = 0. 137 | ntokens = len(corpus.dictionary) 138 | hidden = model.init_hidden(eval_batch_size) 139 | with torch.no_grad(): 140 | for i in range(0, data_source.size(0) - 1, args.bptt): 141 | data, targets = get_batch(data_source, i) 142 | output, hidden = model(data, hidden) 143 | output_flat = output.view(-1, ntokens) 144 | total_loss += len(data) * criterion(output_flat, targets).item() 145 | hidden = repackage_hidden(hidden) 146 | return total_loss / len(data_source) 147 | 148 | 149 | def train(): 150 | # Turn on training mode which enables dropout. 151 | model.train() 152 | total_loss = 0. 153 | start_time = time.time() 154 | ntokens = len(corpus.dictionary) 155 | hidden = model.init_hidden(args.batch_size) 156 | for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)): 157 | data, targets = get_batch(train_data, i) 158 | # Starting each batch, we detach the hidden state from how it was previously produced. 159 | # If we didn't, the model would try backpropagating all the way to start of the dataset. 
160 | hidden = repackage_hidden(hidden) 161 | model.zero_grad() 162 | output, hidden = model(data, hidden) 163 | loss = criterion(output.view(-1, ntokens), targets) 164 | loss.backward() 165 | 166 | # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. 167 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip) 168 | for p in model.parameters(): 169 | p.data.add_(-lr, p.grad.data) 170 | 171 | total_loss += loss.item() 172 | 173 | if batch % args.log_interval == 0 and batch > 0: 174 | cur_loss = total_loss / args.log_interval 175 | elapsed = time.time() - start_time 176 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | ' 177 | 'loss {:5.2f} | ppl {:8.2f}'.format( 178 | epoch, batch, len(train_data) // args.bptt, lr, 179 | elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss))) 180 | total_loss = 0 181 | start_time = time.time() 182 | 183 | 184 | def export_onnx(path, batch_size, seq_len): 185 | print('The model is also exported in ONNX format at {}'. 186 | format(os.path.realpath(args.onnx_export))) 187 | model.eval() 188 | dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device) 189 | hidden = model.init_hidden(batch_size) 190 | torch.onnx.export(model, (dummy_input, hidden), path) 191 | 192 | 193 | # Loop over epochs. 194 | lr = args.lr 195 | best_val_loss = None 196 | 197 | # At any point you can hit Ctrl + C to break out of training early. 198 | try: 199 | for epoch in range(1, args.epochs+1): 200 | epoch_start_time = time.time() 201 | train() 202 | val_loss = evaluate(val_data) 203 | print('-' * 89) 204 | print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' 205 | 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time), 206 | val_loss, math.exp(val_loss))) 207 | print('-' * 89) 208 | # Save the model if the validation loss is the best we've seen so far. 209 | if not best_val_loss or val_loss < best_val_loss: 210 | with open(args.save, 'wb') as f: 211 | torch.save(model, f) 212 | best_val_loss = val_loss 213 | else: 214 | # Anneal the learning rate if no improvement has been seen in the validation dataset. 215 | lr /= 4.0 216 | except KeyboardInterrupt: 217 | print('-' * 89) 218 | print('Exiting from training early') 219 | 220 | # Load the best saved model. 221 | with open(args.save, 'rb') as f: 222 | model = torch.load(f) 223 | # after load the rnn params are not a continuous chunk of memory 224 | # this makes them a continuous chunk, and will speed up forward pass 225 | model.rnn.flatten_parameters() 226 | 227 | # Run on test data. 228 | test_loss = evaluate(test_data) 229 | print('=' * 89) 230 | print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format( 231 | test_loss, math.exp(test_loss))) 232 | print('=' * 89) 233 | 234 | if len(args.onnx_export) > 0: 235 | # Export the model in ONNX format. 
236 | export_onnx(args.onnx_export, batch_size=1, seq_len=args.bptt) 237 | -------------------------------------------------------------------------------- /dcgan/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import random 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim as optim 10 | import torch.utils.data 11 | import torchvision.datasets as dset 12 | import torchvision.transforms as transforms 13 | import torchvision.utils as vutils 14 | 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--dataset', required=True, help='cifar10 | lsun | imagenet | folder | lfw | fake') 18 | parser.add_argument('--dataroot', required=True, help='path to dataset') 19 | parser.add_argument('--workers', type=int, help='number of data loading workers', default=2) 20 | parser.add_argument('--batchSize', type=int, default=64, help='input batch size') 21 | parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network') 22 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector') 23 | parser.add_argument('--ngf', type=int, default=64) 24 | parser.add_argument('--ndf', type=int, default=64) 25 | parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for') 26 | parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002') 27 | parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5') 28 | parser.add_argument('--cuda', action='store_true', help='enables cuda') 29 | parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use') 30 | parser.add_argument('--netG', default='', help="path to netG (to continue training)") 31 | parser.add_argument('--netD', default='', help="path to netD (to continue training)") 32 | parser.add_argument('--outf', default='.', help='folder to output images and model checkpoints') 33 | parser.add_argument('--manualSeed', type=int, help='manual seed') 34 | 35 | opt = parser.parse_args() 36 | print(opt) 37 | 38 | try: 39 | os.makedirs(opt.outf) 40 | except OSError: 41 | pass 42 | 43 | if opt.manualSeed is None: 44 | opt.manualSeed = random.randint(1, 10000) 45 | print("Random Seed: ", opt.manualSeed) 46 | random.seed(opt.manualSeed) 47 | torch.manual_seed(opt.manualSeed) 48 | 49 | cudnn.benchmark = True 50 | 51 | if torch.cuda.is_available() and not opt.cuda: 52 | print("WARNING: You have a CUDA device, so you should probably run with --cuda") 53 | 54 | if opt.dataset in ['imagenet', 'folder', 'lfw']: 55 | # folder dataset 56 | dataset = dset.ImageFolder(root=opt.dataroot, 57 | transform=transforms.Compose([ 58 | transforms.Resize(opt.imageSize), 59 | transforms.CenterCrop(opt.imageSize), 60 | transforms.ToTensor(), 61 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 62 | ])) 63 | elif opt.dataset == 'lsun': 64 | dataset = dset.LSUN(root=opt.dataroot, classes=['bedroom_train'], 65 | transform=transforms.Compose([ 66 | transforms.Resize(opt.imageSize), 67 | transforms.CenterCrop(opt.imageSize), 68 | transforms.ToTensor(), 69 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 70 | ])) 71 | elif opt.dataset == 'cifar10': 72 | dataset = dset.CIFAR10(root=opt.dataroot, download=True, 73 | transform=transforms.Compose([ 74 | transforms.Resize(opt.imageSize), 75 | 
transforms.ToTensor(), 76 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 77 | ])) 78 | elif opt.dataset == 'fake': 79 | dataset = dset.FakeData(image_size=(3, opt.imageSize, opt.imageSize), 80 | transform=transforms.ToTensor()) 81 | assert dataset 82 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize, 83 | shuffle=True, num_workers=int(opt.workers)) 84 | 85 | device = torch.device("cuda:0" if opt.cuda else "cpu") 86 | ngpu = int(opt.ngpu) 87 | nz = int(opt.nz) 88 | ngf = int(opt.ngf) 89 | ndf = int(opt.ndf) 90 | nc = 3 91 | 92 | 93 | # custom weights initialization called on netG and netD 94 | def weights_init(m): 95 | classname = m.__class__.__name__ 96 | if classname.find('Conv') != -1: 97 | m.weight.data.normal_(0.0, 0.02) 98 | elif classname.find('BatchNorm') != -1: 99 | m.weight.data.normal_(1.0, 0.02) 100 | m.bias.data.fill_(0) 101 | 102 | 103 | class Generator(nn.Module): 104 | def __init__(self, ngpu): 105 | super(Generator, self).__init__() 106 | self.ngpu = ngpu 107 | self.main = nn.Sequential( 108 | # input is Z, going into a convolution 109 | nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), 110 | nn.BatchNorm2d(ngf * 8), 111 | nn.ReLU(True), 112 | # state size. (ngf*8) x 4 x 4 113 | nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), 114 | nn.BatchNorm2d(ngf * 4), 115 | nn.ReLU(True), 116 | # state size. (ngf*4) x 8 x 8 117 | nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), 118 | nn.BatchNorm2d(ngf * 2), 119 | nn.ReLU(True), 120 | # state size. (ngf*2) x 16 x 16 121 | nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), 122 | nn.BatchNorm2d(ngf), 123 | nn.ReLU(True), 124 | # state size. (ngf) x 32 x 32 125 | nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), 126 | nn.Tanh() 127 | # state size. (nc) x 64 x 64 128 | ) 129 | 130 | def forward(self, input): 131 | if input.is_cuda and self.ngpu > 1: 132 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 133 | else: 134 | output = self.main(input) 135 | return output 136 | 137 | 138 | netG = Generator(ngpu).to(device) 139 | netG.apply(weights_init) 140 | if opt.netG != '': 141 | netG.load_state_dict(torch.load(opt.netG)) 142 | print(netG) 143 | 144 | 145 | class Discriminator(nn.Module): 146 | def __init__(self, ngpu): 147 | super(Discriminator, self).__init__() 148 | self.ngpu = ngpu 149 | self.main = nn.Sequential( 150 | # input is (nc) x 64 x 64 151 | nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), 152 | nn.LeakyReLU(0.2, inplace=True), 153 | # state size. (ndf) x 32 x 32 154 | nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), 155 | nn.BatchNorm2d(ndf * 2), 156 | nn.LeakyReLU(0.2, inplace=True), 157 | # state size. (ndf*2) x 16 x 16 158 | nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), 159 | nn.BatchNorm2d(ndf * 4), 160 | nn.LeakyReLU(0.2, inplace=True), 161 | # state size. (ndf*4) x 8 x 8 162 | nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), 163 | nn.BatchNorm2d(ndf * 8), 164 | nn.LeakyReLU(0.2, inplace=True), 165 | # state size. 
(ndf*8) x 4 x 4 166 | nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), 167 | nn.Sigmoid() 168 | ) 169 | 170 | def forward(self, input): 171 | if input.is_cuda and self.ngpu > 1: 172 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 173 | else: 174 | output = self.main(input) 175 | 176 | return output.view(-1, 1).squeeze(1) 177 | 178 | 179 | netD = Discriminator(ngpu).to(device) 180 | netD.apply(weights_init) 181 | if opt.netD != '': 182 | netD.load_state_dict(torch.load(opt.netD)) 183 | print(netD) 184 | 185 | criterion = nn.BCELoss() 186 | 187 | fixed_noise = torch.randn(opt.batchSize, nz, 1, 1, device=device) 188 | real_label = 1 189 | fake_label = 0 190 | 191 | # setup optimizer 192 | optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) 193 | optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) 194 | 195 | for epoch in range(opt.niter): 196 | for i, data in enumerate(dataloader, 0): 197 | ############################ 198 | # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) 199 | ########################### 200 | # train with real 201 | netD.zero_grad() 202 | real_cpu = data[0].to(device) 203 | batch_size = real_cpu.size(0) 204 | label = torch.full((batch_size,), real_label, device=device) 205 | 206 | output = netD(real_cpu) 207 | errD_real = criterion(output, label) 208 | errD_real.backward() 209 | D_x = output.mean().item() 210 | 211 | # train with fake 212 | noise = torch.randn(batch_size, nz, 1, 1, device=device) 213 | fake = netG(noise) 214 | label.fill_(fake_label) 215 | output = netD(fake.detach()) 216 | errD_fake = criterion(output, label) 217 | errD_fake.backward() 218 | D_G_z1 = output.mean().item() 219 | errD = errD_real + errD_fake 220 | optimizerD.step() 221 | 222 | ############################ 223 | # (2) Update G network: maximize log(D(G(z))) 224 | ########################### 225 | netG.zero_grad() 226 | label.fill_(real_label) # fake labels are real for generator cost 227 | output = netD(fake) 228 | errG = criterion(output, label) 229 | errG.backward() 230 | D_G_z2 = output.mean().item() 231 | optimizerG.step() 232 | 233 | print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f' 234 | % (epoch, opt.niter, i, len(dataloader), 235 | errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) 236 | if i % 100 == 0: 237 | vutils.save_image(real_cpu, 238 | '%s/real_samples.png' % opt.outf, 239 | normalize=True) 240 | fake = netG(fixed_noise) 241 | vutils.save_image(fake.detach(), 242 | '%s/fake_samples_epoch_%03d.png' % (opt.outf, epoch), 243 | normalize=True) 244 | 245 | # do checkpointing 246 | torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.outf, epoch)) 247 | torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.outf, epoch)) 248 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/neural_style.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | import time 5 | import re 6 | 7 | import numpy as np 8 | import torch 9 | from torch.optim import Adam 10 | from torch.utils.data import DataLoader 11 | from torchvision import datasets 12 | from torchvision import transforms 13 | import torch.onnx 14 | 15 | import utils 16 | from transformer_net import TransformerNet 17 | from vgg import Vgg16 18 | 19 | 20 | def check_paths(args): 21 | try: 22 | if not os.path.exists(args.save_model_dir): 23 | 
os.makedirs(args.save_model_dir) 24 | if args.checkpoint_model_dir is not None and not (os.path.exists(args.checkpoint_model_dir)): 25 | os.makedirs(args.checkpoint_model_dir) 26 | except OSError as e: 27 | print(e) 28 | sys.exit(1) 29 | 30 | 31 | def train(args): 32 | device = torch.device("cuda" if args.cuda else "cpu") 33 | 34 | np.random.seed(args.seed) 35 | torch.manual_seed(args.seed) 36 | 37 | transform = transforms.Compose([ 38 | transforms.Resize(args.image_size), 39 | transforms.CenterCrop(args.image_size), 40 | transforms.ToTensor(), 41 | transforms.Lambda(lambda x: x.mul(255)) 42 | ]) 43 | train_dataset = datasets.ImageFolder(args.dataset, transform) 44 | train_loader = DataLoader(train_dataset, batch_size=args.batch_size) 45 | 46 | transformer = TransformerNet().to(device) 47 | optimizer = Adam(transformer.parameters(), args.lr) 48 | mse_loss = torch.nn.MSELoss() 49 | 50 | vgg = Vgg16(requires_grad=False).to(device) 51 | style_transform = transforms.Compose([ 52 | transforms.ToTensor(), 53 | transforms.Lambda(lambda x: x.mul(255)) 54 | ]) 55 | style = utils.load_image(args.style_image, size=args.style_size) 56 | style = style_transform(style) 57 | style = style.repeat(args.batch_size, 1, 1, 1).to(device) 58 | 59 | features_style = vgg(utils.normalize_batch(style)) 60 | gram_style = [utils.gram_matrix(y) for y in features_style] 61 | 62 | for e in range(args.epochs): 63 | transformer.train() 64 | agg_content_loss = 0. 65 | agg_style_loss = 0. 66 | count = 0 67 | for batch_id, (x, _) in enumerate(train_loader): 68 | n_batch = len(x) 69 | count += n_batch 70 | optimizer.zero_grad() 71 | 72 | x = x.to(device) 73 | y = transformer(x) 74 | 75 | y = utils.normalize_batch(y) 76 | x = utils.normalize_batch(x) 77 | 78 | features_y = vgg(y) 79 | features_x = vgg(x) 80 | 81 | content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2) 82 | 83 | style_loss = 0. 
84 | for ft_y, gm_s in zip(features_y, gram_style): 85 | gm_y = utils.gram_matrix(ft_y) 86 | style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :]) 87 | style_loss *= args.style_weight 88 | 89 | total_loss = content_loss + style_loss 90 | total_loss.backward() 91 | optimizer.step() 92 | 93 | agg_content_loss += content_loss.item() 94 | agg_style_loss += style_loss.item() 95 | 96 | if (batch_id + 1) % args.log_interval == 0: 97 | mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format( 98 | time.ctime(), e + 1, count, len(train_dataset), 99 | agg_content_loss / (batch_id + 1), 100 | agg_style_loss / (batch_id + 1), 101 | (agg_content_loss + agg_style_loss) / (batch_id + 1) 102 | ) 103 | print(mesg) 104 | 105 | if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0: 106 | transformer.eval().cpu() 107 | ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth" 108 | ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename) 109 | torch.save(transformer.state_dict(), ckpt_model_path) 110 | transformer.to(device).train() 111 | 112 | # save model 113 | transformer.eval().cpu() 114 | save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str( 115 | args.content_weight) + "_" + str(args.style_weight) + ".model" 116 | save_model_path = os.path.join(args.save_model_dir, save_model_filename) 117 | torch.save(transformer.state_dict(), save_model_path) 118 | 119 | print("\nDone, trained model saved at", save_model_path) 120 | 121 | 122 | def stylize(args): 123 | device = torch.device("cuda" if args.cuda else "cpu") 124 | 125 | content_image = utils.load_image(args.content_image, scale=args.content_scale) 126 | content_transform = transforms.Compose([ 127 | transforms.ToTensor(), 128 | transforms.Lambda(lambda x: x.mul(255)) 129 | ]) 130 | content_image = content_transform(content_image) 131 | content_image = content_image.unsqueeze(0).to(device) 132 | 133 | if args.model.endswith(".onnx"): 134 | output = stylize_onnx_caffe2(content_image, args) 135 | else: 136 | with torch.no_grad(): 137 | style_model = TransformerNet() 138 | state_dict = torch.load(args.model) 139 | # remove saved deprecated running_* keys in InstanceNorm from the checkpoint 140 | for k in list(state_dict.keys()): 141 | if re.search(r'in\d+\.running_(mean|var)$', k): 142 | del state_dict[k] 143 | style_model.load_state_dict(state_dict) 144 | style_model.to(device) 145 | if args.export_onnx: 146 | assert args.export_onnx.endswith(".onnx"), "Export model file should end with .onnx" 147 | output = torch.onnx._export(style_model, content_image, args.export_onnx).cpu() 148 | else: 149 | output = style_model(content_image).cpu() 150 | utils.save_image(args.output_image, output[0]) 151 | 152 | 153 | def stylize_onnx_caffe2(content_image, args): 154 | """ 155 | Read ONNX model and run it using Caffe2 156 | """ 157 | 158 | assert not args.export_onnx 159 | 160 | import onnx 161 | import onnx_caffe2.backend 162 | 163 | model = onnx.load(args.model) 164 | 165 | prepared_backend = onnx_caffe2.backend.prepare(model, device='CUDA' if args.cuda else 'CPU') 166 | inp = {model.graph.input[0].name: content_image.numpy()} 167 | c2_out = prepared_backend.run(inp)[0] 168 | 169 | return torch.from_numpy(c2_out) 170 | 171 | 172 | def main(): 173 | main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style") 174 | subparsers = 
main_arg_parser.add_subparsers(title="subcommands", dest="subcommand") 175 | 176 | train_arg_parser = subparsers.add_parser("train", help="parser for training arguments") 177 | train_arg_parser.add_argument("--epochs", type=int, default=2, 178 | help="number of training epochs, default is 2") 179 | train_arg_parser.add_argument("--batch-size", type=int, default=4, 180 | help="batch size for training, default is 4") 181 | train_arg_parser.add_argument("--dataset", type=str, required=True, 182 | help="path to training dataset, the path should point to a folder " 183 | "containing another folder with all the training images") 184 | train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg", 185 | help="path to style-image") 186 | train_arg_parser.add_argument("--save-model-dir", type=str, required=True, 187 | help="path to folder where trained model will be saved.") 188 | train_arg_parser.add_argument("--checkpoint-model-dir", type=str, default=None, 189 | help="path to folder where checkpoints of trained models will be saved") 190 | train_arg_parser.add_argument("--image-size", type=int, default=256, 191 | help="size of training images, default is 256 X 256") 192 | train_arg_parser.add_argument("--style-size", type=int, default=None, 193 | help="size of style-image, default is the original size of style image") 194 | train_arg_parser.add_argument("--cuda", type=int, required=True, 195 | help="set it to 1 for running on GPU, 0 for CPU") 196 | train_arg_parser.add_argument("--seed", type=int, default=42, 197 | help="random seed for training") 198 | train_arg_parser.add_argument("--content-weight", type=float, default=1e5, 199 | help="weight for content-loss, default is 1e5") 200 | train_arg_parser.add_argument("--style-weight", type=float, default=1e10, 201 | help="weight for style-loss, default is 1e10") 202 | train_arg_parser.add_argument("--lr", type=float, default=1e-3, 203 | help="learning rate, default is 1e-3") 204 | train_arg_parser.add_argument("--log-interval", type=int, default=500, 205 | help="number of images after which the training loss is logged, default is 500") 206 | train_arg_parser.add_argument("--checkpoint-interval", type=int, default=2000, 207 | help="number of batches after which a checkpoint of the trained model will be created") 208 | 209 | eval_arg_parser = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments") 210 | eval_arg_parser.add_argument("--content-image", type=str, required=True, 211 | help="path to content image you want to stylize") 212 | eval_arg_parser.add_argument("--content-scale", type=float, default=None, 213 | help="factor for scaling down the content image") 214 | eval_arg_parser.add_argument("--output-image", type=str, required=True, 215 | help="path for saving the output image") 216 | eval_arg_parser.add_argument("--model", type=str, required=True, 217 | help="saved model to be used for stylizing the image. 
If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") 218 | eval_arg_parser.add_argument("--cuda", type=int, required=True, 219 | help="set it to 1 for running on GPU, 0 for CPU") 220 | eval_arg_parser.add_argument("--export_onnx", type=str, 221 | help="export ONNX model to a given file") 222 | 223 | args = main_arg_parser.parse_args() 224 | 225 | if args.subcommand is None: 226 | print("ERROR: specify either train or eval") 227 | sys.exit(1) 228 | if args.cuda and not torch.cuda.is_available(): 229 | print("ERROR: cuda is not available, try running on CPU") 230 | sys.exit(1) 231 | 232 | if args.subcommand == "train": 233 | check_paths(args) 234 | train(args) 235 | else: 236 | stylize(args) 237 | 238 | 239 | if __name__ == "__main__": 240 | main() 241 | -------------------------------------------------------------------------------- /imagenet/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.distributed as dist 13 | import torch.optim 14 | import torch.utils.data 15 | import torch.utils.data.distributed 16 | import torchvision.transforms as transforms 17 | import torchvision.datasets as datasets 18 | import torchvision.models as models 19 | 20 | model_names = sorted(name for name in models.__dict__ 21 | if name.islower() and not name.startswith("__") 22 | and callable(models.__dict__[name])) 23 | 24 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 25 | parser.add_argument('data', metavar='DIR', 26 | help='path to dataset') 27 | parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', 28 | choices=model_names, 29 | help='model architecture: ' + 30 | ' | '.join(model_names) + 31 | ' (default: resnet18)') 32 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', 33 | help='number of data loading workers (default: 4)') 34 | parser.add_argument('--epochs', default=90, type=int, metavar='N', 35 | help='number of total epochs to run') 36 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 37 | help='manual epoch number (useful on restarts)') 38 | parser.add_argument('-b', '--batch-size', default=256, type=int, 39 | metavar='N', help='mini-batch size (default: 256)') 40 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, 41 | metavar='LR', help='initial learning rate') 42 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 43 | help='momentum') 44 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 45 | metavar='W', help='weight decay (default: 1e-4)') 46 | parser.add_argument('--print-freq', '-p', default=10, type=int, 47 | metavar='N', help='print frequency (default: 10)') 48 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 49 | help='path to latest checkpoint (default: none)') 50 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 51 | help='evaluate model on validation set') 52 | parser.add_argument('--pretrained', dest='pretrained', action='store_true', 53 | help='use pre-trained model') 54 | parser.add_argument('--world-size', default=1, type=int, 55 | help='number of distributed processes') 56 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, 57 | help='url used to 
set up distributed training') 58 | parser.add_argument('--dist-backend', default='gloo', type=str, 59 | help='distributed backend') 60 | parser.add_argument('--seed', default=None, type=int, 61 | help='seed for initializing training. ') 62 | parser.add_argument('--gpu', default=None, type=int, 63 | help='GPU id to use.') 64 | 65 | best_prec1 = 0 66 | 67 | 68 | def main(): 69 | global args, best_prec1 70 | args = parser.parse_args() 71 | 72 | if args.seed is not None: 73 | random.seed(args.seed) 74 | torch.manual_seed(args.seed) 75 | cudnn.deterministic = True 76 | warnings.warn('You have chosen to seed training. ' 77 | 'This will turn on the CUDNN deterministic setting, ' 78 | 'which can slow down your training considerably! ' 79 | 'You may see unexpected behavior when restarting ' 80 | 'from checkpoints.') 81 | 82 | if args.gpu is not None: 83 | warnings.warn('You have chosen a specific GPU. This will completely ' 84 | 'disable data parallelism.') 85 | 86 | args.distributed = args.world_size > 1 87 | 88 | if args.distributed: 89 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 90 | world_size=args.world_size) 91 | 92 | # create model 93 | if args.pretrained: 94 | print("=> using pre-trained model '{}'".format(args.arch)) 95 | model = models.__dict__[args.arch](pretrained=True) 96 | else: 97 | print("=> creating model '{}'".format(args.arch)) 98 | model = models.__dict__[args.arch]() 99 | 100 | if args.gpu is not None: 101 | model = model.cuda(args.gpu) 102 | elif args.distributed: 103 | model.cuda() 104 | model = torch.nn.parallel.DistributedDataParallel(model) 105 | else: 106 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): 107 | model.features = torch.nn.DataParallel(model.features) 108 | model.cuda() 109 | else: 110 | model = torch.nn.DataParallel(model).cuda() 111 | 112 | # define loss function (criterion) and optimizer 113 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 114 | 115 | optimizer = torch.optim.SGD(model.parameters(), args.lr, 116 | momentum=args.momentum, 117 | weight_decay=args.weight_decay) 118 | 119 | # optionally resume from a checkpoint 120 | if args.resume: 121 | if os.path.isfile(args.resume): 122 | print("=> loading checkpoint '{}'".format(args.resume)) 123 | checkpoint = torch.load(args.resume) 124 | args.start_epoch = checkpoint['epoch'] 125 | best_prec1 = checkpoint['best_prec1'] 126 | model.load_state_dict(checkpoint['state_dict']) 127 | optimizer.load_state_dict(checkpoint['optimizer']) 128 | print("=> loaded checkpoint '{}' (epoch {})" 129 | .format(args.resume, checkpoint['epoch'])) 130 | else: 131 | print("=> no checkpoint found at '{}'".format(args.resume)) 132 | 133 | cudnn.benchmark = True 134 | 135 | # Data loading code 136 | traindir = os.path.join(args.data, 'train') 137 | valdir = os.path.join(args.data, 'val') 138 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 139 | std=[0.229, 0.224, 0.225]) 140 | 141 | train_dataset = datasets.ImageFolder( 142 | traindir, 143 | transforms.Compose([ 144 | transforms.RandomResizedCrop(224), 145 | transforms.RandomHorizontalFlip(), 146 | transforms.ToTensor(), 147 | normalize, 148 | ])) 149 | 150 | if args.distributed: 151 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 152 | else: 153 | train_sampler = None 154 | 155 | train_loader = torch.utils.data.DataLoader( 156 | train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), 157 | num_workers=args.workers, pin_memory=True, sampler=train_sampler) 
158 | 159 | val_loader = torch.utils.data.DataLoader( 160 | datasets.ImageFolder(valdir, transforms.Compose([ 161 | transforms.Resize(256), 162 | transforms.CenterCrop(224), 163 | transforms.ToTensor(), 164 | normalize, 165 | ])), 166 | batch_size=args.batch_size, shuffle=False, 167 | num_workers=args.workers, pin_memory=True) 168 | 169 | if args.evaluate: 170 | validate(val_loader, model, criterion) 171 | return 172 | 173 | for epoch in range(args.start_epoch, args.epochs): 174 | if args.distributed: 175 | train_sampler.set_epoch(epoch) 176 | adjust_learning_rate(optimizer, epoch) 177 | 178 | # train for one epoch 179 | train(train_loader, model, criterion, optimizer, epoch) 180 | 181 | # evaluate on validation set 182 | prec1 = validate(val_loader, model, criterion) 183 | 184 | # remember best prec@1 and save checkpoint 185 | is_best = prec1 > best_prec1 186 | best_prec1 = max(prec1, best_prec1) 187 | save_checkpoint({ 188 | 'epoch': epoch + 1, 189 | 'arch': args.arch, 190 | 'state_dict': model.state_dict(), 191 | 'best_prec1': best_prec1, 192 | 'optimizer' : optimizer.state_dict(), 193 | }, is_best) 194 | 195 | 196 | def train(train_loader, model, criterion, optimizer, epoch): 197 | batch_time = AverageMeter() 198 | data_time = AverageMeter() 199 | losses = AverageMeter() 200 | top1 = AverageMeter() 201 | top5 = AverageMeter() 202 | 203 | # switch to train mode 204 | model.train() 205 | 206 | end = time.time() 207 | for i, (input, target) in enumerate(train_loader): 208 | # measure data loading time 209 | data_time.update(time.time() - end) 210 | 211 | if args.gpu is not None: 212 | input = input.cuda(args.gpu, non_blocking=True) 213 | target = target.cuda(args.gpu, non_blocking=True) 214 | 215 | # compute output 216 | output = model(input) 217 | loss = criterion(output, target) 218 | 219 | # measure accuracy and record loss 220 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 221 | losses.update(loss.item(), input.size(0)) 222 | top1.update(prec1[0], input.size(0)) 223 | top5.update(prec5[0], input.size(0)) 224 | 225 | # compute gradient and do SGD step 226 | optimizer.zero_grad() 227 | loss.backward() 228 | optimizer.step() 229 | 230 | # measure elapsed time 231 | batch_time.update(time.time() - end) 232 | end = time.time() 233 | 234 | if i % args.print_freq == 0: 235 | print('Epoch: [{0}][{1}/{2}]\t' 236 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 237 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 238 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 239 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 240 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 241 | epoch, i, len(train_loader), batch_time=batch_time, 242 | data_time=data_time, loss=losses, top1=top1, top5=top5)) 243 | 244 | 245 | def validate(val_loader, model, criterion): 246 | batch_time = AverageMeter() 247 | losses = AverageMeter() 248 | top1 = AverageMeter() 249 | top5 = AverageMeter() 250 | 251 | # switch to evaluate mode 252 | model.eval() 253 | 254 | with torch.no_grad(): 255 | end = time.time() 256 | for i, (input, target) in enumerate(val_loader): 257 | if args.gpu is not None: 258 | input = input.cuda(args.gpu, non_blocking=True) 259 | target = target.cuda(args.gpu, non_blocking=True) 260 | 261 | # compute output 262 | output = model(input) 263 | loss = criterion(output, target) 264 | 265 | # measure accuracy and record loss 266 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 267 | losses.update(loss.item(), input.size(0)) 268 | top1.update(prec1[0], input.size(0)) 269 | 
top5.update(prec5[0], input.size(0)) 270 | 271 | # measure elapsed time 272 | batch_time.update(time.time() - end) 273 | end = time.time() 274 | 275 | if i % args.print_freq == 0: 276 | print('Test: [{0}/{1}]\t' 277 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 278 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 279 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 280 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 281 | i, len(val_loader), batch_time=batch_time, loss=losses, 282 | top1=top1, top5=top5)) 283 | 284 | print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}' 285 | .format(top1=top1, top5=top5)) 286 | 287 | return top1.avg 288 | 289 | 290 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 291 | torch.save(state, filename) 292 | if is_best: 293 | shutil.copyfile(filename, 'model_best.pth.tar') 294 | 295 | 296 | class AverageMeter(object): 297 | """Computes and stores the average and current value""" 298 | def __init__(self): 299 | self.reset() 300 | 301 | def reset(self): 302 | self.val = 0 303 | self.avg = 0 304 | self.sum = 0 305 | self.count = 0 306 | 307 | def update(self, val, n=1): 308 | self.val = val 309 | self.sum += val * n 310 | self.count += n 311 | self.avg = self.sum / self.count 312 | 313 | 314 | def adjust_learning_rate(optimizer, epoch): 315 | """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" 316 | lr = args.lr * (0.1 ** (epoch // 30)) 317 | for param_group in optimizer.param_groups: 318 | param_group['lr'] = lr 319 | 320 | 321 | def accuracy(output, target, topk=(1,)): 322 | """Computes the accuracy over the k top predictions for the specified values of k""" 323 | with torch.no_grad(): 324 | maxk = max(topk) 325 | batch_size = target.size(0) 326 | 327 | _, pred = output.topk(maxk, 1, True, True) 328 | pred = pred.t() 329 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 330 | 331 | res = [] 332 | for k in topk: 333 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 334 | res.append(correct_k.mul_(100.0 / batch_size)) 335 | return res 336 | 337 | 338 | if __name__ == '__main__': 339 | main() 340 | -------------------------------------------------------------------------------- /word_language_model/notebooks/02_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "02_Inference.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "toc_visible": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "metadata": { 20 | "id": "15yLeJZw8ncp", 21 | "colab_type": "text" 22 | }, 23 | "cell_type": "markdown", 24 | "source": [ 25 | "## Prepare the Environment" 26 | ] 27 | }, 28 | { 29 | "metadata": { 30 | "id": "b2uU_hgOtGur", 31 | "colab_type": "code", 32 | "colab": { 33 | "base_uri": "https://localhost:8080/", 34 | "height": 51 35 | }, 36 | "outputId": "3f490e97-3902-4ebe-e1c6-f5166cc8b6a8" 37 | }, 38 | "cell_type": "code", 39 | "source": [ 40 | "!pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html" 41 | ], 42 | "execution_count": 6, 43 | "outputs": [ 44 | { 45 | "output_type": "stream", 46 | "text": [ 47 | "Looking in links: https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html\n", 48 | "Requirement already satisfied: torch_nightly in /usr/local/lib/python3.6/dist-packages (1.0.0.dev20181011)\n" 49 | ], 
50 | "name": "stdout" 51 | } 52 | ] 53 | }, 54 | { 55 | "metadata": { 56 | "id": "OSsB1s-p4kPc", 57 | "colab_type": "code", 58 | "colab": { 59 | "base_uri": "https://localhost:8080/", 60 | "height": 34 61 | }, 62 | "outputId": "09363dc9-a617-41aa-f61e-1c44efcae94b" 63 | }, 64 | "cell_type": "code", 65 | "source": [ 66 | "!git clone https://github.com/ceshine/examples.git pytorch_examples" 67 | ], 68 | "execution_count": 7, 69 | "outputs": [ 70 | { 71 | "output_type": "stream", 72 | "text": [ 73 | "fatal: destination path 'pytorch_examples' already exists and is not an empty directory.\n" 74 | ], 75 | "name": "stdout" 76 | } 77 | ] 78 | }, 79 | { 80 | "metadata": { 81 | "id": "vtJtfZas4oS6", 82 | "colab_type": "code", 83 | "colab": { 84 | "base_uri": "https://localhost:8080/", 85 | "height": 85 86 | }, 87 | "outputId": "153c2cd0-cb8f-4de7-c22d-67e3ad706086" 88 | }, 89 | "cell_type": "code", 90 | "source": [ 91 | "%cd pytorch_examples/word_language_model\n", 92 | "%ls" 93 | ], 94 | "execution_count": 8, 95 | "outputs": [ 96 | { 97 | "output_type": "stream", 98 | "text": [ 99 | "/content/pytorch_examples/word_language_model\n", 100 | "\u001b[0m\u001b[01;34mdata\u001b[0m/ lm_model.pt model.py requirements.txt\n", 101 | "data.py main.py \u001b[01;34m__pycache__\u001b[0m/ train_new.log\n", 102 | "generate.py model_new.pt README.md\n" 103 | ], 104 | "name": "stdout" 105 | } 106 | ] 107 | }, 108 | { 109 | "metadata": { 110 | "id": "yAAdydfL6vcs", 111 | "colab_type": "text" 112 | }, 113 | "cell_type": "markdown", 114 | "source": [ 115 | "Upload the trained model (from notebook 01_Training.ipynb):" 116 | ] 117 | }, 118 | { 119 | "metadata": { 120 | "id": "PpgaP3x1icla", 121 | "colab_type": "text" 122 | }, 123 | "cell_type": "markdown", 124 | "source": [ 125 | "" 126 | ] 127 | }, 128 | { 129 | "metadata": { 130 | "id": "9hByUNAB6F6_", 131 | "colab_type": "code", 132 | "colab": { 133 | "resources": { 134 | "http://localhost:8080/nbextensions/google.colab/files.js": { 135 | "data": 
"Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZm
Fsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQuYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJlYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZ
S5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=", 136 | "ok": true, 137 | "headers": [ 138 | [ 139 | "content-type", 140 | "application/javascript" 141 | ] 142 | ], 143 | "status": 200, 144 | "status_text": "" 145 | } 146 | }, 147 | "base_uri": "https://localhost:8080/", 148 | "height": 38 149 | }, 150 | "outputId": "02a93afb-8cc1-45cc-bc27-6bb05c1f9260" 151 | }, 152 | "cell_type": "code", 153 | "source": [ 154 | "from google.colab import files\n", 155 | "\n", 156 | "uploaded = files.upload()\n", 157 | "\n", 158 | "for fn in uploaded.keys():\n", 159 | " print('User uploaded file \"{name}\" with length {length} bytes'.format(\n", 160 | " name=fn, length=len(uploaded[fn])))" 161 | ], 162 | "execution_count": 19, 163 | "outputs": [ 164 | { 165 | "output_type": "display_data", 166 | "data": { 167 | "text/html": [ 168 | "\n", 169 | " \n", 170 | " \n", 171 | " Upload widget is only available when the cell has been executed in the\n", 172 | " current browser session. Please rerun this cell to enable.\n", 173 | " \n", 174 | " " 175 | ], 176 | "text/plain": [ 177 | "" 178 | ] 179 | }, 180 | "metadata": { 181 | "tags": [] 182 | } 183 | } 184 | ] 185 | }, 186 | { 187 | "metadata": { 188 | "id": "M5fre8JFEO8R", 189 | "colab_type": "text" 190 | }, 191 | "cell_type": "markdown", 192 | "source": [ 193 | "The above did not work for me (because I constantly failed to download the entire file). Using gsutil instead here:" 194 | ] 195 | }, 196 | { 197 | "metadata": { 198 | "id": "vNhKUTtMEXZZ", 199 | "colab_type": "code", 200 | "colab": { 201 | "base_uri": "https://localhost:8080/", 202 | "height": 34 203 | }, 204 | "outputId": "74ceda78-abfe-4fb7-d278-b62bf0ed35d8" 205 | }, 206 | "cell_type": "code", 207 | "source": [ 208 | "from google.colab import auth\n", 209 | "auth.authenticate_user()\n", 210 | "\n", 211 | "# https://cloud.google.com/resource-manager/docs/creating-managing-projects\n", 212 | "project_id = 'personal-project-196600'\n", 213 | "!gcloud config set project {project_id}" 214 | ], 215 | "execution_count": 138, 216 | "outputs": [ 217 | { 218 | "output_type": "stream", 219 | "text": [ 220 | "Updated property [core/project].\n" 221 | ], 222 | "name": "stdout" 223 | } 224 | ] 225 | }, 226 | { 227 | "metadata": { 228 | "id": "5GIVuGa7Ei4e", 229 | "colab_type": "code", 230 | "colab": { 231 | "base_uri": "https://localhost:8080/", 232 | "height": 68 233 | }, 234 | "outputId": "5e591b04-690d-4e73-cbd9-2e28112624eb" 235 | }, 236 | "cell_type": "code", 237 | "source": [ 238 | "!gsutil cp gs://ceshine-colab-tmp/lm_model.pt lm_model.pt" 239 | ], 240 | "execution_count": 140, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "text": [ 245 | "Copying gs://ceshine-colab-tmp/lm_model.pt...\n", 246 | "\\ [1 files][108.5 MiB/108.5 MiB] \n", 247 | "Operation completed over 1 objects/108.5 MiB. 
\n" 248 | ], 249 | "name": "stdout" 250 | } 251 | ] 252 | }, 253 | { 254 | "metadata": { 255 | "id": "CZAw4xjq9wSH", 256 | "colab_type": "text" 257 | }, 258 | "cell_type": "markdown", 259 | "source": [ 260 | "Import libraries, functions and classes:" 261 | ] 262 | }, 263 | { 264 | "metadata": { 265 | "id": "PX8bxlu_7wYX", 266 | "colab_type": "code", 267 | "colab": {} 268 | }, 269 | "cell_type": "code", 270 | "source": [ 271 | "import torch\n", 272 | "import numpy as np\n", 273 | "import pandas as pd\n", 274 | "\n", 275 | "from model import RNNModel\n", 276 | "from data import Dictionary, Corpus" 277 | ], 278 | "execution_count": 0, 279 | "outputs": [] 280 | }, 281 | { 282 | "metadata": { 283 | "id": "A5FDZG9y46of", 284 | "colab_type": "text" 285 | }, 286 | "cell_type": "markdown", 287 | "source": [ 288 | "## Prepare Dictionary" 289 | ] 290 | }, 291 | { 292 | "metadata": { 293 | "id": "AOUPtIdJ-0qC", 294 | "colab_type": "code", 295 | "colab": { 296 | "base_uri": "https://localhost:8080/", 297 | "height": 102 298 | }, 299 | "outputId": "711f133f-b466-48ff-ae07-56ebd84368dd" 300 | }, 301 | "cell_type": "code", 302 | "source": [ 303 | "DATA_PATH = \"./data/wikitext-2\"\n", 304 | "corpus = Corpus(DATA_PATH)\n", 305 | "\n", 306 | "print(\"Number of tokens:\")\n", 307 | "print(\"Train: \", len(corpus.train))\n", 308 | "print(\"Valid: \", len(corpus.valid))\n", 309 | "print(\"Test: \", len(corpus.test))\n", 310 | "\n", 311 | "print(\"Vocabulary size:\", len(corpus.dictionary.idx2word))" 312 | ], 313 | "execution_count": 161, 314 | "outputs": [ 315 | { 316 | "output_type": "stream", 317 | "text": [ 318 | "Number of tokens:\n", 319 | "Train: 2075677\n", 320 | "Valid: 216347\n", 321 | "Test: 244102\n", 322 | "Vocabulary size: 33278\n" 323 | ], 324 | "name": "stdout" 325 | } 326 | ] 327 | }, 328 | { 329 | "metadata": { 330 | "id": "FU4dIZ68_pxl", 331 | "colab_type": "text" 332 | }, 333 | "cell_type": "markdown", 334 | "source": [ 335 | "## Load Model" 336 | ] 337 | }, 338 | { 339 | "metadata": { 340 | "id": "Yr54aXIS_PZO", 341 | "colab_type": "code", 342 | "colab": {} 343 | }, 344 | "cell_type": "code", 345 | "source": [ 346 | "DEVICE = torch.device(\"cpu\")\n", 347 | "# model = model.RNNModel(\n", 348 | "# \"LSTM\", len(corpus.dictionary), 650,\n", 349 | "# 650, 2, 0.5, True\n", 350 | "# ).to(DEVICE)" 351 | ], 352 | "execution_count": 0, 353 | "outputs": [] 354 | }, 355 | { 356 | "metadata": { 357 | "id": "7QWwNQyPAfSa", 358 | "colab_type": "code", 359 | "colab": {} 360 | }, 361 | "cell_type": "code", 362 | "source": [ 363 | "with open(\"lm_model.pt\", 'rb') as f:\n", 364 | " model = torch.load(f, map_location='cpu')\n", 365 | "model = model.to(DEVICE)" 366 | ], 367 | "execution_count": 0, 368 | "outputs": [] 369 | }, 370 | { 371 | "metadata": { 372 | "id": "BYerbMO0FEwd", 373 | "colab_type": "code", 374 | "colab": { 375 | "base_uri": "https://localhost:8080/", 376 | "height": 119 377 | }, 378 | "outputId": "6d25897e-e2e4-4948-8794-333c98d09324" 379 | }, 380 | "cell_type": "code", 381 | "source": [ 382 | "model.eval()" 383 | ], 384 | "execution_count": 143, 385 | "outputs": [ 386 | { 387 | "output_type": "execute_result", 388 | "data": { 389 | "text/plain": [ 390 | "RNNModel(\n", 391 | " (drop): Dropout(p=0.5)\n", 392 | " (encoder): Embedding(33278, 650)\n", 393 | " (rnn): LSTM(650, 650, num_layers=2, dropout=0.5)\n", 394 | " (decoder): Linear(in_features=650, out_features=33278, bias=True)\n", 395 | ")" 396 | ] 397 | }, 398 | "metadata": { 399 | "tags": [] 400 | }, 401 | "execution_count": 143 402 | 
} 403 | ] 404 | }, 405 | { 406 | "metadata": { 407 | "id": "oJM06o9eFKpt", 408 | "colab_type": "text" 409 | }, 410 | "cell_type": "markdown", 411 | "source": [ 412 | "## Evaluate with Test Documents" 413 | ] 414 | }, 415 | { 416 | "metadata": { 417 | "id": "z7EuOn6idH_7", 418 | "colab_type": "text" 419 | }, 420 | "cell_type": "markdown", 421 | "source": [ 422 | "### Calculate the Perplexity of the Test Predictions\n", 423 | "To confirm we have loaded the correct model." 424 | ] 425 | }, 426 | { 427 | "metadata": { 428 | "id": "eFjgGcqcbLk6", 429 | "colab_type": "code", 430 | "colab": { 431 | "base_uri": "https://localhost:8080/", 432 | "height": 51 433 | }, 434 | "outputId": "0d45ab36-fbdb-43df-93d2-707b9e8933e5" 435 | }, 436 | "cell_type": "code", 437 | "source": [ 438 | "%%time\n", 439 | "BPTT = 50\n", 440 | "CRITERION = torch.nn.CrossEntropyLoss()\n", 441 | "\n", 442 | "def batchify(data, bsz):\n", 443 | " # Work out how cleanly we can divide the dataset into bsz parts.\n", 444 | " nbatch = data.size(0) // bsz\n", 445 | " # Trim off any extra elements that wouldn't cleanly fit (remainders).\n", 446 | " data = data.narrow(0, 0, nbatch * bsz)\n", 447 | " # Evenly divide the data across the bsz batches.\n", 448 | " data = data.view(bsz, -1).t().contiguous()\n", 449 | " return data.to(DEVICE)\n", 450 | "\n", 451 | "def get_batch(source, i):\n", 452 | " seq_len = min(BPTT, len(source) - 1 - i)\n", 453 | " data = source[i:i+seq_len]\n", 454 | " target = source[i+1:i+1+seq_len].view(-1)\n", 455 | " return data, target\n", 456 | "\n", 457 | "def evaluate(data_source):\n", 458 | " # Turn on evaluation mode which disables dropout.\n", 459 | " model.eval()\n", 460 | " total_loss = 0.\n", 461 | " ntokens = len(corpus.dictionary)\n", 462 | " hidden = model.init_hidden(10)\n", 463 | " with torch.no_grad():\n", 464 | " for i in range(0, data_source.size(0) - 1, BPTT):\n", 465 | " data, targets = get_batch(data_source, i)\n", 466 | " output, hidden = model(data, hidden)\n", 467 | " output_flat = output.view(-1, ntokens)\n", 468 | " total_loss += len(data) * CRITERION(output_flat, targets).item()\n", 469 | " hidden = repackage_hidden(hidden)\n", 470 | " return total_loss / len(data_source)\n", 471 | "\n", 472 | "def repackage_hidden(h):\n", 473 | " \"\"\"Wraps hidden states in new Tensors, to detach them from their history.\"\"\"\n", 474 | " if isinstance(h, torch.Tensor):\n", 475 | " return h.detach()\n", 476 | " else:\n", 477 | " return tuple(repackage_hidden(v) for v in h)\n", 478 | " \n", 479 | "test_data = batchify(corpus.test, 10)\n", 480 | "loss = evaluate(test_data)" 481 | ], 482 | "execution_count": 146, 483 | "outputs": [ 484 | { 485 | "output_type": "stream", 486 | "text": [ 487 | "CPU times: user 5min 55s, sys: 1.68 s, total: 5min 57s\n", 488 | "Wall time: 5min 57s\n" 489 | ], 490 | "name": "stdout" 491 | } 492 | ] 493 | }, 494 | { 495 | "metadata": { 496 | "id": "mt5E12qghArC", 497 | "colab_type": "code", 498 | "colab": { 499 | "base_uri": "https://localhost:8080/", 500 | "height": 34 501 | }, 502 | "outputId": "13da0e7d-d3ce-44af-910d-d0a3a4265d90" 503 | }, 504 | "cell_type": "code", 505 | "source": [ 506 | "loss, np.exp(loss)" 507 | ], 508 | "execution_count": 147, 509 | "outputs": [ 510 | { 511 | "output_type": "execute_result", 512 | "data": { 513 | "text/plain": [ 514 | "(4.486460813329338, 88.8065859480267)" 515 | ] 516 | }, 517 | "metadata": { 518 | "tags": [] 519 | }, 520 | "execution_count": 147 521 | } 522 | ] 523 | }, 524 | { 525 | "metadata": { 526 | "id": "mRWnZR7oM9DO", 527 | 
"colab_type": "text" 528 | }, 529 | "cell_type": "markdown", 530 | "source": [ 531 | "### Check the Next Word Predictions" 532 | ] 533 | }, 534 | { 535 | "metadata": { 536 | "id": "UYzjsoksGn59", 537 | "colab_type": "code", 538 | "colab": { 539 | "base_uri": "https://localhost:8080/", 540 | "height": 34 541 | }, 542 | "outputId": "11a12226-12d2-4dcd-86ca-85de4ba11849" 543 | }, 544 | "cell_type": "code", 545 | "source": [ 546 | "test_tokens = corpus.test.numpy()\n", 547 | "eos_pos = np.where(test_tokens == corpus.dictionary.word2idx[\"\"])[0]\n", 548 | "print(\"Number of lines in test:\", len(eos_pos))" 549 | ], 550 | "execution_count": 148, 551 | "outputs": [ 552 | { 553 | "output_type": "stream", 554 | "text": [ 555 | "Number of lines in test: 2891\n" 556 | ], 557 | "name": "stdout" 558 | } 559 | ] 560 | }, 561 | { 562 | "metadata": { 563 | "id": "DJRJZL-SF943", 564 | "colab_type": "code", 565 | "colab": { 566 | "base_uri": "https://localhost:8080/", 567 | "height": 54 568 | }, 569 | "outputId": "9df5a937-86fa-4dae-aafa-65f87b123870" 570 | }, 571 | "cell_type": "code", 572 | "source": [ 573 | "# A random line from test dataset\n", 574 | "print(\" \".join([corpus.dictionary.idx2word[c] for c in test_tokens[eos_pos[28]+1:eos_pos[29]]]))" 575 | ], 576 | "execution_count": 149, 577 | "outputs": [ 578 | { 579 | "output_type": "stream", 580 | "text": [ 581 | "The An Rebellion began in December , and was not completely suppressed for almost eight years . It caused enormous disruption to Chinese society : the census of 754 recorded 52 @.@ 9 million people , but ten years later , the census counted just 16 @.@ 9 million , the remainder having been displaced or killed . During this time , Du Fu led a largely itinerant life by wars , associated and imperial . This period of was the making of Du Fu as a poet : Even Shan Chou has written that , \" What he saw around him — the lives of his family , neighbors , and strangers – what he heard , and what he hoped for or feared from the progress of various campaigns — these became the enduring themes of his poetry \" . Even when he learned of the death of his youngest child , he turned to the suffering of others in his poetry instead of dwelling upon his own . 
Du Fu wrote :\n" 582 | ], 583 | "name": "stdout" 584 | } 585 | ] 586 | }, 587 | { 588 | "metadata": { 589 | "id": "4Lsf43zAFKDT", 590 | "colab_type": "code", 591 | "colab": {} 592 | }, 593 | "cell_type": "code", 594 | "source": [ 595 | "def eval_chunk(start, end):\n", 596 | " token_tensor = corpus.test[eos_pos[start]+1:eos_pos[end]]\n", 597 | " hidden = model.init_hidden(1)\n", 598 | " with torch.no_grad():\n", 599 | " targets = token_tensor[1:]\n", 600 | " output, hidden = model(token_tensor.unsqueeze(1), hidden)\n", 601 | " output_flat = output.squeeze(1)\n", 602 | " loss = CRITERION(output_flat[:-1], targets).item()\n", 603 | " \n", 604 | " sorted_idx = np.argsort(output_flat.numpy(), 1)\n", 605 | " preds = []\n", 606 | " for i in range(1, 4):\n", 607 | " preds.append(list(map(lambda x: corpus.dictionary.idx2word[x], sorted_idx[:, -i])))\n", 608 | " # preds = list(map(lambda x: itos[x], np.argmax(logits.data.cpu().numpy(), 1)))\n", 609 | " return (\n", 610 | " loss,\n", 611 | " pd.DataFrame({\n", 612 | " \"orig\": [corpus.dictionary.idx2word[x] for x in token_tensor.numpy()] + [\" \"], \n", 613 | " \"pred_1\": [\"\"] + preds[0], \"pred_2\": [\"\"] + preds[1], \"pred_3\": [\"\"] + preds[2]\n", 614 | " })\n", 615 | " )" 616 | ], 617 | "execution_count": 0, 618 | "outputs": [] 619 | }, 620 | { 621 | "metadata": { 622 | "id": "zwgGIiA8MwmO", 623 | "colab_type": "text" 624 | }, 625 | "cell_type": "markdown", 626 | "source": [ 627 | "Let's try using only one line:" 628 | ] 629 | }, 630 | { 631 | "metadata": { 632 | "id": "_gbOxcgBLSv8", 633 | "colab_type": "code", 634 | "colab": { 635 | "base_uri": "https://localhost:8080/", 636 | "height": 1616 637 | }, 638 | "outputId": "813549de-18af-4f60-85dd-ae5f9c1ef2ef" 639 | }, 640 | "cell_type": "code", 641 | "source": [ 642 | "loss, df = eval_chunk(28, 29)\n", 643 | "print(\"Loss:\", np.exp(loss))\n", 644 | "df.iloc[-50:]" 645 | ], 646 | "execution_count": 151, 647 | "outputs": [ 648 | { 649 | "output_type": "stream", 650 | "text": [ 651 | "Loss: 163.91555818335866\n" 652 | ], 653 | "name": "stdout" 654 | }, 655 | { 656 | "output_type": "execute_result", 657 | "data": { 658 | "text/html": [ 659 | "
\n", 660 | "\n", 673 | "\n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 
945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | "
origpred_1pred_2pred_3
133progress<unk>worldtime
134ofof.,
135variousthehisa
136campaignspeoplethings<unk>
137.,\"
138theseandthe\"
139becamearewerepeople
140theathemore
141enduringmost<unk>first
142themes<unk>subjectthing
143ofofin.
144histhehisall
145poetrylifeown<unk>
146\".,and
147..,and
148Even<eos>TheHe
149whenthoughinafter
150hehethe,
151learnedwashaddied
152ofofthatabout
153thethehisher
154death<unk>bookdeath
155ofof,he
156hishisthea
157youngestwifefatherbrother
158childwifesonbrother
159,,inand
160hehetheJohn
161turnedwashadwrote
162toouttodown
163thethea<unk>
164suffering<unk>publichouse
165ofofand,
166othershisthea
167in,and.
168histhehisa
169poetrylifeown<unk>
170instead.,and
171ofof.,
172dwellinghisthea
173upon.,and
174hishisthea
175owndeatharrival<unk>
176<unk>death<unk>work
177..,and
178Du<eos>TheHe
179Fu<unk>BraunJarl
180wrote,wasand
181:athethat
182\"<eos>'
\n", 1036 | "
" 1037 | ], 1038 | "text/plain": [ 1039 | " orig pred_1 pred_2 pred_3\n", 1040 | "133 progress world time\n", 1041 | "134 of of . ,\n", 1042 | "135 various the his a\n", 1043 | "136 campaigns people things \n", 1044 | "137 — . , \"\n", 1045 | "138 these and the \"\n", 1046 | "139 became are were people\n", 1047 | "140 the a the more\n", 1048 | "141 enduring most first\n", 1049 | "142 themes subject thing\n", 1050 | "143 of of in .\n", 1051 | "144 his the his all\n", 1052 | "145 poetry life own \n", 1053 | "146 \" . , and\n", 1054 | "147 . . , and\n", 1055 | "148 Even The He\n", 1056 | "149 when though in after\n", 1057 | "150 he he the ,\n", 1058 | "151 learned was had died\n", 1059 | "152 of of that about\n", 1060 | "153 the the his her\n", 1061 | "154 death book death\n", 1062 | "155 of of , he\n", 1063 | "156 his his the a\n", 1064 | "157 youngest wife father brother\n", 1065 | "158 child wife son brother\n", 1066 | "159 , , in and\n", 1067 | "160 he he the John\n", 1068 | "161 turned was had wrote\n", 1069 | "162 to out to down\n", 1070 | "163 the the a \n", 1071 | "164 suffering public house\n", 1072 | "165 of of and ,\n", 1073 | "166 others his the a\n", 1074 | "167 in , and .\n", 1075 | "168 his the his a\n", 1076 | "169 poetry life own \n", 1077 | "170 instead . , and\n", 1078 | "171 of of . ,\n", 1079 | "172 dwelling his the a\n", 1080 | "173 upon . , and\n", 1081 | "174 his his the a\n", 1082 | "175 own death arrival \n", 1083 | "176 death work\n", 1084 | "177 . . , and\n", 1085 | "178 Du The He\n", 1086 | "179 Fu Braun Jarl\n", 1087 | "180 wrote , was and\n", 1088 | "181 : a the that\n", 1089 | "182 \" '" 1090 | ] 1091 | }, 1092 | "metadata": { 1093 | "tags": [] 1094 | }, 1095 | "execution_count": 151 1096 | } 1097 | ] 1098 | }, 1099 | { 1100 | "metadata": { 1101 | "id": "5NT3hejgMzcH", 1102 | "colab_type": "text" 1103 | }, 1104 | "cell_type": "markdown", 1105 | "source": [ 1106 | "Now try providing more context:" 1107 | ] 1108 | }, 1109 | { 1110 | "metadata": { 1111 | "id": "hIw64ToYMUJp", 1112 | "colab_type": "code", 1113 | "colab": { 1114 | "base_uri": "https://localhost:8080/", 1115 | "height": 1616 1116 | }, 1117 | "outputId": "f8b76fb2-dad3-4eb4-cd47-a419ef260f34" 1118 | }, 1119 | "cell_type": "code", 1120 | "source": [ 1121 | "loss, df = eval_chunk(28, 34)\n", 1122 | "print(\"Loss:\", np.exp(loss))\n", 1123 | "df.iloc[-50:]" 1124 | ], 1125 | "execution_count": 152, 1126 | "outputs": [ 1127 | { 1128 | "output_type": "stream", 1129 | "text": [ 1130 | "Loss: 104.32415212207026\n" 1131 | ], 1132 | "name": "stdout" 1133 | }, 1134 | { 1135 | "output_type": "execute_result", 1136 | "data": { 1137 | "text/html": [ 1138 | "
\n", 1139 | "\n", 1152 | "\n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " 
\n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | "
origpred_1pred_2pred_3
489intothea
490thetheahis
491summer<unk>middlemorning
492ofofand,
493<unk>1918the1916
494;,and.
495thishethehis
496haswastimeis
497traditionallybeenaalso
498beenbeenoccurredcome
499ascribeddescribedaused
500totobythe
501faminethehisa
502,.,and
503butandbutas
504<unk>theheit
505believes,<unk>the
506thatthatthehe
507frustrationthehehis
508isinfromwas
509anotthea
510more\"<unk>great
511likelyimportant<unk>powerful
512reason<unk>partsubject
513.forto.
514He<eos>HeThe
515nextwasalsois
516spent,<unk>was
517aroundtheahis
518sixtheahis
519weeksyearsmonthsdays
520in,ofin
521<unk>thehisa
522(,and.
523now<unk>aand
524<unk><unk>thea
525,),<unk>
526Gansu<unk>thenow
527province,)and
528)),and
529,,and.
530whereandwherebut
531hehethehis
532wrotewashaddied
533moretoathe
534thanthanof<unk>
535sixtyaonethe
536poemsyears@-@men
537.,.and
538<eos>HeThe
\n", 1515 | "
" 1516 | ], 1517 | "text/plain": [ 1518 | " orig pred_1 pred_2 pred_3\n", 1519 | "489 in to the a\n", 1520 | "490 the the a his\n", 1521 | "491 summer middle morning\n", 1522 | "492 of of and ,\n", 1523 | "493 1918 the 1916\n", 1524 | "494 ; , and .\n", 1525 | "495 this he the his\n", 1526 | "496 has was time is\n", 1527 | "497 traditionally been a also\n", 1528 | "498 been been occurred come\n", 1529 | "499 ascribed described a used\n", 1530 | "500 to to by the\n", 1531 | "501 famine the his a\n", 1532 | "502 , . , and\n", 1533 | "503 but and but as\n", 1534 | "504 the he it\n", 1535 | "505 believes , the\n", 1536 | "506 that that the he\n", 1537 | "507 frustration the he his\n", 1538 | "508 is in from was\n", 1539 | "509 a not the a\n", 1540 | "510 more \" great\n", 1541 | "511 likely important powerful\n", 1542 | "512 reason part subject\n", 1543 | "513 . for to .\n", 1544 | "514 He He The\n", 1545 | "515 next was also is\n", 1546 | "516 spent , was\n", 1547 | "517 around the a his\n", 1548 | "518 six the a his\n", 1549 | "519 weeks years months days\n", 1550 | "520 in , of in\n", 1551 | "521 the his a\n", 1552 | "522 ( , and .\n", 1553 | "523 now a and\n", 1554 | "524 the a\n", 1555 | "525 , ) , \n", 1556 | "526 Gansu the now\n", 1557 | "527 province , ) and\n", 1558 | "528 ) ) , and\n", 1559 | "529 , , and .\n", 1560 | "530 where and where but\n", 1561 | "531 he he the his\n", 1562 | "532 wrote was had died\n", 1563 | "533 more to a the\n", 1564 | "534 than than of \n", 1565 | "535 sixty a one the\n", 1566 | "536 poems years @-@ men\n", 1567 | "537 . , . and\n", 1568 | "538 He The" 1569 | ] 1570 | }, 1571 | "metadata": { 1572 | "tags": [] 1573 | }, 1574 | "execution_count": 152 1575 | } 1576 | ] 1577 | }, 1578 | { 1579 | "metadata": { 1580 | "id": "n1J3aRTNNCtJ", 1581 | "colab_type": "text" 1582 | }, 1583 | "cell_type": "markdown", 1584 | "source": [ 1585 | "### Try to Generate Texts" 1586 | ] 1587 | }, 1588 | { 1589 | "metadata": { 1590 | "id": "5vZ1f4HVUHRH", 1591 | "colab_type": "code", 1592 | "colab": { 1593 | "base_uri": "https://localhost:8080/", 1594 | "height": 34 1595 | }, 1596 | "outputId": "6e813046-6f40-4626-f68d-6abc8e1dbbb0" 1597 | }, 1598 | "cell_type": "code", 1599 | "source": [ 1600 | "UNK = corpus.dictionary.word2idx[\"\"]\n", 1601 | "UNK" 1602 | ], 1603 | "execution_count": 153, 1604 | "outputs": [ 1605 | { 1606 | "output_type": "execute_result", 1607 | "data": { 1608 | "text/plain": [ 1609 | "9" 1610 | ] 1611 | }, 1612 | "metadata": { 1613 | "tags": [] 1614 | }, 1615 | "execution_count": 153 1616 | } 1617 | ] 1618 | }, 1619 | { 1620 | "metadata": { 1621 | "id": "RcC-UdUZVrD2", 1622 | "colab_type": "text" 1623 | }, 1624 | "cell_type": "markdown", 1625 | "source": [ 1626 | "#### Greedy Selection" 1627 | ] 1628 | }, 1629 | { 1630 | "metadata": { 1631 | "id": "A-1CnFWVNGTX", 1632 | "colab_type": "code", 1633 | "colab": {} 1634 | }, 1635 | "cell_type": "code", 1636 | "source": [ 1637 | "def generate_text_from_chunk(start, end, target_length=20):\n", 1638 | " \"\"\"Greedy selection of the next token.\"\"\"\n", 1639 | " token_tensor = corpus.test[eos_pos[start]+1:eos_pos[end]]\n", 1640 | " return generate_text_from_tensor(token_tensor, target_length)\n", 1641 | " \n", 1642 | "def generate_text_from_tensor(token_tensor, target_length):\n", 1643 | " hidden = model.init_hidden(1)\n", 1644 | " output, hidden = model(token_tensor.unsqueeze(1), hidden)\n", 1645 | " index = output[-1, -0, :].argmax()\n", 1646 | " res = [index.numpy()]\n", 1647 | " with torch.no_grad(): \n", 1648 
| " for i in range(target_length):\n", 1649 | " output, hidden = model(index.unsqueeze(0).unsqueeze(0), hidden)\n", 1650 | " index = output[-1, 0, ].argmax()\n", 1651 | " res.append(index.numpy())\n", 1652 | " return [\n", 1653 | " [\n", 1654 | " corpus.dictionary.idx2word[x] for x in arr \n", 1655 | " ] for arr in (token_tensor.numpy(), res)\n", 1656 | " ]" 1657 | ], 1658 | "execution_count": 0, 1659 | "outputs": [] 1660 | }, 1661 | { 1662 | "metadata": { 1663 | "id": "QceMDqccPTNl", 1664 | "colab_type": "code", 1665 | "colab": { 1666 | "base_uri": "https://localhost:8080/", 1667 | "height": 51 1668 | }, 1669 | "outputId": "e4a60054-a406-4e24-cbdc-cecf13d0782e" 1670 | }, 1671 | "cell_type": "code", 1672 | "source": [ 1673 | "context, new_texts = generate_text_from_chunk(28, 29)\n", 1674 | "print(\" \".join(context[-10:]))\n", 1675 | "print(\" \".join(new_texts))" 1676 | ], 1677 | "execution_count": 156, 1678 | "outputs": [ 1679 | { 1680 | "output_type": "stream", 1681 | "text": [ 1682 | "dwelling upon his own . Du Fu wrote :\n", 1683 | "\" I 'm not going to be a , and I am not going to be a . \"\n" 1684 | ], 1685 | "name": "stdout" 1686 | } 1687 | ] 1688 | }, 1689 | { 1690 | "metadata": { 1691 | "id": "L_eca7VhS76z", 1692 | "colab_type": "code", 1693 | "colab": { 1694 | "base_uri": "https://localhost:8080/", 1695 | "height": 51 1696 | }, 1697 | "outputId": "e5b88ba5-295a-42ec-91f9-27d78b917065" 1698 | }, 1699 | "cell_type": "code", 1700 | "source": [ 1701 | "context, new_texts = generate_text_from_chunk(28, 38)\n", 1702 | "print(\" \".join(context[-10:]))\n", 1703 | "print(\" \".join(new_texts))" 1704 | ], 1705 | "execution_count": 162, 1706 | "outputs": [ 1707 | { 1708 | "output_type": "stream", 1709 | "text": [ 1710 | "Fu financially and employed him as his unofficial secretary .\n", 1711 | "The Latin chronicler John C. also described him as his \" liberal @-@ confident \" . 
He described them\n" 1712 | ], 1713 | "name": "stdout" 1714 | } 1715 | ] 1716 | }, 1717 | { 1718 | "metadata": { 1719 | "id": "A7LXS_pnVvnY", 1720 | "colab_type": "text" 1721 | }, 1722 | "cell_type": "markdown", 1723 | "source": [ 1724 | "#### Sampling from the Predicted Distribution with a Temeperature Knob" 1725 | ] 1726 | }, 1727 | { 1728 | "metadata": { 1729 | "id": "kSBkyqogV-M7", 1730 | "colab_type": "code", 1731 | "colab": {} 1732 | }, 1733 | "cell_type": "code", 1734 | "source": [ 1735 | "def generate_text_from_chunk(start, end, target_length=20, temperature=1.0):\n", 1736 | " token_tensor = corpus.test[eos_pos[start]+1:eos_pos[end]]\n", 1737 | " return generate_text_from_tensor(token_tensor, target_length, temperature)\n", 1738 | " \n", 1739 | "\n", 1740 | "def generate_text_from_tensor(token_tensor, target_length, temperature):\n", 1741 | " \"\"\"Sampling from the softmax distribution.\"\"\" \n", 1742 | " hidden = model.init_hidden(1)\n", 1743 | " _, hidden = model(token_tensor[:-1].unsqueeze(1), hidden)\n", 1744 | " input_tensor = torch.zeros((1, 1)).long().to(DEVICE)\n", 1745 | " input_tensor[0, 0].fill_(token_tensor[-1])\n", 1746 | " res = []\n", 1747 | " with torch.no_grad(): \n", 1748 | " for i in range(target_length): \n", 1749 | " output, hidden = model(input_tensor, hidden)\n", 1750 | " word_weights = output.squeeze().div(temperature).exp()\n", 1751 | " word_idx = torch.multinomial(word_weights, 1)[0]\n", 1752 | " input_tensor[0, 0].fill_(word_idx)\n", 1753 | " res.append(word_idx.item())\n", 1754 | " return [\n", 1755 | " [\n", 1756 | " corpus.dictionary.idx2word[x] for x in arr \n", 1757 | " ] for arr in (token_tensor.numpy(), res)\n", 1758 | " ]" 1759 | ], 1760 | "execution_count": 0, 1761 | "outputs": [] 1762 | }, 1763 | { 1764 | "metadata": { 1765 | "id": "qpZVvNGpXHxj", 1766 | "colab_type": "code", 1767 | "colab": { 1768 | "base_uri": "https://localhost:8080/", 1769 | "height": 119 1770 | }, 1771 | "outputId": "4861f329-bb55-45d4-b5ea-2d822065194f" 1772 | }, 1773 | "cell_type": "code", 1774 | "source": [ 1775 | "context, new_texts = generate_text_from_chunk(28, 33, target_length=50)\n", 1776 | "print(\" \".join(context[-10:]))\n", 1777 | "for i in range(0, len(new_texts), 10):\n", 1778 | " print(\" \".join(new_texts[i:i+10]))" 1779 | ], 1780 | "execution_count": 172, 1781 | "outputs": [ 1782 | { 1783 | "output_type": "stream", 1784 | "text": [ 1785 | "bring more papers to pile higher on my desk .\n", 1786 | "\" ( two ) and Cristina 's army in\n", 1787 | " where all historians discovered that the German sniper was\n", 1788 | "still from and one out of the Sisler children\n", 1789 | ". 
A brother , the friend of Richard ,\n", 1790 | "senior of the island , was therefore procured in the\n" 1791 | ], 1792 | "name": "stdout" 1793 | } 1794 | ] 1795 | }, 1796 | { 1797 | "metadata": { 1798 | "id": "uataLNWWYqK8", 1799 | "colab_type": "code", 1800 | "colab": {} 1801 | }, 1802 | "cell_type": "code", 1803 | "source": [ 1804 | "def generate_text_from_texts(texts, target_length=20, temperature=1.0):\n", 1805 | " \"\"\"texts needs to be tokens seperated by space characters.\"\"\"\n", 1806 | " token_tensor = torch.LongTensor([\n", 1807 | " corpus.dictionary.word2idx[x] for x in texts.split(\" \")\n", 1808 | " ]).to(DEVICE)\n", 1809 | " return generate_text_from_tensor(token_tensor, target_length, temperature)" 1810 | ], 1811 | "execution_count": 0, 1812 | "outputs": [] 1813 | }, 1814 | { 1815 | "metadata": { 1816 | "id": "4DNHQVUQjE2w", 1817 | "colab_type": "code", 1818 | "colab": { 1819 | "base_uri": "https://localhost:8080/", 1820 | "height": 204 1821 | }, 1822 | "outputId": "577a9287-dc35-42bb-ccca-8d8f17fc5332" 1823 | }, 1824 | "cell_type": "code", 1825 | "source": [ 1826 | "context, new_texts = generate_text_from_texts(\"In the fall of 1944 , enrolled at the University of Michigan . The United Press syndicate\", target_length=100)\n", 1827 | "print(\" \".join(context[-10:]))\n", 1828 | "for i in range(0, len(new_texts), 10):\n", 1829 | " print(\" \".join(new_texts[i:i+10]))" 1830 | ], 1831 | "execution_count": 173, 1832 | "outputs": [ 1833 | { 1834 | "output_type": "stream", 1835 | "text": [ 1836 | "at the University of Michigan . The United Press syndicate\n", 1837 | "and officials was interpreted by the searing complaints being used\n", 1838 | "as the musician by another mixed review , but expressed\n", 1839 | "concern that the laws would be found out in the\n", 1840 | "United States and during a transmission control of the same\n", 1841 | "second landscapes . Lisa that he managed to visit the\n", 1842 | "relationship with Carey and Marvel 's general president for food\n", 1843 | "was \" desperate and looking , based on their own\n", 1844 | "wing . \" Asked in this , the company was\n", 1845 | "told by the US Bureau of Education , who decided\n", 1846 | ", and eventually admitted to the 1920s , and \"\n" 1847 | ], 1848 | "name": "stdout" 1849 | } 1850 | ] 1851 | }, 1852 | { 1853 | "metadata": { 1854 | "id": "S4jHkK4NjaFm", 1855 | "colab_type": "code", 1856 | "colab": {} 1857 | }, 1858 | "cell_type": "code", 1859 | "source": [ 1860 | "" 1861 | ], 1862 | "execution_count": 0, 1863 | "outputs": [] 1864 | } 1865 | ] 1866 | } --------------------------------------------------------------------------------