├── mnist
    ├── requirements.txt
    ├── README.md
    └── main.py
├── snli
    ├── requirements.txt
    ├── util.py
    ├── model.py
    └── train.py
├── word_language_model
    ├── requirements.txt
    ├── README.md
    ├── data.py
    ├── model.py
    ├── generate.py
    └── main.py
├── imagenet
    ├── requirements.txt
    ├── README.md
    └── main.py
├── dcgan
    ├── requirements.txt
    ├── README.md
    └── main.py
├── mnist_hogwild
    ├── requirements.txt
    ├── main.py
    └── train.py
├── .gitignore
├── vae
    ├── requirements.txt
    ├── README.md
    └── main.py
├── reinforcement_learning
    ├── requirements.txt
    ├── README.md
    ├── reinforce.py
    └── actor_critic.py
├── OpenNMT
    ├── onmt
    │   ├── modules
    │   │   ├── __init__.py
    │   │   └── GlobalAttention.py
    │   ├── Constants.py
    │   ├── __init__.py
    │   ├── Dataset.py
    │   ├── Optim.py
    │   ├── Beam.py
    │   ├── Dict.py
    │   ├── Models.py
    │   └── Translator.py
    ├── LICENSE.md
    ├── README.md
    ├── translate.py
    ├── preprocess.py
    └── train.py
├── regression
    ├── README.md
    └── main.py
├── README.md
├── super_resolution
    ├── dataset.py
    ├── super_resolve.py
    ├── model.py
    ├── README.md
    ├── data.py
    └── main.py
└── LICENSE


/mnist/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | 


--------------------------------------------------------------------------------
/snli/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchtext
3 | 


--------------------------------------------------------------------------------
/word_language_model/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | 


--------------------------------------------------------------------------------
/imagenet/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | 


--------------------------------------------------------------------------------
/dcgan/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | lmdb
4 | 


--------------------------------------------------------------------------------
/mnist_hogwild/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dcgan/data
2 | data
3 | *.pyc
4 | OpenNMT/data
5 | 


--------------------------------------------------------------------------------
/vae/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | tqdm
4 | six
5 | 


--------------------------------------------------------------------------------
/reinforcement_learning/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | numpy
3 | gym
4 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from onmt.modules.GlobalAttention import GlobalAttention
2 | 


--------------------------------------------------------------------------------
/regression/README.md:
--------------------------------------------------------------------------------
1 | # Linear regression example
2 | 
3 | Trains a single fully-connected layer to fit a 4th degree polynomial.
4 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/Constants.py:
--------------------------------------------------------------------------------
 1 | # Special-token indices; Dataset._batchify fills batch padding with PAD.
 2 | PAD = 0
 3 | UNK = 1
 4 | BOS = 2
 5 | EOS = 3
 6 | # Presumably the vocabulary strings for the indices above (same order) - confirm in Dict.
 7 | PAD_WORD = '<blank>'
 8 | UNK_WORD = '<unk>'
 9 | BOS_WORD = '<s>'
10 | EOS_WORD = '</s>'
11 | 


--------------------------------------------------------------------------------
/mnist/README.md:
--------------------------------------------------------------------------------
1 | # Basic MNIST Example
2 | 
3 | ```bash
4 | pip install -r requirements.txt
5 | python main.py
6 | # CUDA_VISIBLE_DEVICES=2 python main.py  # to specify GPU id to ex. 2
7 | ```
8 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/__init__.py:
--------------------------------------------------------------------------------
1 | import onmt.Constants
2 | import onmt.Models
3 | from onmt.Translator import Translator
4 | from onmt.Dataset import Dataset
5 | from onmt.Optim import Optim
6 | from onmt.Dict import Dict
7 | from onmt.Beam import Beam
8 | 


--------------------------------------------------------------------------------
/reinforcement_learning/README.md:
--------------------------------------------------------------------------------
 1 | # Reinforcement learning training example
 2 | 
 3 | ```bash
 4 | pip install -r requirements.txt
 5 | # For REINFORCE:
 6 | python reinforce.py
 7 | # For actor critic:
 8 | python actor_critic.py
 9 | ```
10 | 


--------------------------------------------------------------------------------
/vae/README.md:
--------------------------------------------------------------------------------
 1 | # Basic VAE Example
 2 | 
 3 | This is an improved implementation of the paper [Stochastic Gradient VB and the
 4 | Variational Auto-Encoder](http://arxiv.org/abs/1312.6114) by Kingma and Welling.
 5 | It uses ReLUs and the adam optimizer, instead of sigmoids and adagrad. These changes make the network converge much faster.
 6 | 
 7 | ```bash
 8 | pip install -r requirements.txt
 9 | python main.py
10 | ```
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PyTorch Examples
 2 | 
 3 | A repository showcasing examples of using pytorch
 4 | 
 5 | - MNIST Convnets
 6 | - Word level Language Modeling using LSTM RNNs
 7 | - Training Imagenet Classifiers with Residual Networks
 8 | - Generative Adversarial Networks (DCGAN)
 9 | - Variational Auto-Encoders
10 | - Superresolution using an efficient sub-pixel convolutional neural network
11 | - Hogwild training of shared ConvNets across multiple processes on MNIST
12 | - Training a CartPole to balance in OpenAI Gym with actor-critic
13 | - Natural Language Inference (SNLI) with GloVe vectors, LSTMs, and torchtext
14 | - Neural Machine Translation using sequence-to-sequence RNN with attention (OpenNMT)
15 | 


--------------------------------------------------------------------------------
/OpenNMT/LICENSE.md:
--------------------------------------------------------------------------------
 1 | This software is derived from the OpenNMT project at 
 2 | https://github.com/OpenNMT/OpenNMT.
 3 | 
 4 | The MIT License (MIT)
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/super_resolution/dataset.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data as data
 2 | 
 3 | from os import listdir
 4 | from os.path import join
 5 | from PIL import Image
 6 | 
 7 | 
 8 | def is_image_file(filename):
 9 |     return any(filename.endswith(extension) for extension in [".png", ".jpg", ".jpeg"])
10 | 
11 | 
12 | def load_img(filepath):
13 |     img = Image.open(filepath).convert('YCbCr')
14 |     y, _, _ = img.split()
15 |     return y
16 | 
17 | 
18 | class DatasetFromFolder(data.Dataset):
19 |     def __init__(self, image_dir, input_transform=None, target_transform=None):
20 |         super(DatasetFromFolder, self).__init__()
21 |         self.image_filenames = [join(image_dir, x) for x in listdir(image_dir) if is_image_file(x)]
22 | 
23 |         self.input_transform = input_transform
24 |         self.target_transform = target_transform
25 | 
26 |     def __getitem__(self, index):
27 |         input = load_img(self.image_filenames[index])
28 |         target = input.copy()
29 |         if self.input_transform:
30 |             input = self.input_transform(input)
31 |         if self.target_transform:
32 |             target = self.target_transform(target)
33 | 
34 |         return input, target
35 | 
36 |     def __len__(self):
37 |         return len(self.image_filenames)
38 | 


--------------------------------------------------------------------------------
/word_language_model/README.md:
--------------------------------------------------------------------------------
 1 | # Word-level language modeling RNN
 2 | 
 3 | This example trains a multi-layer RNN (Elman, GRU, or LSTM) on a language modeling task.
 4 | By default, the training script uses the PTB dataset, provided.
 5 | The trained model can then be used by the generate script to generate new text.
 6 | 
 7 | ```bash
 8 | python main.py --cuda  # Train an LSTM on ptb with cuda (cuDNN). Should reach perplexity of 113
 9 | python generate.py     # Generate samples from the trained LSTM model.
10 | ```
11 | 
12 | The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`)
13 | which will automatically use the cuDNN backend if run on CUDA with cuDNN installed.
14 | 
15 | The `main.py` script accepts the following arguments:
16 | 
17 | ```bash
18 | optional arguments:
19 |   -h, --help         show this help message and exit
20 |   --data DATA        location of the data corpus
21 |   --model MODEL      type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)
22 |   --emsize EMSIZE    size of word embeddings
23 |   --nhid NHID        humber of hidden units per layer
24 |   --nlayers NLAYERS  number of layers
25 |   --lr LR            initial learning rate
26 |   --clip CLIP        gradient clipping
27 |   --epochs EPOCHS    upper epoch limit
28 |   --batch-size N     batch size
29 |   --bptt BPTT        sequence length
30 |   --seed SEED        random seed
31 |   --cuda             use CUDA
32 |   --log-interval N   report interval
33 |   --save SAVE        path to save the final model
34 | ```
35 | 


--------------------------------------------------------------------------------
/super_resolution/super_resolve.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import argparse
 3 | import torch
 4 | from torch.autograd import Variable
 5 | from PIL import Image
 6 | from torchvision.transforms import ToTensor
 7 | 
 8 | import numpy as np
 9 | 
10 | # Training settings
11 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example')
12 | parser.add_argument('--input_image', type=str, required=True, help='input image to use')
13 | parser.add_argument('--model', type=str, required=True, help='model file to use')
14 | parser.add_argument('--output_filename', type=str, help='where to save the output image')
15 | parser.add_argument('--cuda', action='store_true', help='use cuda')
16 | opt = parser.parse_args()
17 | 
18 | print(opt)
19 | img = Image.open(opt.input_image).convert('YCbCr')
20 | y, cb, cr = img.split()
21 | # NOTE(review): torch.load unpickles the file; only load model files you trust.
22 | model = torch.load(opt.model)
23 | input = Variable(ToTensor()(y)).view(1, -1, y.size[1], y.size[0])  # PIL size is (width, height)
24 | 
25 | if opt.cuda:
26 |     model = model.cuda()
27 |     input = input.cuda()
28 | 
29 | out = model(input)
30 | out = out.cpu()
31 | out_img_y = out.data[0].numpy()
32 | out_img_y *= 255.0  # scale to the 0..255 range before the uint8 conversion below
33 | out_img_y = out_img_y.clip(0, 255)
34 | out_img_y = Image.fromarray(np.uint8(out_img_y[0]), mode='L')
35 | # Only Y goes through the model; Cb and Cr are resized (bicubic) to the output size.
36 | out_img_cb = cb.resize(out_img_y.size, Image.BICUBIC)
37 | out_img_cr = cr.resize(out_img_y.size, Image.BICUBIC)
38 | out_img = Image.merge('YCbCr', [out_img_y, out_img_cb, out_img_cr]).convert('RGB')
39 | 
40 | out_img.save(opt.output_filename)
41 | print('output image saved to ', opt.output_filename)
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2017, 
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/Dataset.py:
--------------------------------------------------------------------------------
 1 | import onmt
 2 | from torch.autograd import Variable
 3 | 
 4 | 
 5 | class Dataset(object):
 6 | 
 7 |     def __init__(self, srcData, tgtData, batchSize, cuda):
 8 |         self.src = srcData
 9 |         if tgtData:
10 |             self.tgt = tgtData
11 |             assert(len(self.src) == len(self.tgt))
12 |         else:
13 |             self.tgt = None
14 |         self.cuda = cuda
15 | 
16 |         self.batchSize = batchSize
17 |         self.numBatches = len(self.src) // batchSize
18 | 
19 |     def _batchify(self, data, align_right=False):
20 |         max_length = max(x.size(0) for x in data)
21 |         out = data[0].new(len(data), max_length).fill_(onmt.Constants.PAD)
22 |         for i in range(len(data)):
23 |             data_length = data[i].size(0)
24 |             offset = max_length - data_length if align_right else 0
25 |             out[i].narrow(0, offset, data_length).copy_(data[i])
26 | 
27 |         out = out.t().contiguous()
28 |         if self.cuda:
29 |             out = out.cuda()
30 | 
31 |         v = Variable(out)
32 |         return v
33 | 
34 |     def __getitem__(self, index):
35 |         assert index < self.numBatches, "%d > %d" % (index, self.numBatches)
36 |         srcBatch = self._batchify(
37 |             self.src[index*self.batchSize:(index+1)*self.batchSize], align_right=True)
38 | 
39 |         if self.tgt:
40 |             tgtBatch = self._batchify(
41 |                 self.tgt[index*self.batchSize:(index+1)*self.batchSize])
42 |         else:
43 |             tgtBatch = None
44 | 
45 |         return srcBatch, tgtBatch
46 | 
47 |     def __len__(self):
48 |         return self.numBatches
49 | 


--------------------------------------------------------------------------------
/word_language_model/data.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | 
 4 | class Dictionary(object):
 5 |     def __init__(self):
 6 |         self.word2idx = {}
 7 |         self.idx2word = []
 8 | 
 9 |     def add_word(self, word):
10 |         if word not in self.word2idx:
11 |             self.idx2word.append(word)
12 |             self.word2idx[word] = len(self.idx2word) - 1
13 |         return self.word2idx[word]
14 | 
15 |     def __len__(self):
16 |         return len(self.idx2word)
17 | 
18 | 
19 | class Corpus(object):
20 |     def __init__(self, path):
21 |         self.dictionary = Dictionary()
22 |         self.train = self.tokenize(os.path.join(path, 'train.txt'))
23 |         self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
24 |         self.test = self.tokenize(os.path.join(path, 'test.txt'))
25 | 
26 |     def tokenize(self, path):
27 |         """Tokenizes a text file."""
28 |         assert os.path.exists(path)
29 |         # Add words to the dictionary
30 |         with open(path, 'r') as f:
31 |             tokens = 0
32 |             for line in f:
33 |                 words = line.split() + ['<eos>']
34 |                 tokens += len(words)
35 |                 for word in words:
36 |                     self.dictionary.add_word(word)
37 | 
38 |         # Tokenize file content
39 |         with open(path, 'r') as f:
40 |             ids = torch.LongTensor(tokens)
41 |             token = 0
42 |             for line in f:
43 |                 words = line.split() + ['<eos>']
44 |                 for word in words:
45 |                     ids[token] = self.dictionary.word2idx[word]
46 |                     token += 1
47 | 
48 |         return ids
49 | 


--------------------------------------------------------------------------------
/snli/util.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from argparse import ArgumentParser
 3 | 
 4 | def get_args():
 5 |     parser = ArgumentParser(description='PyTorch/torchtext SNLI example')
 6 |     parser.add_argument('--epochs', type=int, default=50)
 7 |     parser.add_argument('--batch_size', type=int, default=128)
 8 |     parser.add_argument('--d_embed', type=int, default=300)
 9 |     parser.add_argument('--d_proj', type=int, default=300)
10 |     parser.add_argument('--d_hidden', type=int, default=300)
11 |     parser.add_argument('--n_layers', type=int, default=1)
12 |     parser.add_argument('--log_every', type=int, default=50)
13 |     parser.add_argument('--lr', type=float, default=.001)
14 |     parser.add_argument('--dev_every', type=int, default=1000)
15 |     parser.add_argument('--save_every', type=int, default=1000)
16 |     parser.add_argument('--dp_ratio', type=int, default=0.2)
17 |     parser.add_argument('--no-bidirectional', action='store_false', dest='birnn')
18 |     parser.add_argument('--preserve-case', action='store_false', dest='lower')
19 |     parser.add_argument('--no-projection', action='store_false', dest='projection')
20 |     parser.add_argument('--train_embed', action='store_false', dest='fix_emb')
21 |     parser.add_argument('--gpu', type=int, default=0)
22 |     parser.add_argument('--save_path', type=str, default='results')
23 |     parser.add_argument('--data_cache', type=str, default=os.path.join(os.getcwd(), '.data_cache'))
24 |     parser.add_argument('--vector_cache', type=str, default=os.path.join(os.getcwd(), '.vector_cache/input_vectors.pt'))
25 |     parser.add_argument('--word_vectors', type=str, default='glove.42B')
26 |     parser.add_argument('--resume_snapshot', type=str, default='')
27 |     args = parser.parse_args()
28 |     return args
29 | 


--------------------------------------------------------------------------------
/super_resolution/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from numpy.random import normal
 4 | from numpy.linalg import svd
 5 | from math import sqrt
 6 | 
 7 | 
 8 | def _get_orthogonal_init_weights(weights):
 9 |     fan_out = weights.size(0)
10 |     fan_in = weights.size(1) * weights.size(2) * weights.size(3)
11 | 
12 |     u, _, v = svd(normal(0.0, 1.0, (fan_out, fan_in)), full_matrices=False)
13 | 
14 |     if u.shape == (fan_out, fan_in):
15 |         return torch.Tensor(u.reshape(weights.size()))
16 |     else:
17 |         return torch.Tensor(v.reshape(weights.size()))
18 | 
19 | 
20 | class Net(nn.Module):
21 |     def __init__(self, upscale_factor):
22 |         super(Net, self).__init__()
23 | 
24 |         self.relu = nn.ReLU()
25 |         self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
26 |         self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
27 |         self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
28 |         self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1))
29 |         self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
30 | 
31 |         self._initialize_weights()
32 | 
33 |     def forward(self, x):
34 |         x = self.relu(self.conv1(x))
35 |         x = self.relu(self.conv2(x))
36 |         x = self.relu(self.conv3(x))
37 |         x = self.pixel_shuffle(self.conv4(x))
38 |         return x
39 | 
40 |     def _initialize_weights(self):
41 |         self.conv1.weight.data.copy_(_get_orthogonal_init_weights(self.conv1.weight) * sqrt(2))
42 |         self.conv2.weight.data.copy_(_get_orthogonal_init_weights(self.conv2.weight) * sqrt(2))
43 |         self.conv3.weight.data.copy_(_get_orthogonal_init_weights(self.conv3.weight) * sqrt(2))
44 |         self.conv4.weight.data.copy_(_get_orthogonal_init_weights(self.conv4.weight))
45 | 


--------------------------------------------------------------------------------
/super_resolution/README.md:
--------------------------------------------------------------------------------
 1 | # Superresolution using an efficient sub-pixel convolutional neural network
 2 | 
 3 | This example illustrates how to use the efficient sub-pixel convolution layer described in  ["Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network" - Shi et al.](https://arxiv.org/abs/1609.05158) for increasing spatial resolution within your network for tasks such as superresolution.
 4 | 
 5 | ```
 6 | usage: main.py [-h] --upscale_factor UPSCALE_FACTOR [--batchSize BATCHSIZE]
 7 |                [--testBatchSize TESTBATCHSIZE] [--nEpochs NEPOCHS] [--lr LR]
 8 |                [--cuda] [--threads THREADS] [--seed SEED]
 9 | 
10 | PyTorch Super Res Example
11 | 
12 | optional arguments:
13 |   -h, --help            show this help message and exit
14 |   --upscale_factor      super resolution upscale factor
15 |   --batchSize           training batch size
16 |   --testBatchSize       testing batch size
17 |   --nEpochs             number of epochs to train for
18 |   --lr                  Learning Rate. Default=0.01
19 |   --cuda                use cuda
20 |   --threads             number of threads for data loader to use Default=4
21 |   --seed                random seed to use. Default=123
22 | ```
23 | This example trains a super-resolution network on the [BSD300 dataset](https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/), using crops from the 200 training images, and evaluating on crops of the 100 test images. A snapshot of the model after every epoch with filename model_epoch_<epoch_number>.pth
24 | 
25 | ##Example Usage:
26 | 
27 | ###Train
28 | `python main.py --upscale_factor 3 --batchSize 4 --testBatchSize 100 --nEpochs 30 --lr 0.001`
29 | ###Super Resolve
30 | `python super_resolve.py --input_image dataset/BSDS300/images/test/16077.jpg --model model_epoch_500.pth --output_filename out.png`
31 | 


--------------------------------------------------------------------------------
/regression/main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from __future__ import print_function
 3 | from itertools import count
 4 | 
 5 | import torch
 6 | import torch.autograd
 7 | import torch.nn.functional as F
 8 | from torch.autograd import Variable
 9 | 
10 | POLY_DEGREE = 4  # number of polynomial features: x^1 .. x^POLY_DEGREE
11 | W_target = torch.randn(POLY_DEGREE, 1) * 5  # random target weights, one per feature column
12 | b_target = torch.randn(1) * 5  # random target bias
13 | 
14 | 
15 | def make_features(x):
16 |     """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4]."""
17 |     x = x.unsqueeze(1)
18 |     return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1)
19 | 
20 | 
21 | def f(x):
22 |     """Approximated function."""
23 |     return x.mm(W_target) + b_target[0]
24 | 
25 | 
26 | def poly_desc(W, b):
27 |     """Creates a string description of a polynomial."""
28 |     result = 'y = '
29 |     for i, w in enumerate(W):
30 |         result += '{:+.2f} x^{} '.format(w, len(W) - i)
31 |     result += '{:+.2f}'.format(b[0])
32 |     return result
33 | 
34 | 
35 | def get_batch(batch_size=32):
36 |     """Builds a batch i.e. (x, f(x)) pair."""
37 |     random = torch.randn(batch_size)
38 |     x = make_features(random)
39 |     y = f(x)
40 |     return Variable(x), Variable(y)
41 | 
42 | 
43 | # Define model: one linear layer mapping the POLY_DEGREE features to a scalar
44 | fc = torch.nn.Linear(W_target.size(0), 1)
45 | 
46 | for batch_idx in count(1):
47 |     # Get data: a fresh random batch every iteration
48 |     batch_x, batch_y = get_batch()
49 | 
50 |     # Reset gradients
51 |     fc.zero_grad()
52 | 
53 |     # Forward pass: smooth L1 loss between the prediction and the target
54 |     output = F.smooth_l1_loss(fc(batch_x), batch_y)
55 |     loss = output.data[0]
56 | 
57 |     # Backward pass
58 |     output.backward()
59 | 
60 |     # Apply gradients: plain gradient-descent step with a fixed step size of 0.1
61 |     for param in fc.parameters():
62 |         param.data.add_(-0.1 * param.grad.data)
63 | 
64 |     # Stop criterion: exit once the loss on the current batch drops below 1e-3
65 |     if loss < 1e-3:
66 |         break
67 | 
68 | print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
69 | print('==> Learned function:\t' + poly_desc(fc.weight.data.view(-1), fc.bias.data))
70 | print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target))
71 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/modules/GlobalAttention.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Global attention takes a matrix and a query vector. It
 3 | then computes a parameterized convex combination of the matrix
 4 | based on the input query.
 5 | 
 6 | 
 7 |         H_1 H_2 H_3 ... H_n
 8 |           q   q   q       q
 9 |             |  |   |       |
10 |               \ |   |      /
11 |                       .....
12 |                   \   |  /
13 |                           a
14 | 
15 | Constructs a unit mapping.
16 |     $$(H_1 + H_n, q) => (a)$$
17 |     Where H is of `batch x n x dim` and q is of `batch x dim`.
18 | 
19 |     The full def is  $$\tanh(W_2 [(softmax((W_1 q + b_1) H) H), q] + b_2)$$.:
20 | 
21 | """
22 | 
23 | import torch
24 | import torch.nn as nn
25 | import math
26 | 
27 | _INF = float('inf')
28 | 
29 | class GlobalAttention(nn.Module):
30 |     def __init__(self, dim):
31 |         super(GlobalAttention, self).__init__()
32 |         self.linear_in = nn.Linear(dim, dim, bias=False)
33 |         self.sm = nn.Softmax()
34 |         self.linear_out = nn.Linear(dim*2, dim, bias=False)
35 |         self.tanh = nn.Tanh()
36 |         self.mask = None
37 | 
38 |     def applyMask(self, mask):
39 |         self.mask = mask
40 | 
41 |     def forward(self, input, context):
42 |         """
43 |         input: batch x dim
44 |         context: batch x sourceL x dim
45 |         """
46 |         targetT = self.linear_in(input).unsqueeze(2)  # batch x dim x 1
47 | 
48 |         # Get attention
49 |         attn = torch.bmm(context, targetT).squeeze(2)  # batch x sourceL
50 |         if self.mask is not None:
51 |             attn.data.masked_fill_(self.mask, -_INF)
52 |         attn = self.sm(attn)
53 |         attn3 = attn.view(attn.size(0), 1, attn.size(1))  # batch x 1 x sourceL
54 | 
55 |         weightedContext = torch.bmm(attn3, context).squeeze(1)  # batch x dim
56 |         contextCombined = torch.cat((weightedContext, input), 1)
57 | 
58 |         contextOutput = self.tanh(self.linear_out(contextCombined))
59 | 
60 |         return contextOutput, attn
61 | 


--------------------------------------------------------------------------------
/dcgan/README.md:
--------------------------------------------------------------------------------
 1 | # Deep Convolution Generative Adversarial Networks
 2 | 
 3 | This example implements the paper [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](http://arxiv.org/abs/1511.06434)
 4 | 
 5 | The implementation is very close to the Torch implementation [dcgan.torch](https://github.com/soumith/dcgan.torch)
 6 | 
 7 | After every 100 training iterations, the files `real_samples.png` and `fake_samples.png` are written to disk
 8 | with the samples from the generative model.
 9 | 
10 | After every epoch, models are saved to: `netG_epoch_%d.pth` and `netD_epoch_%d.pth`
11 | 
12 | ##Downloading the dataset
13 | You can download the LSUN dataset by cloning [this repo](https://github.com/fyu/lsun) and running
14 | ```
15 | python donwload.py -c bedroom
16 | ```
17 | 
18 | ##Usage
19 | ```
20 | usage: main.py [-h] --dataset DATASET --dataroot DATAROOT [--workers WORKERS]
21 |                [--batchSize BATCHSIZE] [--imageSize IMAGESIZE] [--nz NZ]
22 |                [--ngf NGF] [--ndf NDF] [--niter NITER] [--lr LR]
23 |                [--beta1 BETA1] [--cuda] [--ngpu NGPU] [--netG NETG]
24 |                [--netD NETD]
25 | 
26 | optional arguments:
27 |   -h, --help            show this help message and exit
28 |   --dataset DATASET     cifar10 | lsun | imagenet | folder | lfw
29 |   --dataroot DATAROOT   path to dataset
30 |   --workers WORKERS     number of data loading workers
31 |   --batchSize BATCHSIZE
32 |                         input batch size
33 |   --imageSize IMAGESIZE
34 |                         the height / width of the input image to network
35 |   --nz NZ               size of the latent z vector
36 |   --ngf NGF
37 |   --ndf NDF
38 |   --niter NITER         number of epochs to train for
39 |   --lr LR               learning rate, default=0.0002
40 |   --beta1 BETA1         beta1 for adam. default=0.5
41 |   --cuda                enables cuda
42 |   --ngpu NGPU           number of GPUs to use
43 |   --netG NETG           path to netG (to continue training)
44 |   --netD NETD           path to netD (to continue training)
45 | ```
46 | 


--------------------------------------------------------------------------------
/word_language_model/model.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch.autograd import Variable
 3 | 
 4 | class RNNModel(nn.Module):
 5 |     """Container module with an encoder, a recurrent module, and a decoder."""
 6 | 
 7 |     def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers):
 8 |         super(RNNModel, self).__init__()
 9 |         self.encoder = nn.Embedding(ntoken, ninp)
10 |         if rnn_type in ['LSTM', 'GRU']:
11 |             self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, bias=False)
12 |         else:
13 |             try:
14 |                 nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
15 |             except KeyError:
16 |                 raise ValueError( """An invalid option for `--model` was supplied,
17 |                                  options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
18 |             self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, bias=False)
19 |         self.decoder = nn.Linear(nhid, ntoken)
20 | 
21 |         self.init_weights()
22 | 
23 |         self.rnn_type = rnn_type
24 |         self.nhid = nhid
25 |         self.nlayers = nlayers
26 | 
27 |     def init_weights(self):
28 |         initrange = 0.1
29 |         self.encoder.weight.data.uniform_(-initrange, initrange)
30 |         self.decoder.bias.data.fill_(0)
31 |         self.decoder.weight.data.uniform_(-initrange, initrange)
32 | 
33 |     def forward(self, input, hidden):
34 |         emb = self.encoder(input)
35 |         output, hidden = self.rnn(emb, hidden)
36 |         decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2)))
37 |         return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
38 | 
39 |     def init_hidden(self, bsz):
40 |         weight = next(self.parameters()).data
41 |         if self.rnn_type == 'LSTM':
42 |             return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
43 |                     Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
44 |         else:
45 |             return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())
46 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/Optim.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | import torch.optim as optim
 3 | 
 4 | class Optim(object):
 5 | 
 6 |     def _makeOptimizer(self):
 7 |         if self.method == 'sgd':
 8 |             self.optimizer = optim.SGD(self.params, lr=self.lr)
 9 |         elif self.method == 'adagrad':
10 |             self.optimizer = optim.Adagrad(self.params, lr=self.lr)
11 |         elif self.method == 'adadelta':
12 |             self.optimizer = optim.Adadelta(self.params, lr=self.lr)
13 |         elif self.method == 'adam':
14 |             self.optimizer = optim.Adam(self.params, lr=self.lr)
15 |         else:
16 |             raise RuntimeError("Invalid optim method: " + self.method)
17 | 
18 |     def __init__(self, params, method, lr, max_grad_norm, lr_decay=1, start_decay_at=None):
19 |         self.params = list(params)  # careful: params may be a generator
20 |         self.last_ppl = None
21 |         self.lr = lr
22 |         self.max_grad_norm = max_grad_norm
23 |         self.method = method
24 |         self.lr_decay = lr_decay
25 |         self.start_decay_at = start_decay_at
26 |         self.start_decay = False
27 | 
28 |         self._makeOptimizer()
29 | 
30 |     def step(self):
31 |         # Compute gradients norm.
32 |         grad_norm = 0
33 |         for param in self.params:
34 |             grad_norm += math.pow(param.grad.data.norm(), 2)
35 | 
36 |         grad_norm = math.sqrt(grad_norm)
37 |         shrinkage = self.max_grad_norm / grad_norm
38 | 
39 |         for param in self.params:
40 |             if shrinkage < 1:
41 |                 param.grad.data.mul_(shrinkage)
42 | 
43 |         self.optimizer.step()
44 |         return grad_norm
45 | 
46 |     # decay learning rate if val perf does not improve or we hit the start_decay_at limit
47 |     def updateLearningRate(self, ppl, epoch):
48 |         if self.start_decay_at is not None and epoch >= self.start_decay_at:
49 |             self.start_decay = True
50 |         if self.last_ppl is not None and ppl > self.last_ppl:
51 |             self.start_decay = True
52 | 
53 |         if self.start_decay:
54 |             self.lr = self.lr * self.lr_decay
55 |             print("Decaying learning rate to %g" % self.lr)
56 | 
57 |         self.last_ppl = ppl
58 | 
59 |         self._makeOptimizer()
60 | 


--------------------------------------------------------------------------------
/super_resolution/data.py:
--------------------------------------------------------------------------------
 1 | from os.path import exists, join, basename
 2 | from os import makedirs, remove
 3 | from six.moves import urllib
 4 | import tarfile
 5 | from torchvision.transforms import Compose, CenterCrop, ToTensor, Scale
 6 | 
 7 | from dataset import DatasetFromFolder
 8 | 
 9 | 
def download_bsd300(dest="dataset"):
    """Download and unpack the BSDS300 image archive under `dest` if needed.

    Nothing is downloaded when `<dest>/BSDS300/images` already exists.

    Args:
        dest: directory that holds (or will hold) the extracted archive.

    Returns:
        The path `<dest>/BSDS300/images`.
    """
    output_image_dir = join(dest, "BSDS300/images")

    if not exists(output_image_dir):
        # `dest` can already exist without the images (e.g. after an
        # interrupted run); makedirs would raise in that case.
        if not exists(dest):
            makedirs(dest)
        url = "http://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/BSDS300-images.tgz"
        print("downloading url ", url)

        file_path = join(dest, basename(url))
        data = urllib.request.urlopen(url)
        try:
            with open(file_path, 'wb') as f:
                f.write(data.read())
        finally:
            # Release the connection even if writing the file fails.
            data.close()

        print("Extracting data")
        # NOTE(review): member paths are extracted as-is; this trusts the
        # archive not to contain entries that escape `dest`.
        with tarfile.open(file_path) as tar:
            for item in tar:
                tar.extract(item, dest)

        # The archive is only needed for extraction.
        remove(file_path)

    return output_image_dir
32 | 
33 | 
def calculate_valid_crop_size(crop_size, upscale_factor):
    """Round `crop_size` down to the nearest multiple of `upscale_factor`."""
    leftover = crop_size % upscale_factor
    return crop_size - leftover
36 | 
37 | 
def input_transform(crop_size, upscale_factor):
    """Build the transform for network inputs.

    The pipeline centre-crops to `crop_size`, rescales to
    `crop_size // upscale_factor`, and converts to a tensor.
    """
    steps = [
        CenterCrop(crop_size),
        Scale(crop_size // upscale_factor),
        ToTensor(),
    ]
    return Compose(steps)
44 | 
45 | 
def target_transform(crop_size):
    """Build the transform for targets: centre-crop to `crop_size`, then convert to a tensor."""
    steps = [CenterCrop(crop_size), ToTensor()]
    return Compose(steps)
51 | 
52 | 
def get_training_set(upscale_factor):
    """Return the dataset over the `train` sub-folder of the BSDS300 images.

    The images are downloaded first if they are missing. Inputs and targets
    share a crop size derived from 256 and `upscale_factor`.
    """
    image_root = download_bsd300()
    valid_crop = calculate_valid_crop_size(256, upscale_factor)
    transforms = dict(
        input_transform=input_transform(valid_crop, upscale_factor),
        target_transform=target_transform(valid_crop),
    )
    return DatasetFromFolder(join(image_root, "train"), **transforms)
61 | 
62 | 
def get_test_set(upscale_factor):
    """Return the dataset over the `test` sub-folder of the BSDS300 images.

    The images are downloaded first if they are missing. Inputs and targets
    share a crop size derived from 256 and `upscale_factor`.
    """
    image_root = download_bsd300()
    valid_crop = calculate_valid_crop_size(256, upscale_factor)
    transforms = dict(
        input_transform=input_transform(valid_crop, upscale_factor),
        target_transform=target_transform(valid_crop),
    )
    return DatasetFromFolder(join(image_root, "test"), **transforms)
71 | 


--------------------------------------------------------------------------------
/imagenet/README.md:
--------------------------------------------------------------------------------
 1 | # ImageNet training in PyTorch
 2 | 
 3 | This implements training of popular model architectures, such as ResNet, AlexNet, and VGG on the ImageNet dataset.
 4 | 
 5 | ## Requirements
 6 | 
 7 | - Install PyTorch ([pytorch.org](http://pytorch.org))
 8 | - `pip install -r requirements.txt`
 9 | - Download the ImageNet dataset and move validation images to labeled subfolders
10 | 
11 | ## Training
12 | 
13 | To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset:
14 | 
15 | ```bash
16 | python main.py -a resnet18 [imagenet-folder with train and val folders]
17 | ```
18 | 
19 | The default learning rate schedule starts at 0.1 and decays by a factor of 10 every 30 epochs. This is appropriate for ResNet and models with batch normalization, but too high for AlexNet and VGG. Use 0.01 as the initial learning rate for AlexNet or VGG:
20 | 
21 | ```bash
22 | python main.py -a alexnet --lr 0.01 [imagenet-folder with train and val folders]
23 | ```
24 | 
25 | ## Usage
26 | 
27 | ```
28 | usage: main.py [-h] [--arch ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N]
29 |                [--lr LR] [--momentum M] [--weight-decay W] [--print-freq N]
30 |                [--resume PATH] [-e] [--pretrained]
31 |                DIR
32 | 
33 | PyTorch ImageNet Training
34 | 
35 | positional arguments:
36 |   DIR                   path to dataset
37 | 
38 | optional arguments:
39 |   -h, --help            show this help message and exit
40 |   --arch ARCH, -a ARCH  model architecture: alexnet | resnet | resnet101 |
41 |                         resnet152 | resnet18 | resnet34 | resnet50 | vgg |
42 |                         vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn
43 |                         | vgg19 | vgg19_bn (default: resnet18)
44 |   -j N, --workers N     number of data loading workers (default: 4)
45 |   --epochs N            number of total epochs to run
46 |   --start-epoch N       manual epoch number (useful on restarts)
47 |   -b N, --batch-size N  mini-batch size (default: 256)
48 |   --lr LR, --learning-rate LR
49 |                         initial learning rate
50 |   --momentum M          momentum
51 |   --weight-decay W, --wd W
52 |                         weight decay (default: 1e-4)
53 |   --print-freq N, -p N  print frequency (default: 10)
54 |   --resume PATH         path to latest checkpoint (default: none)
55 |   -e, --evaluate        evaluate model on validation set
56 |   --pretrained          use pre-trained model
57 | ```
58 | 


--------------------------------------------------------------------------------
/OpenNMT/README.md:
--------------------------------------------------------------------------------
 1 | # OpenNMT: Open-Source Neural Machine Translation
 2 | 
 3 | This is a [PyTorch](https://github.com/pytorch/pytorch)
 4 | port of [OpenNMT](https://github.com/OpenNMT/OpenNMT),
 5 | an open-source (MIT) neural machine translation system.
 6 | 
 7 | <center style="padding: 40px"><img width="70%" src="http://opennmt.github.io/simple-attn.png" /></center>
 8 | 
 9 | ## Quickstart
10 | 
11 | OpenNMT consists of three commands:
12 | 
13 | 0) Download the data.
14 | 
15 | ```wget https://s3.amazonaws.com/pytorch/examples/opennmt/data/onmt-data.tar && tar -xf onmt-data.tar```
16 | 
17 | 1) Preprocess the data.
18 | 
19 | ```python preprocess.py -train_src data/src-train.txt -train_tgt data/tgt-train.txt -valid_src data/src-val.txt -valid_tgt data/tgt-val.txt -save_data data/demo```
20 | 
21 | 2) Train the model.
22 | 
23 | ```python train.py -data data/demo-train.pt -save_model model -cuda```
24 | 
25 | 3) Translate sentences.
26 | 
27 | ```python translate.py -cuda -model model_e13_*.pt -src data/src-test.txt -tgt data/tgt-test.txt -replace_unk -verbose```
28 | 
29 | ## Pretrained Models
30 | 
31 | The following pretrained models can be downloaded and used with translate.py.
32 | 
33 | - [onmt_model_en_de_200k](https://s3.amazonaws.com/pytorch/examples/opennmt/models/onmt_model_en_de_200k-4783d9c3.pt): An English-German translation model based on the 200k sentence dataset at [OpenNMT/IntegrationTesting](https://github.com/OpenNMT/IntegrationTesting/tree/master/data). Perplexity: 21. 
34 | - [onmt_model_en_fr_b1M](https://s3.amazonaws.com/pytorch/examples/opennmt/models/onmt_model_en_fr_b1M-261c69a7.pt): An English-French model trained on benchmark-1M. Perplexity: 4.85.
35 | 
36 | ## Release Notes
37 | 
38 | The following OpenNMT features are implemented:
39 | 
40 | - multi-layer bidirectional RNNs with attention and dropout
41 | - data preprocessing
42 | - saving and loading from checkpoints
43 | - inference (translation) with batching and beam search
44 | 
45 | Not yet implemented:
46 | 
47 | - word features
48 | - multi-GPU
49 | - residual connections
50 | 
51 | ## Performance
52 | 
53 | With default parameters on a single Maxwell GPU, this version runs about 70% faster than the Lua torch OpenNMT. The improved performance comes from two main sources:
54 | 
55 | - CuDNN is used for the encoder (although not for the decoder, since it can't handle attention)
56 | - The decoder softmax layer is batched to efficiently trade off CPU vs. memory efficiency; this can be tuned with the -max_generator_batches parameter.
57 | 


--------------------------------------------------------------------------------
/mnist_hogwild/main.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import argparse
 3 | import torch
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | import torch.multiprocessing as mp
 7 | 
 8 | from train import train
 9 | 
# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
# The help text must quote the real default (10, not 2).
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--num-processes', type=int, default=2, metavar='N',
                    help='how many training processes to use (default: 2)')
28 | 
class Net(nn.Module):
    """Small convolutional classifier: two conv/max-pool stages followed by two linear layers.

    `forward` returns log-probabilities over 10 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        # 320 flattened features = 20 channels * 4 * 4, which is what the two
        # conv/pool stages produce for 1x28x28 inputs.
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of single-channel images."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        # No ReLU on the final layer: clamping logits at zero would stop the
        # network from pushing any class score below the others.
        x = self.fc2(x)
        return F.log_softmax(x)
46 | 
if __name__ == '__main__':
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    # One model whose parameters live in shared memory, so every worker
    # process updates the same tensors.
    model = Net()
    model.share_memory()

    # Launch all workers first, then wait for each of them.
    workers = []
    for rank in range(args.num_processes):
        worker = mp.Process(target=train, args=(rank, args, model))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
62 | 


--------------------------------------------------------------------------------
/mnist_hogwild/train.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | import torch.optim as optim
 4 | import torch.nn.functional as F
 5 | from torch.autograd import Variable
 6 | from torchvision import datasets, transforms
 7 | 
 8 | def train(rank, args, model):
 9 |     torch.manual_seed(args.seed + rank)
10 |     for param in model.parameters():
11 |         # Break gradient sharing
12 |         param.grad.data = param.grad.data.clone()
13 | 
14 |     train_loader = torch.utils.data.DataLoader(
15 |         datasets.MNIST('../data', train=True, download=True,
16 |                     transform=transforms.Compose([
17 |                         transforms.ToTensor(),
18 |                         transforms.Normalize((0.1307,), (0.3081,))
19 |                     ])),
20 |         batch_size=args.batch_size, shuffle=True, num_workers=1)
21 |     test_loader = torch.utils.data.DataLoader(
22 |         datasets.MNIST('../data', train=False, transform=transforms.Compose([
23 |                         transforms.ToTensor(),
24 |                         transforms.Normalize((0.1307,), (0.3081,))
25 |                     ])),
26 |         batch_size=args.batch_size, shuffle=True, num_workers=1)
27 | 
28 |     optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
29 |     for epoch in range(1, args.epochs + 1):
30 |         train_epoch(epoch, args, model, train_loader, optimizer)
31 |         test_epoch(model, test_loader)
32 | 
33 | 
def train_epoch(epoch, args, model, data_loader, optimizer):
    """Run one optimisation pass over `data_loader`.

    Each batch gets a zero_grad / forward / NLL loss / backward / step cycle.
    Every `args.log_interval` batches a progress line prefixed with this
    process's pid is printed.
    """
    model.train()
    pid = os.getpid()
    log_line = '{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
    for batch_idx, (data, target) in enumerate(data_loader):
        inputs = Variable(data)
        labels = Variable(target)
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval != 0:
            continue
        print(log_line.format(
            pid, epoch, batch_idx * len(inputs), len(data_loader.dataset),
            100. * batch_idx / len(data_loader), loss.data[0]))
48 | 
49 | 
def test_epoch(model, data_loader):
    """Evaluate `model` on `data_loader` and print the average loss and accuracy.

    The printed loss is the mean of the per-batch NLL losses; accuracy is the
    number of correct predictions over `len(data_loader.dataset)`.
    """
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in data_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(data_loader) # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))
66 | 


--------------------------------------------------------------------------------
/word_language_model/generate.py:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Language Modeling on Penn Tree Bank
 3 | #
 4 | # This file generates new sentences sampled from the language model
 5 | #
 6 | ###############################################################################
 7 | 
 8 | import argparse
 9 | 
10 | import torch
11 | from torch.autograd import Variable
12 | 
13 | import data
14 | 
parser = argparse.ArgumentParser(description='PyTorch PTB Language Model')

# Model parameters.
parser.add_argument('--data', type=str, default='./data/penn',
                    help='location of the data corpus')
parser.add_argument('--checkpoint', type=str, default='./model.pt',
                    help='model checkpoint to use')
parser.add_argument('--outf', type=str, default='generated.txt',
                    help='output file for generated text')
# The default is an int, matching type=int (it used to be the string '1000').
parser.add_argument('--words', type=int, default=1000,
                    help='number of words to generate')
parser.add_argument('--seed', type=int, default=1111,
                    help='random seed')
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA')
parser.add_argument('--temperature', type=float, default=1.0,
                    help='temperature - higher will increase diversity')
parser.add_argument('--log-interval', type=int, default=100,
                    help='reporting interval')
args = parser.parse_args()

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)

# The model outputs are divided by the temperature below, so values too close
# to zero are rejected up front.
if args.temperature < 1e-3:
    parser.error("--temperature has to be greater or equal 1e-3")

# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
with open(args.checkpoint, 'rb') as f:
    model = torch.load(f)

if args.cuda:
    model.cuda()
else:
    model.cpu()

corpus = data.Corpus(args.data)
ntokens = len(corpus.dictionary)
hidden = model.init_hidden(1)
# Start from a uniformly random token index in [0, ntokens).
input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True)
if args.cuda:
    input.data = input.data.cuda()

with open(args.outf, 'w') as outf:
    for i in range(args.words):
        output, hidden = model(input, hidden)
        # Sampling weights are exp(output / temperature); multinomial draws
        # one index in proportion to them.
        word_weights = output.squeeze().data.div(args.temperature).exp().cpu()
        word_idx = torch.multinomial(word_weights, 1)[0]
        # Feed the sampled word back in as the next input.
        input.data.fill_(word_idx)
        word = corpus.dictionary.idx2word[word_idx]

        # Write 20 words per line.
        outf.write(word + ('\n' if i % 20 == 19 else ' '))

        if i % args.log_interval == 0:
            print('| Generated {}/{} words'.format(i, args.words))
74 | 


--------------------------------------------------------------------------------
/snli/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Variable
 4 | 
 5 | 
 6 | class Bottle(nn.Module):
 7 | 
 8 |     def forward(self, input):
 9 |         if len(input.size()) <= 2:
10 |             return super(Bottle, self).forward(input)
11 |         size = input.size()[:2]
12 |         out = super(Bottle, self).forward(input.view(size[0]*size[1], -1))
13 |         return out.view(*size, -1)
14 | 
15 | 
class Linear(Bottle, nn.Linear):
    """`nn.Linear` whose `forward` is routed through `Bottle` first (Bottle precedes it in the MRO)."""
18 | 
19 | 
class Encoder(nn.Module):
    """LSTM encoder that reduces a sequence batch to one vector per batch element.

    Reads `projection`, `d_proj`, `d_embed`, `d_hidden`, `n_layers`,
    `dp_ratio`, `birnn` and `n_cells` from `config`.
    """

    def __init__(self, config):
        super(Encoder, self).__init__()
        self.config = config
        # The LSTM consumes projected embeddings when projection is enabled.
        if config.projection:
            input_size = config.d_proj
        else:
            input_size = config.d_embed
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=config.d_hidden,
            num_layers=config.n_layers,
            dropout=config.dp_ratio,
            bidirectional=config.birnn,
        )

    def forward(self, inputs):
        """Return the final hidden state(s) for `inputs`.

        The batch size is taken from dimension 1 of `inputs`. The last hidden
        state is returned, or, for a bidirectional LSTM, the last two states
        concatenated per batch element.
        """
        cfg = self.config
        batch_size = inputs.size()[1]
        # One zero tensor serves as both the initial hidden and cell state.
        zeros = Variable(inputs.data.new(cfg.n_cells, batch_size, cfg.d_hidden).zero_())
        _, (ht, _) = self.rnn(inputs, (zeros, zeros))
        if cfg.birnn:
            return ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1)
        return ht[-1]
36 | 
37 | 
class SNLIClassifier(nn.Module):
    """Classifier over premise/hypothesis pairs.

    Both sentences are embedded, optionally projected, encoded by one shared
    `Encoder`, concatenated, and passed through a four-layer MLP with
    `config.d_out` outputs.
    """

    def __init__(self, config):
        super(SNLIClassifier, self).__init__()
        self.config = config
        self.embed = nn.Embedding(config.n_embed, config.d_embed)
        self.projection = Linear(config.d_embed, config.d_proj)
        self.encoder = Encoder(config)
        self.dropout = nn.Dropout(p=config.dp_ratio)
        self.relu = nn.ReLU()

        # Two encodings are concatenated; each doubles in width when the
        # encoder is bidirectional.
        hidden_dim = 2 * config.d_hidden
        if self.config.birnn:
            hidden_dim *= 2

        # Three Linear -> ReLU -> Dropout stages followed by the output layer.
        layers = []
        for _ in range(3):
            layers.extend([Linear(hidden_dim, hidden_dim), self.relu, self.dropout])
        layers.append(Linear(hidden_dim, config.d_out))
        self.out = nn.Sequential(*layers)

    def forward(self, batch):
        """Return the scores for `batch.premise` / `batch.hypothesis`."""
        embedded = [self.embed(batch.premise), self.embed(batch.hypothesis)]
        if self.config.fix_emb:
            # Re-wrapping the data cuts the graph, so no gradient reaches the
            # embedding weights.
            embedded = [Variable(e.data) for e in embedded]
        if self.config.projection:
            embedded = [self.relu(self.projection(e)) for e in embedded]
        encoded = [self.encoder(e) for e in embedded]
        return self.out(torch.cat(encoded, 1))
77 | 


--------------------------------------------------------------------------------
/reinforcement_learning/reinforce.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import gym
 3 | import numpy as np
 4 | from itertools import count
 5 | 
 6 | import torch
 7 | import torch.nn as nn
 8 | import torch.nn.functional as F
 9 | import torch.optim as optim
10 | import torch.autograd as autograd
11 | from torch.autograd import Variable
12 | 
13 | 
parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                    help='discount factor (default: 0.99)')
# The help text must quote the real default (543, not 1).
parser.add_argument('--seed', type=int, default=543, metavar='N',
                    help='random seed (default: 543)')
parser.add_argument('--render', action='store_true',
                    help='render the environment')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='interval between training status logs (default: 10)')
args = parser.parse_args()


# Seed both the environment and torch with the same value.
env = gym.make('CartPole-v0')
env.seed(args.seed)
torch.manual_seed(args.seed)
29 | 
30 | 
class Policy(nn.Module):
    """Two-layer policy network: 4 inputs -> 128 hidden units (ReLU) -> softmax over 2 outputs.

    It also carries two per-episode buffers, `saved_actions` and `rewards`.
    """

    def __init__(self):
        super(Policy, self).__init__()
        self.affine1 = nn.Linear(4, 128)
        self.affine2 = nn.Linear(128, 2)

        self.saved_actions, self.rewards = [], []

    def forward(self, x):
        """Return the softmax of the action scores for `x`."""
        hidden = F.relu(self.affine1(x))
        return F.softmax(self.affine2(hidden))
44 | 
45 | 
# Module-level policy and its optimizer; select_action and finish_episode use them.
model = Policy()
optimizer = optim.Adam(model.parameters(), lr=1e-2)
48 | 
49 | 
def select_action(state):
    """Sample an action for `state` from the policy and record it on the model.

    `state` is a numpy array. It is converted to a float tensor with a leading
    batch dimension of 1. The sampled action is appended to
    `model.saved_actions` and its `.data` is returned.
    """
    state_batch = torch.from_numpy(state).float().unsqueeze(0)
    action = model(Variable(state_batch)).multinomial()
    model.saved_actions.append(action)
    return action.data
56 | 
57 | 
def finish_episode():
    """Update the policy from the finished episode and clear the episode buffers.

    Discounted returns are computed from `model.rewards` with `args.gamma`,
    normalised to zero mean and unit standard deviation, and attached to the
    saved actions via `reinforce`. One optimizer step follows.
    """
    # Accumulate returns back-to-front, then flip once; this avoids a
    # front-insert on every step.
    returns = []
    R = 0
    for r in reversed(model.rewards):
        R = r + args.gamma * R
        returns.append(R)
    returns.reverse()

    rewards = torch.Tensor(returns)
    # eps keeps the division finite when all returns are equal.
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for action, r in zip(model.saved_actions, rewards):
        action.reinforce(r)
    optimizer.zero_grad()
    autograd.backward(model.saved_actions, [None for _ in model.saved_actions])
    optimizer.step()
    # Empty the buffers in place so the lists stay attached to the model.
    del model.rewards[:]
    del model.saved_actions[:]
74 | 
75 | 
# Exponential moving average of episode length (decay 0.99), starting at 10.
running_reward = 10
for i_episode in count(1):
    state = env.reset()
    for t in range(10000): # Don't infinite loop while learning
        action = select_action(state)
        state, reward, done, _ = env.step(action[0,0])
        if args.render:
            env.render()
        model.rewards.append(reward)
        if done:
            break

    # t is the index of the last step taken in this episode.
    running_reward = running_reward * 0.99 + t * 0.01
    finish_episode()
    if i_episode % args.log_interval == 0:
        print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(
            i_episode, t, running_reward))
    # NOTE(review): if the environment caps episodes at 200 steps or fewer,
    # t never exceeds 199 and this threshold is unreachable - confirm against
    # the environment's step limit / reward threshold.
    if running_reward > 200:
        print("Solved! Running reward is now {} and "
              "the last episode runs to {} time steps!".format(running_reward, t))
        break
97 | 


--------------------------------------------------------------------------------
/super_resolution/main.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import argparse
 3 | from math import log10
 4 | 
 5 | import torch
 6 | import torch.nn as nn
 7 | import torch.optim as optim
 8 | from torch.autograd import Variable
 9 | from torch.utils.data import DataLoader
10 | from model import Net
11 | from data import get_training_set, get_test_set
12 | 
# Training settings
parser = argparse.ArgumentParser(description='PyTorch Super Res Example')
parser.add_argument('--upscale_factor', type=int, required=True, help="super resolution upscale factor")
parser.add_argument('--batchSize', type=int, default=64, help='training batch size')
parser.add_argument('--testBatchSize', type=int, default=10, help='testing batch size')
parser.add_argument('--nEpochs', type=int, default=2, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.01, help='Learning Rate. Default=0.01')
parser.add_argument('--cuda', action='store_true', help='use cuda?')
parser.add_argument('--threads', type=int, default=4, help='number of threads for data loader to use')
parser.add_argument('--seed', type=int, default=123, help='random seed to use. Default=123')
opt = parser.parse_args()

print(opt)

# Fail early when --cuda is requested but no CUDA device is available.
cuda = opt.cuda
if cuda and not torch.cuda.is_available():
    raise Exception("No GPU found, please run without --cuda")

# Seed the RNGs before the datasets and the model are built.
torch.manual_seed(opt.seed)
if cuda:
    torch.cuda.manual_seed(opt.seed)

print('===> Loading datasets')
train_set = get_training_set(opt.upscale_factor)
test_set = get_test_set(opt.upscale_factor)
# Only the training loader shuffles.
training_data_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True)
testing_data_loader = DataLoader(dataset=test_set, num_workers=opt.threads, batch_size=opt.testBatchSize, shuffle=False)

print('===> Building model')
model = Net(upscale_factor=opt.upscale_factor)
criterion = nn.MSELoss()

if cuda:
    model = model.cuda()
    criterion = criterion.cuda()

# The optimizer is created after the optional move to the GPU.
optimizer = optim.Adam(model.parameters(), lr=opt.lr)
50 | 
51 | 
def train(epoch):
    """Train the module-level `model` for one pass over `training_data_loader`.

    The loss is printed after every batch and the mean batch loss at the end;
    `epoch` is only used in those messages.
    """
    running_total = 0
    num_batches = len(training_data_loader)
    for iteration, batch in enumerate(training_data_loader, 1):
        inputs = Variable(batch[0])
        targets = Variable(batch[1])
        if cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        running_total += loss.data[0]
        loss.backward()
        optimizer.step()

        print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, num_batches, loss.data[0]))

    print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, running_total / num_batches))
69 | 
70 | 
def test():
    """Print the mean per-batch PSNR of the module-level `model` over `testing_data_loader`.

    PSNR for a batch is `10 * log10(1 / mse)`, with `mse` given by `criterion`.
    """
    psnr_total = 0
    for batch in testing_data_loader:
        inputs = Variable(batch[0])
        targets = Variable(batch[1])
        if cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        mse = criterion(model(inputs), targets)
        psnr_total += 10 * log10(1 / mse.data[0])
    print("===> Avg. PSNR: {:.4f} dB".format(psnr_total / len(testing_data_loader)))
84 | 
85 | 
def checkpoint(epoch):
    """Save the whole `model` object to `model_epoch_<epoch>.pth` and report the path."""
    destination = "model_epoch_{}.pth".format(epoch)
    torch.save(model, destination)
    print("Checkpoint saved to {}".format(destination))
90 | 
# Epochs are numbered from 1; every epoch trains, evaluates, then writes a
# checkpoint file named after the epoch number.
for epoch in range(1, opt.nEpochs + 1):
    train(epoch)
    test()
    checkpoint(epoch)
95 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/Beam.py:
--------------------------------------------------------------------------------
  1 | # Class for managing the internals of the beam search process.
  2 | #
  3 | #
  4 | #         hyp1#-hyp1---hyp1 -hyp1
  5 | #                 \             /
  6 | #         hyp2 \-hyp2 /-hyp2#hyp2
  7 | #                               /      \
  8 | #         hyp3#-hyp3---hyp3 -hyp3
  9 | #         ========================
 10 | #
 11 | # Takes care of beams, back pointers, and scores.
 12 | 
 13 | import torch
 14 | import onmt
 15 | 
 16 | 
 17 | class Beam(object):
 18 |     def __init__(self, size, cuda=False):
 19 | 
 20 |         self.size = size
 21 |         self.done = False
 22 | 
 23 |         self.tt = torch.cuda if cuda else torch
 24 | 
 25 |         # The score for each translation on the beam.
 26 |         self.scores = self.tt.FloatTensor(size).zero_()
 27 | 
 28 |         # The backpointers at each time-step.
 29 |         self.prevKs = []
 30 | 
 31 |         # The outputs at each time-step.
 32 |         self.nextYs = [self.tt.LongTensor(size).fill_(onmt.Constants.PAD)]
 33 |         self.nextYs[0][0] = onmt.Constants.BOS
 34 | 
 35 |         # The attentions (matrix) for each time.
 36 |         self.attn = []
 37 | 
 38 |     # Get the outputs for the current timestep.
 39 |     def getCurrentState(self):
 40 |         return self.nextYs[-1]
 41 | 
 42 |     # Get the backpointers for the current timestep.
 43 |     def getCurrentOrigin(self):
 44 |         return self.prevKs[-1]
 45 | 
 46 |     #  Given prob over words for every last beam `wordLk` and attention
 47 |     #   `attnOut`: Compute and update the beam search.
 48 |     #
 49 |     # Parameters:
 50 |     #
 51 |     #     * `wordLk`- probs of advancing from the last step (K x words)
 52 |     #     * `attnOut`- attention at the last step
 53 |     #
 54 |     # Returns: True if beam search is complete.
 55 |     def advance(self, wordLk, attnOut):
 56 | 
 57 |         numWords = wordLk.size(1)
 58 | 
 59 |         # Sum the previous scores.
 60 |         if len(self.prevKs) > 0:
 61 |             beamLk = wordLk + self.scores.unsqueeze(1).expand_as(wordLk)
 62 |         else:
 63 |             beamLk = wordLk[0]
 64 | 
 65 |         flatBeamLk = beamLk.view(-1)
 66 | 
 67 |         bestScores, bestScoresId = flatBeamLk.topk(self.size, 0, True, True)
 68 |         self.scores = bestScores
 69 | 
 70 |         # bestScoresId is flattened beam x word array, so calculate which
 71 |         # word and beam each score came from
 72 |         prevK = bestScoresId / numWords
 73 |         self.prevKs.append(prevK)
 74 |         self.nextYs.append(bestScoresId - prevK * numWords)
 75 |         self.attn.append(attnOut.index_select(0, prevK))
 76 | 
 77 |         # End condition is when top-of-beam is EOS.
 78 |         if self.nextYs[-1][0] == onmt.Constants.EOS:
 79 |             self.done = True
 80 | 
 81 |         return self.done
 82 | 
 83 |     def sortBest(self):
 84 |         return torch.sort(self.scores, 0, True)
 85 | 
 86 |     # Get the score of the best in the beam.
 87 |     def getBest(self):
 88 |         scores, ids = self.sortBest()
 89 |         return scores[1], ids[1]
 90 | 
 91 |     # Walk back to construct the full hypothesis.
 92 |     #
 93 |     # Parameters.
 94 |     #
 95 |     #     * `k` - the position in the beam to construct.
 96 |     #
 97 |     # Returns.
 98 |     #
 99 |     #     1. The hypothesis
100 |     #     2. The attention at each time step.
101 |     def getHyp(self, k):
102 |         hyp, attn = [], []
103 |         # print(len(self.prevKs), len(self.nextYs), len(self.attn))
104 |         for j in range(len(self.prevKs) - 1, -1, -1):
105 |             hyp.append(self.nextYs[j+1][k])
106 |             attn.append(self.attn[j][k])
107 |             k = self.prevKs[j][k]
108 | 
109 |         return hyp[::-1], torch.stack(attn[::-1])
110 | 


--------------------------------------------------------------------------------
/reinforcement_learning/actor_critic.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import gym
  3 | import numpy as np
  4 | from itertools import count
  5 | from collections import namedtuple
  6 | 
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | import torch.optim as optim
 11 | import torch.autograd as autograd
 12 | from torch.autograd import Variable
 13 | 
 14 | 
 15 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example')
 16 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
 17 |                     help='discount factor (default: 0.99)')
 18 | parser.add_argument('--seed', type=int, default=543, metavar='N',
 19 |                     help='random seed (default: 1)')
 20 | parser.add_argument('--render', action='store_true',
 21 |                     help='render the environment')
 22 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
 23 |                     help='interval between training status logs (default: 10)')
 24 | args = parser.parse_args()
 25 | 
 26 | 
 27 | env = gym.make('CartPole-v0')
 28 | env.seed(args.seed)
 29 | torch.manual_seed(args.seed)
 30 | 
 31 | 
 32 | SavedAction = namedtuple('SavedAction', ['action', 'value'])
 33 | class Policy(nn.Module):
 34 |     def __init__(self):
 35 |         super(Policy, self).__init__()
 36 |         self.affine1 = nn.Linear(4, 128)
 37 |         self.action_head = nn.Linear(128, 2)
 38 |         self.value_head = nn.Linear(128, 1)
 39 | 
 40 |         self.saved_actions = []
 41 |         self.rewards = []
 42 | 
 43 |     def forward(self, x):
 44 |         x = F.relu(self.affine1(x))
 45 |         action_scores = self.action_head(x)
 46 |         state_values = self.value_head(x)
 47 |         return F.softmax(action_scores), state_values
 48 | 
 49 | 
# A single network and a single Adam optimizer are shared by the whole script.
model = Policy()
optimizer = optim.Adam(model.parameters(), lr=3e-2)
 52 | 
 53 | 
 54 | def select_action(state):
 55 |     state = torch.from_numpy(state).float().unsqueeze(0)
 56 |     probs, state_value = model(Variable(state))
 57 |     action = probs.multinomial()
 58 |     model.saved_actions.append(SavedAction(action, state_value))
 59 |     return action.data
 60 | 
 61 | 
 62 | def finish_episode():
 63 |     R = 0
 64 |     saved_actions = model.saved_actions
 65 |     value_loss = 0
 66 |     rewards = []
 67 |     for r in model.rewards[::-1]:
 68 |         R = r + args.gamma * R
 69 |         rewards.insert(0, R)
 70 |     rewards = torch.Tensor(rewards)
 71 |     rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
 72 |     for (action, value), r in zip(saved_actions, rewards):
 73 |         action.reinforce(r - value.data.squeeze())
 74 |         value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
 75 |     optimizer.zero_grad()
 76 |     final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
 77 |     gradients = [torch.ones(1)] + [None] * len(saved_actions)
 78 |     autograd.backward(final_nodes, gradients)
 79 |     optimizer.step()
 80 |     del model.rewards[:]
 81 |     del model.saved_actions[:]
 82 | 
 83 | 
 84 | running_reward = 10
 85 | for i_episode in count(1):
 86 |     state = env.reset()
 87 |     for t in range(10000): # Don't infinite loop while learning
 88 |         action = select_action(state)
 89 |         state, reward, done, _ = env.step(action[0,0])
 90 |         if args.render:
 91 |             env.render()
 92 |         model.rewards.append(reward)
 93 |         if done:
 94 |             break
 95 | 
 96 |     running_reward = running_reward * 0.99 + t * 0.01
 97 |     finish_episode()
 98 |     if i_episode % args.log_interval == 0:
 99 |         print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(
100 |             i_episode, t, running_reward))
101 |     if running_reward > 200:
102 |         print("Solved! Running reward is now {} and "
103 |               "the last episode runs to {} time steps!".format(running_reward, t))
104 |         break
105 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/Dict.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | 
  3 | 
  4 | class Dict(object):
  5 |     def __init__(self, data=None):
  6 |         self.idxToLabel = {}
  7 |         self.labelToIdx = {}
  8 |         self.frequencies = {}
  9 | 
 10 |         # Special entries will not be pruned.
 11 |         self.special = []
 12 | 
 13 |         if data is not None:
 14 |             if type(data) == str:
 15 |                 self.loadFile(data)
 16 |             else:
 17 |                 self.addSpecials(data)
 18 | 
 19 |     def size(self):
 20 |         return len(self.idxToLabel)
 21 | 
 22 |     # Load entries from a file.
 23 |     def loadFile(self, filename):
 24 |         for line in open(filename):
 25 |             fields = line.split()
 26 |             label = fields[0]
 27 |             idx = int(fields[1])
 28 |             self.add(label, idx)
 29 | 
 30 |     # Write entries to a file.
 31 |     def writeFile(self, filename):
 32 |         with open(filename, 'w') as file:
 33 |             for i in range(self.size()):
 34 |                 label = self.idxToLabel[i]
 35 |                 file.write('%s %d\n' % (label, i))
 36 | 
 37 |         file.close()
 38 | 
 39 |     def lookup(self, key, default=None):
 40 |         try:
 41 |             return self.labelToIdx[key]
 42 |         except KeyError:
 43 |             return default
 44 | 
 45 |     def getLabel(self, idx, default=None):
 46 |         try:
 47 |             return self.idxToLabel[idx]
 48 |         except KeyError:
 49 |             return default
 50 | 
 51 |     # Mark this `label` and `idx` as special (i.e. will not be pruned).
 52 |     def addSpecial(self, label, idx=None):
 53 |         idx = self.add(label, idx)
 54 |         self.special += [idx]
 55 | 
 56 |     # Mark all labels in `labels` as specials (i.e. will not be pruned).
 57 |     def addSpecials(self, labels):
 58 |         for label in labels:
 59 |             self.addSpecial(label)
 60 | 
 61 |     # Add `label` in the dictionary. Use `idx` as its index if given.
 62 |     def add(self, label, idx=None):
 63 |         if idx is not None:
 64 |             self.idxToLabel[idx] = label
 65 |             self.labelToIdx[label] = idx
 66 |         else:
 67 |             if label in self.labelToIdx:
 68 |                 idx = self.labelToIdx[label]
 69 |             else:
 70 |                 idx = len(self.idxToLabel)
 71 |                 self.idxToLabel[idx] = label
 72 |                 self.labelToIdx[label] = idx
 73 | 
 74 |         if idx not in self.frequencies:
 75 |             self.frequencies[idx] = 1
 76 |         else:
 77 |             self.frequencies[idx] += 1
 78 | 
 79 |         return idx
 80 | 
 81 |     # Return a new dictionary with the `size` most frequent entries.
 82 |     def prune(self, size):
 83 |         if size >= self.size():
 84 |             return self
 85 | 
 86 |         # Only keep the `size` most frequent entries.
 87 |         freq = torch.Tensor(
 88 |                 [self.frequencies[i] for i in range(len(self.frequencies))])
 89 |         _, idx = torch.sort(freq, 0, True)
 90 | 
 91 |         newDict = Dict()
 92 | 
 93 |         # Add special entries in all cases.
 94 |         for i in self.special:
 95 |             newDict.addSpecial(self.idxToLabel[i])
 96 | 
 97 |         for i in idx[:size]:
 98 |             newDict.add(self.idxToLabel[i])
 99 | 
100 |         return newDict
101 | 
102 |     # Convert `labels` to indices. Use `unkWord` if not found.
103 |     # Optionally insert `bosWord` at the beginning and `eosWord` at the .
104 |     def convertToIdx(self, labels, unkWord, bosWord=None, eosWord=None):
105 |         vec = []
106 | 
107 |         if bosWord is not None:
108 |             vec += [self.lookup(bosWord)]
109 | 
110 |         unk = self.lookup(unkWord)
111 |         vec += [self.lookup(label, default=unk) for label in labels]
112 | 
113 |         if eosWord is not None:
114 |             vec += [self.lookup(eosWord)]
115 | 
116 |         return torch.LongTensor(vec)
117 | 
118 |     # Convert `idx` to labels. If index `stop` is reached, convert it and return.
119 |     def convertToLabels(self, idx, stop):
120 |         labels = []
121 | 
122 |         for i in idx:
123 |             labels += [self.getLabel(i)]
124 |             if i == stop:
125 |                 break
126 | 
127 |         return labels
128 | 


--------------------------------------------------------------------------------
/OpenNMT/translate.py:
--------------------------------------------------------------------------------
  1 | import onmt
  2 | import torch
  3 | import argparse
  4 | import math
  5 | 
  6 | parser = argparse.ArgumentParser(description='translate.py')
  7 | 
  8 | parser.add_argument('-model', required=True,
  9 |                     help='Path to model .pt file')
 10 | parser.add_argument('-src',   required=True,
 11 |                     help='Source sequence to decode (one line per sequence)')
 12 | parser.add_argument('-tgt',
 13 |                     help='True target sequence (optional)')
 14 | parser.add_argument('-output', default='pred.txt',
 15 |                     help="""Path to output the predictions (each line will
 16 |                     be the decoded sequence""")
 17 | parser.add_argument('-beam_size',  type=int, default=5,
 18 |                     help='Beam size')
 19 | parser.add_argument('-batch_size', type=int, default=30,
 20 |                     help='Batch size')
 21 | parser.add_argument('-max_sent_length', default=100,
 22 |                     help='Maximum sentence length.')
 23 | parser.add_argument('-replace_unk', action="store_true",
 24 |                     help="""Replace the generated UNK tokens with the source
 25 |                     token that had the highest attention weight. If phrase_table
 26 |                     is provided, it will lookup the identified source token and
 27 |                     give the corresponding target token. If it is not provided
 28 |                     (or the identified source token does not exist in the
 29 |                     table) then it will copy the source token""")
 30 | # parser.add_argument('-phrase_table',
 31 | #                     help="""Path to source-target dictionary to replace UNK
 32 | #                     tokens. See README.md for the format of this file.""")
 33 | parser.add_argument('-verbose', action="store_true",
 34 |                     help='Print scores and predictions for each sentence')
 35 | parser.add_argument('-n_best', type=int, default=1,
 36 |                     help="""If verbose is set, will output the n_best
 37 |                     decoded sentences""")
 38 | 
 39 | parser.add_argument('-gpu', type=int, default=-1,
 40 |                     help="Device to run on")
 41 | 
 42 | 
 43 | 
 44 | def reportScore(name, scoreTotal, wordsTotal):
 45 |     print("%s AVG SCORE: %.4f, %s PPL: %.4f" % (
 46 |         name, scoreTotal / wordsTotal,
 47 |         name, math.exp(-scoreTotal/wordsTotal)))
 48 | 
 49 | 
 50 | def main():
 51 |     opt = parser.parse_args()
 52 |     opt.cuda = opt.gpu > -1
 53 |     torch.cuda.set_device(opt.gpu)
 54 | 
 55 |     translator = onmt.Translator(opt)
 56 | 
 57 |     outF = open(opt.output, 'w')
 58 | 
 59 |     predScoreTotal, predWordsTotal, goldScoreTotal, goldWordsTotal = 0, 0, 0, 0
 60 | 
 61 |     srcBatch, tgtBatch = [], []
 62 | 
 63 |     count = 0
 64 | 
 65 |     tgtF = open(opt.tgt) if opt.tgt else None
 66 |     for line in open(opt.src):
 67 | 
 68 |         srcTokens = line.split()
 69 |         srcBatch += [srcTokens]
 70 |         if tgtF:
 71 |             tgtTokens = tgtF.readline().split() if tgtF else None
 72 |             tgtBatch += [tgtTokens]
 73 | 
 74 |         if len(srcBatch) < opt.batch_size:
 75 |             continue
 76 | 
 77 |         predBatch, predScore, goldScore = translator.translate(srcBatch, tgtBatch)
 78 | 
 79 |         predScoreTotal += sum(score[0] for score in predScore)
 80 |         predWordsTotal += sum(len(x) for x in predBatch)
 81 |         if tgtF is not None:
 82 |             goldScoreTotal += sum(goldScore)
 83 |             goldWordsTotal += sum(len(x) for x in tgtBatch)
 84 | 
 85 |         for b in range(len(predBatch)):
 86 |             count += 1
 87 |             outF.write(" ".join(predBatch[b][0]) + '\n')
 88 | 
 89 |             if opt.verbose:
 90 |                 print('SENT %d: %s' % (count, " ".join(srcBatch[b])))
 91 |                 print('PRED %d: %s' % (count, " ".join(predBatch[b][0])))
 92 |                 print("PRED SCORE: %.4f" % predScore[b][0])
 93 | 
 94 |                 if tgtF is not None:
 95 |                     print('GOLD %d: %s ' % (count, " ".join(tgtBatch[b])))
 96 |                     print("GOLD SCORE: %.4f" % goldScore[b])
 97 | 
 98 |                 if opt.n_best > 1:
 99 |                     print('\nBEST HYP:')
100 |                     for n in range(opt.n_best):
101 |                         print("[%.4f] %s" % (predScore[b][n], " ".join(predBatch[b][0])))
102 | 
103 |                 print('')
104 | 
105 |         srcBatch, tgtBatch = [], []
106 | 
107 |     reportScore('PRED', predScoreTotal, predWordsTotal)
108 |     if tgtF:
109 |         reportScore('GOLD', goldScoreTotal, goldWordsTotal)
110 | 
111 |     if tgtF:
112 |         tgtF.close()
113 | 
114 | 
115 | if __name__ == "__main__":
116 |     main()
117 | 


--------------------------------------------------------------------------------
/snli/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | import glob
  4 | 
  5 | import torch
  6 | import torch.optim as O
  7 | import torch.nn as nn
  8 | 
  9 | from torchtext import data
 10 | from torchtext import datasets
 11 | 
 12 | from model import SNLIClassifier
 13 | from util import get_args
 14 | 
 15 | 
# Read the run configuration and select the CUDA device it names.
args = get_args()
torch.cuda.set_device(args.gpu)

# `inputs` handles the text fields, `answers` the non-sequential label field.
inputs = data.Field(lower=args.lower)
answers = data.Field(sequential=False)

train, dev, test = datasets.SNLI.splits(inputs, answers)

# NOTE(review): the input vocabulary is built from train, dev and test
# together, while the label vocabulary below uses train only.
inputs.build_vocab(train, dev, test)
if args.word_vectors:
    # Reuse cached vectors when the cache file exists; otherwise load them
    # and write the cache for the next run.
    if os.path.isfile(args.vector_cache):
        inputs.vocab.vectors = torch.load(args.vector_cache)
    else:
        inputs.vocab.load_vectors(wv_dir=args.data_cache, wv_type=args.word_vectors, wv_dim=args.d_embed)
        os.makedirs(os.path.dirname(args.vector_cache), exist_ok=True)
        torch.save(inputs.vocab.vectors, args.vector_cache)
answers.build_vocab(train)

train_iter, dev_iter, test_iter = data.BucketIterator.splits(
            (train, dev, test), batch_size=args.batch_size, device=args.gpu)

# The parsed arguments double as the model configuration; the sizes that
# depend on the data are filled in here.
config = args
config.n_embed = len(inputs.vocab)
config.d_out = len(answers.vocab)
config.n_cells = config.n_layers
if config.birnn:
    # Doubled when `birnn` is set.
    config.n_cells *= 2
 44 | if args.resume_snapshot:
 45 |     model = torch.load(args.resume_snapshot, map_location=lambda storage, locatoin: storage.cuda(args.gpu))
 46 | else:
 47 |     model = SNLIClassifier(config)
 48 |     if args.word_vectors:
 49 |         model.embed.weight.data = inputs.vocab.vectors
 50 |         model.cuda()
 51 | 
criterion = nn.CrossEntropyLoss()
opt = O.Adam(model.parameters(), lr=args.lr)

# Counters and templates used by the training loop below.
iterations = 0
start = time.time()
best_dev_acc = -1
train_iter.repeat = False
header = '  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss     Accuracy  Dev/Accuracy'
# The comma-separated column formats are re-joined with single spaces.
# `log_template` leaves its two dev columns as plain `{}` placeholders.
dev_log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'.split(','))
log_template =     ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'.split(','))
os.makedirs(args.save_path, exist_ok=True)
print(header)
 64 | 
for epoch in range(args.epochs):
    train_iter.init_epoch()
    # Running training accuracy is reset at the start of every epoch.
    n_correct, n_total = 0, 0
    for batch_idx, batch in enumerate(train_iter):
        model.train(); opt.zero_grad()
        iterations += 1
        answer = model(batch)
        # Count predictions (argmax over dimension 1) that equal the label.
        n_correct += (torch.max(answer, 1)[1].view(batch.label.size()).data == batch.label.data).sum()
        n_total += batch.batch_size
        train_acc = 100. * n_correct/n_total
        loss = criterion(answer, batch.label)
        loss.backward(); opt.step()
        if iterations % args.save_every == 0:
            # Save a periodic snapshot and delete every other file that
            # shares the 'snapshot' prefix, so only the newest one remains.
            snapshot_prefix = os.path.join(args.save_path, 'snapshot')
            snapshot_path = snapshot_prefix + '_acc_{:.4f}_loss_{:.6f}_iter_{}_model.pt'.format(train_acc, loss.data[0], iterations)
            torch.save(model, snapshot_path)
            for f in glob.glob(snapshot_prefix + '*'):
                if f != snapshot_path:
                    os.remove(f)
        if iterations % args.dev_every == 0:
            # Evaluate on the dev set.
            model.eval(); dev_iter.init_epoch()
            n_dev_correct, dev_loss = 0, 0
            # NOTE(review): dev_loss is overwritten on every batch, so the
            # value logged below is the loss of the last dev batch only.
            for dev_batch_idx, dev_batch in enumerate(dev_iter):
                 answer = model(dev_batch)
                 n_dev_correct += (torch.max(answer, 1)[1].view(dev_batch.label.size()).data == dev_batch.label.data).sum()
                 dev_loss = criterion(answer, dev_batch.label)
            dev_acc = 100. * n_dev_correct / len(dev)
            print(dev_log_template.format(time.time()-start,
                epoch, iterations, 1+batch_idx, len(train_iter),
                100. * (1+batch_idx) / len(train_iter), loss.data[0], dev_loss.data[0], train_acc, dev_acc))
            if dev_acc > best_dev_acc:
                # New best dev accuracy: save it and remove older
                # 'best_snapshot' files.
                best_dev_acc = dev_acc
                snapshot_prefix = os.path.join(args.save_path, 'best_snapshot')
                snapshot_path = snapshot_prefix + '_devacc_{}_devloss_{}__iter_{}_model.pt'.format(dev_acc, dev_loss.data[0], iterations)
                torch.save(model, snapshot_path)
                for f in glob.glob(snapshot_prefix + '*'):
                    if f != snapshot_path:
                        os.remove(f)
        elif iterations % args.log_every == 0:
            # Training-only progress line; the dev columns are filled with blanks.
            print(log_template.format(time.time()-start,
                epoch, iterations, 1+batch_idx, len(train_iter),
                100. * (1+batch_idx) / len(train_iter), loss.data[0], ' '*8, n_correct/n_total*100, ' '*12))
107 | 
108 | 
109 | 


--------------------------------------------------------------------------------
/mnist/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import argparse
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.functional as F
  6 | import torch.optim as optim
  7 | from torchvision import datasets, transforms
  8 | from torch.autograd import Variable
  9 | 
 10 | # Training settings
 11 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
 12 | parser.add_argument('--batch-size', type=int, default=64, metavar='N',
 13 |                     help='input batch size for training (default: 64)')
 14 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
 15 |                     help='input batch size for testing (default: 1000)')
 16 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
 17 |                     help='number of epochs to train (default: 10)')
 18 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
 19 |                     help='learning rate (default: 0.01)')
 20 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
 21 |                     help='SGD momentum (default: 0.5)')
 22 | parser.add_argument('--no-cuda', action='store_true', default=False,
 23 |                     help='enables CUDA training')
 24 | parser.add_argument('--seed', type=int, default=1, metavar='S',
 25 |                     help='random seed (default: 1)')
 26 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
 27 |                     help='how many batches to wait before logging training status')
 28 | args = parser.parse_args()
 29 | args.cuda = not args.no_cuda and torch.cuda.is_available()
 30 | 
 31 | torch.manual_seed(args.seed)
 32 | if args.cuda:
 33 |     torch.cuda.manual_seed(args.seed)
 34 | 
 35 | 
 36 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
 37 | train_loader = torch.utils.data.DataLoader(
 38 |     datasets.MNIST('../data', train=True, download=True,
 39 |                    transform=transforms.Compose([
 40 |                        transforms.ToTensor(),
 41 |                        transforms.Normalize((0.1307,), (0.3081,))
 42 |                    ])),
 43 |     batch_size=args.batch_size, shuffle=True, **kwargs)
 44 | test_loader = torch.utils.data.DataLoader(
 45 |     datasets.MNIST('../data', train=False, transform=transforms.Compose([
 46 |                        transforms.ToTensor(),
 47 |                        transforms.Normalize((0.1307,), (0.3081,))
 48 |                    ])),
 49 |     batch_size=args.batch_size, shuffle=True, **kwargs)
 50 | 
 51 | 
 52 | class Net(nn.Module):
 53 |     def __init__(self):
 54 |         super(Net, self).__init__()
 55 |         self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
 56 |         self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
 57 |         self.conv2_drop = nn.Dropout2d()
 58 |         self.fc1 = nn.Linear(320, 50)
 59 |         self.fc2 = nn.Linear(50, 10)
 60 | 
 61 |     def forward(self, x):
 62 |         x = F.relu(F.max_pool2d(self.conv1(x), 2))
 63 |         x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
 64 |         x = x.view(-1, 320)
 65 |         x = F.relu(self.fc1(x))
 66 |         x = F.dropout(x, training=self.training)
 67 |         x = F.relu(self.fc2(x))
 68 |         return F.log_softmax(x)
 69 | 
# Build the network, moving it to the GPU when CUDA is enabled.
model = Net()
if args.cuda:
    model.cuda()

# SGD with the learning rate and momentum given on the command line.
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
 75 | 
 76 | def train(epoch):
 77 |     model.train()
 78 |     for batch_idx, (data, target) in enumerate(train_loader):
 79 |         if args.cuda:
 80 |             data, target = data.cuda(), target.cuda()
 81 |         data, target = Variable(data), Variable(target)
 82 |         optimizer.zero_grad()
 83 |         output = model(data)
 84 |         loss = F.nll_loss(output, target)
 85 |         loss.backward()
 86 |         optimizer.step()
 87 |         if batch_idx % args.log_interval == 0:
 88 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
 89 |                 epoch, batch_idx * len(data), len(train_loader.dataset),
 90 |                 100. * batch_idx / len(train_loader), loss.data[0]))
 91 | 
 92 | def test(epoch):
 93 |     model.eval()
 94 |     test_loss = 0
 95 |     correct = 0
 96 |     for data, target in test_loader:
 97 |         if args.cuda:
 98 |             data, target = data.cuda(), target.cuda()
 99 |         data, target = Variable(data, volatile=True), Variable(target)
100 |         output = model(data)
101 |         test_loss += F.nll_loss(output, target).data[0]
102 |         pred = output.data.max(1)[1] # get the index of the max log-probability
103 |         correct += pred.eq(target.data).cpu().sum()
104 | 
105 |     test_loss = test_loss
106 |     test_loss /= len(test_loader) # loss function already averages over batch size
107 |     print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
108 |         test_loss, correct, len(test_loader.dataset),
109 |         100. * correct / len(test_loader.dataset)))
110 | 
111 | 
# Epochs are numbered from 1; each one trains and then evaluates.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    test(epoch)
115 | 


--------------------------------------------------------------------------------
/vae/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import argparse
  3 | import torch
  4 | import torch.utils.data
  5 | import torch.nn as nn
  6 | import torch.optim as optim
  7 | from torch.autograd import Variable
  8 | from torchvision import datasets, transforms
  9 | 
 10 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
 11 | parser.add_argument('--batch-size', type=int, default=128, metavar='N',
 12 |                     help='input batch size for training (default: 64)')
 13 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
 14 |                     help='number of epochs to train (default: 2)')
 15 | parser.add_argument('--no-cuda', action='store_true', default=False,
 16 |                     help='enables CUDA training')
 17 | parser.add_argument('--seed', type=int, default=1, metavar='S',
 18 |                     help='random seed (default: 1)')
 19 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
 20 |                     help='how many batches to wait before logging training status')
 21 | args = parser.parse_args()
 22 | args.cuda = not args.no_cuda and torch.cuda.is_available()
 23 | 
 24 | 
 25 | torch.manual_seed(args.seed)
 26 | if args.cuda:
 27 |     torch.cuda.manual_seed(args.seed)
 28 | 
 29 | 
 30 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
 31 | train_loader = torch.utils.data.DataLoader(
 32 |     datasets.MNIST('../data', train=True, download=True,
 33 |                    transform=transforms.ToTensor()),
 34 |     batch_size=args.batch_size, shuffle=True, **kwargs)
 35 | test_loader = torch.utils.data.DataLoader(
 36 |     datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
 37 |     batch_size=args.batch_size, shuffle=True, **kwargs)
 38 | 
 39 | 
 40 | class VAE(nn.Module):
 41 |     def __init__(self):
 42 |         super(VAE, self).__init__()
 43 | 
 44 |         self.fc1 = nn.Linear(784, 400)
 45 |         self.fc21 = nn.Linear(400, 20)
 46 |         self.fc22 = nn.Linear(400, 20)
 47 |         self.fc3 = nn.Linear(20, 400)
 48 |         self.fc4 = nn.Linear(400, 784)
 49 | 
 50 |         self.relu = nn.ReLU()
 51 |         self.sigmoid = nn.Sigmoid()
 52 | 
 53 |     def encode(self, x):
 54 |         h1 = self.relu(self.fc1(x))
 55 |         return self.fc21(h1), self.fc22(h1)
 56 | 
 57 |     def reparametrize(self, mu, logvar):
 58 |         std = logvar.mul(0.5).exp_()
 59 |         if args.cuda:
 60 |             eps = torch.cuda.FloatTensor(std.size()).normal_()
 61 |         else:
 62 |             eps = torch.FloatTensor(std.size()).normal_()
 63 |         eps = Variable(eps)
 64 |         return eps.mul(std).add_(mu)
 65 | 
 66 |     def decode(self, z):
 67 |         h3 = self.relu(self.fc3(z))
 68 |         return self.sigmoid(self.fc4(h3))
 69 | 
 70 |     def forward(self, x):
 71 |         mu, logvar = self.encode(x.view(-1, 784))
 72 |         z = self.reparametrize(mu, logvar)
 73 |         return self.decode(z), mu, logvar
 74 | 
 75 | 
# Build the network and, when CUDA training is enabled, move it to the GPU.
model = VAE()
if args.cuda:
    model.cuda()
 79 | 
 80 | reconstruction_function = nn.BCELoss()
 81 | reconstruction_function.size_average = False
 82 | 
 83 | 
 84 | def loss_function(recon_x, x, mu, logvar):
 85 |     BCE = reconstruction_function(recon_x, x)
 86 | 
 87 |     # see Appendix B from VAE paper:
 88 |     # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
 89 |     # https://arxiv.org/abs/1312.6114
 90 |     # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
 91 |     KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
 92 |     KLD = torch.sum(KLD_element).mul_(-0.5)
 93 | 
 94 |     return BCE + KLD
 95 | 
 96 | 
 97 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
 98 | 
 99 | 
100 | def train(epoch):
101 |     model.train()
102 |     train_loss = 0
103 |     for batch_idx, (data, _) in enumerate(train_loader):
104 |         data = Variable(data)
105 |         if args.cuda:
106 |             data = data.cuda()
107 |         optimizer.zero_grad()
108 |         recon_batch, mu, logvar = model(data)
109 |         loss = loss_function(recon_batch, data, mu, logvar)
110 |         loss.backward()
111 |         train_loss += loss.data[0]
112 |         optimizer.step()
113 |         if batch_idx % args.log_interval == 0:
114 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
115 |                 epoch, batch_idx * len(data), len(train_loader.dataset),
116 |                 100. * batch_idx / len(train_loader),
117 |                 loss.data[0] / len(data)))
118 | 
119 |     print('====> Epoch: {} Average loss: {:.4f}'.format(
120 |           epoch, train_loss / len(train_loader.dataset)))
121 | 
122 | 
def test(epoch):
    """Print the per-sample average loss of the model over ``test_loader``."""
    model.eval()
    accumulated = 0
    for images, _ in test_loader:
        if args.cuda:
            images = images.cuda()
        # volatile=True: no graph is recorded for evaluation batches.
        batch = Variable(images, volatile=True)
        reconstruction, mu, logvar = model(batch)
        batch_loss = loss_function(reconstruction, batch, mu, logvar)
        accumulated += batch_loss.data[0]

    accumulated /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(accumulated))


# Alternate one training pass and one evaluation pass per epoch.
for epoch in range(1, args.epochs + 1):
    train(epoch)
    test(epoch)
140 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/Models.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | import onmt.modules
  5 | 
  6 | class Encoder(nn.Module):
  7 | 
  8 |     def __init__(self, opt, dicts):
  9 |         self.layers = opt.layers
 10 |         self.num_directions = 2 if opt.brnn else 1
 11 |         assert opt.rnn_size % self.num_directions == 0
 12 |         self.hidden_size = opt.rnn_size // self.num_directions
 13 |         inputSize = opt.word_vec_size
 14 | 
 15 |         super(Encoder, self).__init__()
 16 |         self.word_lut = nn.Embedding(dicts.size(),
 17 |                                   opt.word_vec_size,
 18 |                                   padding_idx=onmt.Constants.PAD)
 19 |         self.rnn = nn.LSTM(inputSize, self.hidden_size,
 20 |                         num_layers=opt.layers,
 21 |                         dropout=opt.dropout,
 22 |                         bidirectional=opt.brnn)
 23 | 
 24 |         # self.rnn.bias_ih_l0.data.div_(2)
 25 |         # self.rnn.bias_hh_l0.data.copy_(self.rnn.bias_ih_l0.data)
 26 | 
 27 |         if opt.pre_word_vecs_enc is not None:
 28 |             pretrained = torch.load(opt.pre_word_vecs_enc)
 29 |             self.word_lut.weight.copy_(pretrained)
 30 | 
 31 |     def forward(self, input, hidden=None):
 32 |         batch_size = input.size(0) # batch first for multi-gpu compatibility
 33 |         emb = self.word_lut(input).transpose(0, 1)
 34 |         if hidden is None:
 35 |             h_size = (self.layers * self.num_directions, batch_size, self.hidden_size)
 36 |             h_0 = Variable(emb.data.new(*h_size).zero_(), requires_grad=False)
 37 |             c_0 = Variable(emb.data.new(*h_size).zero_(), requires_grad=False)
 38 |             hidden = (h_0, c_0)
 39 | 
 40 |         outputs, hidden_t = self.rnn(emb, hidden)
 41 |         return hidden_t, outputs
 42 | 
 43 | 
 44 | class StackedLSTM(nn.Module):
 45 |     def __init__(self, num_layers, input_size, rnn_size, dropout):
 46 |         super(StackedLSTM, self).__init__()
 47 |         self.dropout = nn.Dropout(dropout)
 48 |         self.num_layers = num_layers
 49 | 
 50 |         for i in range(num_layers):
 51 |             layer = nn.LSTMCell(input_size, rnn_size)
 52 |             self.add_module('layer_%d' % i, layer)
 53 |             input_size = rnn_size
 54 | 
 55 |     def forward(self, input, hidden):
 56 |         h_0, c_0 = hidden
 57 |         h_1, c_1 = [], []
 58 |         for i in range(self.num_layers):
 59 |             layer = getattr(self, 'layer_%d' % i)
 60 |             h_1_i, c_1_i = layer(input, (h_0[i], c_0[i]))
 61 |             input = h_1_i
 62 |             if i != self.num_layers:
 63 |                 input = self.dropout(input)
 64 |             h_1 += [h_1_i]
 65 |             c_1 += [c_1_i]
 66 | 
 67 |         h_1 = torch.stack(h_1)
 68 |         c_1 = torch.stack(c_1)
 69 | 
 70 |         return input, (h_1, c_1)
 71 | 
 72 | 
 73 | class Decoder(nn.Module):
 74 | 
 75 |     def __init__(self, opt, dicts):
 76 |         self.layers = opt.layers
 77 |         self.input_feed = opt.input_feed
 78 |         input_size = opt.word_vec_size
 79 |         if self.input_feed:
 80 |             input_size += opt.rnn_size
 81 | 
 82 |         super(Decoder, self).__init__()
 83 |         self.word_lut = nn.Embedding(dicts.size(),
 84 |                                   opt.word_vec_size,
 85 |                                   padding_idx=onmt.Constants.PAD)
 86 |         self.rnn = StackedLSTM(opt.layers, input_size, opt.rnn_size, opt.dropout)
 87 |         self.attn = onmt.modules.GlobalAttention(opt.rnn_size)
 88 |         self.dropout = nn.Dropout(opt.dropout)
 89 | 
 90 |         # self.rnn.bias_ih.data.div_(2)
 91 |         # self.rnn.bias_hh.data.copy_(self.rnn.bias_ih.data)
 92 | 
 93 |         self.hidden_size = opt.rnn_size
 94 | 
 95 |         if opt.pre_word_vecs_enc is not None:
 96 |             pretrained = torch.load(opt.pre_word_vecs_dec)
 97 |             self.word_lut.weight.copy_(pretrained)
 98 | 
 99 | 
100 |     def forward(self, input, hidden, context, init_output):
101 |         emb = self.word_lut(input).transpose(0, 1)
102 | 
103 |         batch_size = input.size(0)
104 | 
105 |         h_size = (batch_size, self.hidden_size)
106 |         output = Variable(emb.data.new(*h_size).zero_(), requires_grad=False)
107 | 
108 |         # n.b. you can increase performance if you compute W_ih * x for all
109 |         # iterations in parallel, but that's only possible if
110 |         # self.input_feed=False
111 |         outputs = []
112 |         output = init_output
113 |         for i, emb_t in enumerate(emb.chunk(emb.size(0), dim=0)):
114 |             emb_t = emb_t.squeeze(0)
115 |             if self.input_feed:
116 |                 emb_t = torch.cat([emb_t, output], 1)
117 | 
118 |             output, h = self.rnn(emb_t, hidden)
119 |             output, attn = self.attn(output, context.t())
120 |             output = self.dropout(output)
121 |             outputs += [output]
122 | 
123 |         outputs = torch.stack(outputs)
124 |         return outputs.transpose(0, 1), h, attn
125 | 
126 | 
class NMTModel(nn.Module):
    """Ties an encoder, a decoder and an output generator together."""

    def __init__(self, encoder, decoder, generator):
        super(NMTModel, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.generator = generator
        # When False, forward() returns decoder outputs without the generator.
        self.generate = False

    def set_generate(self, enabled):
        """Switch the generator stage of forward() on or off."""
        self.generate = enabled

    def make_init_decoder_output(self, context):
        """Return a zero (batch x decoder.hidden_size) Variable typed like ``context``."""
        zeros = context.data.new(context.size(1), self.decoder.hidden_size)
        return Variable(zeros.zero_(), requires_grad=False)

    def _fix_enc_hidden(self, h):
        """Merge the direction axis of a bidirectional state into its last axis."""
        #  the encoder hidden is  (layers*directions) x batch x dim
        #  we need to convert it to layers x batch x (directions*dim)
        if self.encoder.num_directions != 2:
            return h
        layers = h.size(0) // 2
        batch = h.size(1)
        dim = h.size(2)
        per_direction = h.view(layers, 2, batch, dim)
        batch_major = per_direction.transpose(1, 2).contiguous()
        return batch_major.view(layers, batch, dim * 2)

    def forward(self, input):
        """Encode ``input[0]`` and decode ``input[1]`` minus its last column."""
        source = input[0]
        target = input[1][:, :-1]  # exclude last target from inputs
        enc_state, context = self.encoder(source)
        first_output = self.make_init_decoder_output(context)

        dec_init = (self._fix_enc_hidden(enc_state[0]),
                    self._fix_enc_hidden(enc_state[1]))

        result, _dec_state, _attn = self.decoder(
            target, dec_init, context, first_output)
        if not self.generate:
            return result
        return self.generator(result)
168 | 


--------------------------------------------------------------------------------
/OpenNMT/preprocess.py:
--------------------------------------------------------------------------------
  1 | import onmt
  2 | 
  3 | import argparse
  4 | import torch
  5 | 
  6 | parser = argparse.ArgumentParser(description='preprocess.lua')
  7 | 
  8 | ##
  9 | ## **Preprocess Options**
 10 | ##
 11 | 
 12 | parser.add_argument('-config',    help="Read options from this file")
 13 | 
 14 | parser.add_argument('-train_src', required=True,
 15 |                     help="Path to the training source data")
 16 | parser.add_argument('-train_tgt', required=True,
 17 |                     help="Path to the training target data")
 18 | parser.add_argument('-valid_src', required=True,
 19 |                     help="Path to the validation source data")
 20 | parser.add_argument('-valid_tgt', required=True,
 21 |                      help="Path to the validation target data")
 22 | 
 23 | parser.add_argument('-save_data', required=True,
 24 |                     help="Output file for the prepared data")
 25 | 
 26 | parser.add_argument('-src_vocab_size', type=int, default=50000,
 27 |                     help="Size of the source vocabulary")
 28 | parser.add_argument('-tgt_vocab_size', type=int, default=50000,
 29 |                     help="Size of the target vocabulary")
 30 | parser.add_argument('-src_vocab',
 31 |                     help="Path to an existing source vocabulary")
 32 | parser.add_argument('-tgt_vocab',
 33 |                     help="Path to an existing target vocabulary")
 34 | 
 35 | 
 36 | parser.add_argument('-seq_length', type=int, default=50,
 37 |                     help="Maximum sequence length")
 38 | parser.add_argument('-shuffle',    type=int, default=1,
 39 |                     help="Shuffle data")
 40 | parser.add_argument('-seed',       type=int, default=3435,
 41 |                     help="Random seed")
 42 | 
 43 | parser.add_argument('-report_every', type=int, default=100000,
 44 |                     help="Report status every this many sentences")
 45 | 
 46 | opt = parser.parse_args()
 47 | 
 48 | 
 49 | def makeVocabulary(filename, size):
 50 |     vocab = onmt.Dict([onmt.Constants.PAD_WORD, onmt.Constants.UNK_WORD,
 51 |                        onmt.Constants.BOS_WORD, onmt.Constants.EOS_WORD])
 52 | 
 53 |     with open(filename) as f:
 54 |         for sent in f.readlines():
 55 |             for word in sent.split():
 56 |                 vocab.add(word)
 57 | 
 58 |     originalSize = vocab.size()
 59 |     vocab = vocab.prune(size)
 60 |     print('Created dictionary of size %d (pruned from %d)' %
 61 |           (vocab.size(), originalSize))
 62 | 
 63 |     return vocab
 64 | 
 65 | 
 66 | def initVocabulary(name, dataFile, vocabFile, vocabSize):
 67 | 
 68 |     vocab = None
 69 |     if vocabFile is not None:
 70 |         # If given, load existing word dictionary.
 71 |         print('Reading ' + name + ' vocabulary from \'' + vocabFile + '\'...')
 72 |         vocab = onmt.Dict()
 73 |         vocab.loadFile(vocabFile)
 74 |         print('Loaded ' + vocab.size() + ' ' + name + ' words')
 75 | 
 76 |     if vocab is None:
 77 |         # If a dictionary is still missing, generate it.
 78 |         print('Building ' + name + ' vocabulary...')
 79 |         genWordVocab = makeVocabulary(dataFile, vocabSize)
 80 | 
 81 |         vocab = genWordVocab
 82 | 
 83 |     print()
 84 |     return vocab
 85 | 
 86 | 
 87 | def saveVocabulary(name, vocab, file):
 88 |     print('Saving ' + name + ' vocabulary to \'' + file + '\'...')
 89 |     vocab.writeFile(file)
 90 | 
 91 | 
 92 | def makeData(srcFile, tgtFile, srcDicts, tgtDicts):
 93 |     src, tgt = [], []
 94 |     sizes = []
 95 |     count, ignored = 0, 0
 96 | 
 97 |     print('Processing %s & %s ...' % (srcFile, tgtFile))
 98 |     srcF = open(srcFile)
 99 |     tgtF = open(tgtFile)
100 | 
101 |     while True:
102 |         srcWords = srcF.readline().split()
103 |         tgtWords = tgtF.readline().split()
104 | 
105 |         if not srcWords or not tgtWords:
106 |             if srcWords and not tgtWords or not srcWords and tgtWords:
107 |                 print('WARNING: source and target do not have the same number of sentences')
108 |             break
109 | 
110 |         if len(srcWords) <= opt.seq_length and len(tgtWords) <= opt.seq_length:
111 | 
112 |             src += [srcDicts.convertToIdx(srcWords,
113 |                                           onmt.Constants.UNK_WORD)]
114 |             tgt += [tgtDicts.convertToIdx(tgtWords,
115 |                                           onmt.Constants.UNK_WORD,
116 |                                           onmt.Constants.BOS_WORD,
117 |                                           onmt.Constants.EOS_WORD)]
118 | 
119 |             sizes += [len(srcWords)]
120 |         else:
121 |             ignored += 1
122 | 
123 |         count += 1
124 | 
125 |         if count % opt.report_every == 0:
126 |             print('... %d sentences prepared' % count)
127 | 
128 |     srcF.close()
129 |     tgtF.close()
130 | 
131 |     if opt.shuffle == 1:
132 |         print('... shuffling sentences')
133 |         perm = torch.randperm(len(src))
134 |         src = [src[idx] for idx in perm]
135 |         tgt = [tgt[idx] for idx in perm]
136 |         sizes = [sizes[idx] for idx in perm]
137 | 
138 |     print('... sorting sentences by size')
139 |     _, perm = torch.sort(torch.Tensor(sizes))
140 |     src = [src[idx] for idx in perm]
141 |     tgt = [tgt[idx] for idx in perm]
142 | 
143 |     print('Prepared %d sentences (%d ignored due to length == 0 or > %d)' %
144 |           (len(src), ignored, opt.seq_length))
145 | 
146 |     return src, tgt
147 | 
148 | 
def main():
    """Build vocabularies, index both corpora and save everything to disk."""
    source_vocab = initVocabulary('source', opt.train_src, opt.src_vocab,
                                  opt.src_vocab_size)
    target_vocab = initVocabulary('target', opt.train_tgt, opt.tgt_vocab,
                                  opt.tgt_vocab_size)
    dicts = {'src': source_vocab, 'tgt': target_vocab}

    print('Preparing training ...')
    train_src, train_tgt = makeData(opt.train_src, opt.train_tgt,
                                    source_vocab, target_vocab)
    train = {'src': train_src, 'tgt': train_tgt}

    print('Preparing validation ...')
    valid_src, valid_tgt = makeData(opt.valid_src, opt.valid_tgt,
                                    source_vocab, target_vocab)
    valid = {'src': valid_src, 'tgt': valid_tgt}

    # Only vocabularies generated by this run are written out; ones that
    # were supplied by the user already exist on disk.
    if opt.src_vocab is None:
        saveVocabulary('source', source_vocab, opt.save_data + '.src.dict')
    if opt.tgt_vocab is None:
        saveVocabulary('target', target_vocab, opt.save_data + '.tgt.dict')

    output_path = opt.save_data + '-train.pt'
    print('Saving data to \'' + opt.save_data + '-train.pt\'...')
    torch.save({'dicts': dicts, 'train': train, 'valid': valid}, output_path)


if __name__ == "__main__":
    main()
182 | 


--------------------------------------------------------------------------------
/word_language_model/main.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import time
  3 | import math
  4 | import torch
  5 | import torch.nn as nn
  6 | from torch.autograd import Variable
  7 | 
  8 | import data
  9 | import model
 10 | 
 11 | parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model')
 12 | parser.add_argument('--data', type=str, default='./data/penn',
 13 |                     help='location of the data corpus')
 14 | parser.add_argument('--model', type=str, default='LSTM',
 15 |                     help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)')
 16 | parser.add_argument('--emsize', type=int, default=200,
 17 |                     help='size of word embeddings')
 18 | parser.add_argument('--nhid', type=int, default=200,
 19 |                     help='humber of hidden units per layer')
 20 | parser.add_argument('--nlayers', type=int, default=2,
 21 |                     help='number of layers')
 22 | parser.add_argument('--lr', type=float, default=20,
 23 |                     help='initial learning rate')
 24 | parser.add_argument('--clip', type=float, default=0.5,
 25 |                     help='gradient clipping')
 26 | parser.add_argument('--epochs', type=int, default=6,
 27 |                     help='upper epoch limit')
 28 | parser.add_argument('--batch-size', type=int, default=20, metavar='N',
 29 |                     help='batch size')
 30 | parser.add_argument('--bptt', type=int, default=20,
 31 |                     help='sequence length')
 32 | parser.add_argument('--seed', type=int, default=1111,
 33 |                     help='random seed')
 34 | parser.add_argument('--cuda', action='store_true',
 35 |                     help='use CUDA')
 36 | parser.add_argument('--log-interval', type=int, default=200, metavar='N',
 37 |                     help='report interval')
 38 | parser.add_argument('--save', type=str,  default='model.pt',
 39 |                     help='path to save the final model')
 40 | args = parser.parse_args()
 41 | 
 42 | # Set the random seed manually for reproducibility.
 43 | torch.manual_seed(args.seed)
 44 | if torch.cuda.is_available():
 45 |     if not args.cuda:
 46 |         print("WARNING: You have a CUDA device, so you should probably run with --cuda")
 47 |     else:
 48 |         torch.cuda.manual_seed(args.seed)
 49 | 
 50 | ###############################################################################
 51 | # Load data
 52 | ###############################################################################
 53 | 
 54 | corpus = data.Corpus(args.data)
 55 | 
 56 | def batchify(data, bsz):
 57 |     nbatch = data.size(0) // bsz
 58 |     data = data.narrow(0, 0, nbatch * bsz)
 59 |     data = data.view(bsz, -1).t().contiguous()
 60 |     if args.cuda:
 61 |         data = data.cuda()
 62 |     return data
 63 | 
# Validation and test data are batched with a fixed size, independent of
# the training batch size chosen on the command line.
eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
# NOTE: this rebinds the name ``model`` from the imported module to the
# network instance; the module is no longer reachable under that name.
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers)
if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()
 79 | 
 80 | ###############################################################################
 81 | # Training code
 82 | ###############################################################################
 83 | 
 84 | def repackage_hidden(h):
 85 |     """Wraps hidden states in new Variables, to detach them from their history."""
 86 |     if type(h) == Variable:
 87 |         return Variable(h.data)
 88 |     else:
 89 |         return tuple(repackage_hidden(v) for v in h)
 90 | 
 91 | 
 92 | def get_batch(source, i, evaluation=False):
 93 |     seq_len = min(args.bptt, len(source) - 1 - i)
 94 |     data = Variable(source[i:i+seq_len], volatile=evaluation)
 95 |     target = Variable(source[i+1:i+1+seq_len].view(-1))
 96 |     return data, target
 97 | 
 98 | 
 99 | def evaluate(data_source):
100 |     total_loss = 0
101 |     ntokens = len(corpus.dictionary)
102 |     hidden = model.init_hidden(eval_batch_size)
103 |     for i in range(0, data_source.size(0) - 1, args.bptt):
104 |         data, targets = get_batch(data_source, i, evaluation=True)
105 |         output, hidden = model(data, hidden)
106 |         output_flat = output.view(-1, ntokens)
107 |         total_loss += len(data) * criterion(output_flat, targets).data
108 |         hidden = repackage_hidden(hidden)
109 |     return total_loss[0] / len(data_source)
110 | 
111 | 
def train():
    """Run one pass over ``train_data`` with plain SGD and gradient clipping.

    Reads the module-level ``lr`` and ``epoch`` and prints running
    statistics every ``args.log_interval`` batches.
    """
    running_loss = 0
    tick = time.time()
    vocab_size = len(corpus.dictionary)
    state = model.init_hidden(args.batch_size)
    for batch, offset in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        inputs, targets = get_batch(train_data, offset)
        # Detach the carried state so gradients stop at the window boundary.
        state = repackage_hidden(state)
        model.zero_grad()
        logits, state = model(inputs, state)
        loss = criterion(logits.view(-1, vocab_size), targets)
        loss.backward()

        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        # Manual SGD step: p <- p - lr * grad.
        for param in model.parameters():
            param.data.add_(-lr, param.grad.data)

        running_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            mean_loss = running_loss[0] / args.log_interval
            ms_per_batch = (time.time() - tick) * 1000 / args.log_interval
            report = ('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                      'loss {:5.2f} | ppl {:8.2f}')
            print(report.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                ms_per_batch, mean_loss, math.exp(mean_loss)))
            running_loss = 0
            tick = time.time()
140 | 
141 | 
142 | # Loop over epochs.
lr = args.lr
prev_val_loss = None
for epoch in range(1, args.epochs+1):
    epoch_start_time = time.time()
    train()
    val_loss = evaluate(val_data)
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
            'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                       val_loss, math.exp(val_loss)))
    print('-' * 89)
    # Anneal the learning rate when the validation loss got worse.  The
    # explicit None test keeps a previous loss of exactly 0.0 from being
    # mistaken for "no previous epoch".
    if prev_val_loss is not None and val_loss > prev_val_loss:
        lr /= 4
    prev_val_loss = val_loss


# Run on test data and save the model.
test_loss = evaluate(test_data)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
print('=' * 89)
# An empty --save value disables saving.
if args.save != '':
    with open(args.save, 'wb') as f:
        torch.save(model, f)
169 | 


--------------------------------------------------------------------------------
/OpenNMT/onmt/Translator.py:
--------------------------------------------------------------------------------
  1 | import onmt
  2 | import torch
  3 | from torch.autograd import Variable
  4 | 
  5 | 
  6 | class Translator(object):
  7 |     def __init__(self, opt):
  8 |         self.opt = opt
  9 |         self.tt = torch.cuda if opt.cuda else torch
 10 | 
 11 |         checkpoint = torch.load(opt.model)
 12 |         self.model = checkpoint['model']
 13 | 
 14 |         self.model.eval()
 15 | 
 16 |         if opt.cuda:
 17 |             self.model.cuda()
 18 |         else:
 19 |             self.model.cpu()
 20 | 
 21 |         self.src_dict = checkpoint['dicts']['src']
 22 |         self.tgt_dict = checkpoint['dicts']['tgt']
 23 | 
 24 |     def buildData(self, srcBatch, goldBatch):
 25 |         srcData = [self.src_dict.convertToIdx(b,
 26 |                     onmt.Constants.UNK_WORD) for b in srcBatch]
 27 |         tgtData = None
 28 |         if goldBatch:
 29 |             tgtData = [self.tgt_dict.convertToIdx(b,
 30 |                        onmt.Constants.UNK_WORD,
 31 |                        onmt.Constants.BOS_WORD,
 32 |                        onmt.Constants.EOS_WORD) for b in goldBatch]
 33 | 
 34 |         return onmt.Dataset(srcData, tgtData,
 35 |             self.opt.batch_size, self.opt.cuda)
 36 | 
 37 |     def buildTargetTokens(self, pred, src, attn):
 38 |         tokens = self.tgt_dict.convertToLabels(pred, onmt.Constants.EOS)
 39 |         tokens = tokens[:-1]  # EOS
 40 |         if self.opt.replace_unk:
 41 |             for i in range(len(tokens)):
 42 |                 if tokens[i] == onmt.Constants.UNK_WORD:
 43 |                     _, maxIndex = attn[i].max(0)
 44 |                     # FIXME phrase table
 45 |                     tokens[i] = src[maxIndex[0]]
 46 | 
 47 |         return tokens
 48 | 
 49 |     def translateBatch(self, batch):
 50 |         srcBatch, tgtBatch = batch
 51 |         batchSize = srcBatch.size(0)
 52 |         beamSize = self.opt.beam_size
 53 | 
 54 |         #  (1) run the encoder on the src
 55 | 
 56 |         # have to execute the encoder manually to deal with padding
 57 |         encStates = None
 58 |         context = []
 59 |         for srcBatch_t in srcBatch.chunk(srcBatch.size(1), dim=1):
 60 |             encStates, context_t = self.model.encoder(srcBatch_t, hidden=encStates)
 61 |             batchPadIdx = srcBatch_t.data.squeeze(1).eq(onmt.Constants.PAD).nonzero()
 62 |             if batchPadIdx.nelement() > 0:
 63 |                 batchPadIdx = batchPadIdx.squeeze(1)
 64 |                 encStates[0].data.index_fill_(1, batchPadIdx, 0)
 65 |                 encStates[1].data.index_fill_(1, batchPadIdx, 0)
 66 |             context += [context_t]
 67 | 
 68 |         encStates = (self.model._fix_enc_hidden(encStates[0]),
 69 |                       self.model._fix_enc_hidden(encStates[1]))
 70 | 
 71 |         context = torch.cat(context)
 72 |         rnnSize = context.size(2)
 73 | 
 74 |         #  This mask is applied to the attention model inside the decoder
 75 |         #  so that the attention ignores source padding
 76 |         padMask = srcBatch.data.eq(onmt.Constants.PAD)
 77 |         def applyContextMask(m):
 78 |             if isinstance(m, onmt.modules.GlobalAttention):
 79 |                 m.applyMask(padMask)
 80 | 
 81 |         #  (2) if a target is specified, compute the 'goldScore'
 82 |         #  (i.e. log likelihood) of the target under the model
 83 |         goldScores = context.data.new(batchSize).zero_()
 84 |         if tgtBatch is not None:
 85 |             decStates = encStates
 86 |             decOut = self.model.make_init_decoder_output(context)
 87 |             self.model.decoder.apply(applyContextMask)
 88 |             initOutput = self.model.make_init_decoder_output(context)
 89 | 
 90 |             decOut, decStates, attn = self.model.decoder(
 91 |                     tgtBatch[:, :-1], decStates, context, initOutput)
 92 |             for dec_t, tgt_t in zip(decOut.transpose(0, 1), tgtBatch.transpose(0, 1)[1:].data):
 93 |                 gen_t = self.model.generator.forward(dec_t)
 94 |                 tgt_t = tgt_t.unsqueeze(1)
 95 |                 scores = gen_t.data.gather(1, tgt_t)
 96 |                 scores.masked_fill_(tgt_t.eq(onmt.Constants.PAD), 0)
 97 |                 goldScores += scores
 98 | 
 99 |         #  (3) run the decoder to generate sentences, using beam search
100 | 
101 |         # Expand tensors for each beam.
102 |         context = Variable(context.data.repeat(1, beamSize, 1))
103 |         decStates = (Variable(encStates[0].data.repeat(1, beamSize, 1)),
104 |                      Variable(encStates[1].data.repeat(1, beamSize, 1)))
105 | 
106 |         beam = [onmt.Beam(beamSize, self.opt.cuda) for k in range(batchSize)]
107 | 
108 |         decOut = self.model.make_init_decoder_output(context)
109 | 
110 |         padMask = srcBatch.data.eq(onmt.Constants.PAD).unsqueeze(0).repeat(beamSize, 1, 1)
111 | 
112 |         batchIdx = list(range(batchSize))
113 |         remainingSents = batchSize
114 |         for i in range(self.opt.max_sent_length):
115 | 
116 |             self.model.decoder.apply(applyContextMask)
117 | 
118 |             # Prepare decoder input.
119 |             input = torch.stack([b.getCurrentState() for b in beam
120 |                                if not b.done]).t().contiguous().view(1, -1)
121 | 
122 |             decOut, decStates, attn = self.model.decoder(
123 |                 Variable(input).transpose(0, 1), decStates, context, decOut)
124 |             # decOut: 1 x (beam*batch) x numWords
125 |             decOut = decOut.transpose(0, 1).squeeze(0)
126 |             out = self.model.generator.forward(decOut)
127 | 
128 |             # batch x beam x numWords
129 |             wordLk = out.view(beamSize, remainingSents, -1).transpose(0, 1).contiguous()
130 |             attn = attn.view(beamSize, remainingSents, -1).transpose(0, 1).contiguous()
131 | 
132 |             active = []
133 |             for b in range(batchSize):
134 |                 if beam[b].done:
135 |                     continue
136 | 
137 |                 idx = batchIdx[b]
138 |                 if not beam[b].advance(wordLk.data[idx], attn.data[idx]):
139 |                     active += [b]
140 | 
141 |                 for decState in decStates:  # iterate over h, c
142 |                     # layers x beam*sent x dim
143 |                     sentStates = decState.view(
144 |                         -1, beamSize, remainingSents, decState.size(2))[:, :, idx]
145 |                     sentStates.data.copy_(
146 |                         sentStates.data.index_select(1, beam[b].getCurrentOrigin()))
147 | 
148 |             if not active:
149 |                 break
150 | 
151 |             # in this section, the sentences that are still active are
152 |             # compacted so that the decoder is not run on completed sentences
153 |             activeIdx = self.tt.LongTensor([batchIdx[k] for k in active])
154 |             batchIdx = {beam: idx for idx, beam in enumerate(active)}
155 | 
156 |             def updateActive(t):
157 |                 # select only the remaining active sentences
158 |                 view = t.data.view(-1, remainingSents, rnnSize)
159 |                 newSize = list(t.size())
160 |                 newSize[-2] = newSize[-2] * len(activeIdx) // remainingSents
161 |                 return Variable(view.index_select(1, activeIdx) \
162 |                                     .view(*newSize))
163 | 
164 |             decStates = (updateActive(decStates[0]), updateActive(decStates[1]))
165 |             decOut = updateActive(decOut)
166 |             context = updateActive(context)
167 |             padMask = padMask.index_select(1, activeIdx)
168 | 
169 |             remainingSents = len(active)
170 | 
171 |         #  (4) package everything up
172 | 
173 |         allHyp, allScores, allAttn = [], [], []
174 |         n_best = self.opt.n_best
175 | 
176 |         for b in range(batchSize):
177 |             scores, ks = beam[b].sortBest()
178 | 
179 |             allScores += [scores[:n_best]]
180 |             valid_attn = srcBatch.transpose(0, 1).data[:, b].ne(onmt.Constants.PAD).nonzero().squeeze(1)
181 |             hyps, attn = zip(*[beam[b].getHyp(k) for k in ks[:n_best]])
182 |             attn = [a.index_select(1, valid_attn) for a in attn]
183 |             allHyp += [hyps]
184 |             allAttn += [attn]
185 | 
186 |         return allHyp, allScores, allAttn, goldScores
187 | 
188 |     def translate(self, srcBatch, goldBatch):
189 |         #  (1) convert words to indexes
190 |         dataset = self.buildData(srcBatch, goldBatch)
191 |         batch = dataset[0]
192 |         batch = [x.transpose(0, 1) for x in batch]
193 | 
194 |         #  (2) translate
195 |         pred, predScore, attn, goldScore = self.translateBatch(batch)
196 | 
197 |         #  (3) convert indexes to words
198 |         predBatch = []
199 |         for b in range(batch[0].size(0)):
200 |             predBatch.append(
201 |                 [self.buildTargetTokens(pred[b][n], srcBatch[b], attn[b][n])
202 |                         for n in range(self.opt.n_best)]
203 |             )
204 | 
205 |         return predBatch, predScore, goldScore
206 | 


--------------------------------------------------------------------------------
/dcgan/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import argparse
  3 | import os
  4 | import random
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.parallel
  8 | import torch.backends.cudnn as cudnn
  9 | import torch.optim as optim
 10 | import torch.utils.data
 11 | import torchvision.datasets as dset
 12 | import torchvision.transforms as transforms
 13 | import torchvision.utils as vutils
 14 | from torch.autograd import Variable
 15 | 
 16 | 
 17 | parser = argparse.ArgumentParser()
 18 | parser.add_argument('--dataset', required=True, help='cifar10 | lsun | imagenet | folder | lfw ')
 19 | parser.add_argument('--dataroot', required=True, help='path to dataset')
 20 | parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
 21 | parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
 22 | parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network')
 23 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
 24 | parser.add_argument('--ngf', type=int, default=64)
 25 | parser.add_argument('--ndf', type=int, default=64)
 26 | parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
 27 | parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
 28 | parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
 29 | parser.add_argument('--cuda'  , action='store_true', help='enables cuda')
 30 | parser.add_argument('--ngpu'  , type=int, default=1, help='number of GPUs to use')
 31 | parser.add_argument('--netG', default='', help="path to netG (to continue training)")
 32 | parser.add_argument('--netD', default='', help="path to netD (to continue training)")
 33 | parser.add_argument('--outf', default='.', help='folder to output images and model checkpoints')
 34 | 
 35 | opt = parser.parse_args()
 36 | print(opt)
 37 | 
 38 | try:
 39 |     os.makedirs(opt.outf)
 40 | except OSError:
 41 |     pass
 42 | opt.manualSeed = random.randint(1, 10000) # fix seed
 43 | print("Random Seed: ", opt.manualSeed)
 44 | random.seed(opt.manualSeed)
 45 | torch.manual_seed(opt.manualSeed)
 46 | 
 47 | cudnn.benchmark = True
 48 | 
 49 | if torch.cuda.is_available() and not opt.cuda:
 50 |     print("WARNING: You have a CUDA device, so you should probably run with --cuda")
 51 | 
 52 | if opt.dataset in ['imagenet', 'folder', 'lfw']:
 53 |     # folder dataset
 54 |     dataset = dset.ImageFolder(root=opt.dataroot,
 55 |                                transform=transforms.Compose([
 56 |                                    transforms.Scale(opt.imageSize),
 57 |                                    transforms.CenterCrop(opt.imageSize),
 58 |                                    transforms.ToTensor(),
 59 |                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
 60 |                                ]))
 61 | elif opt.dataset == 'lsun':
 62 |     dataset = dset.LSUN(db_path=opt.dataroot, classes=['bedroom_train'],
 63 |                         transform=transforms.Compose([
 64 |                             transforms.Scale(opt.imageSize),
 65 |                             transforms.CenterCrop(opt.imageSize),
 66 |                             transforms.ToTensor(),
 67 |                             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
 68 |                         ]))
 69 | elif opt.dataset == 'cifar10':
 70 |     dataset = dset.CIFAR10(root=opt.dataroot, download=True,
 71 |                            transform=transforms.Compose([
 72 |                                transforms.Scale(opt.imageSize),
 73 |                                transforms.ToTensor(),
 74 |                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
 75 |                            ])
 76 |     )
 77 | assert dataset
 78 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize,
 79 |                                          shuffle=True, num_workers=int(opt.workers))
 80 | 
 81 | ngpu = int(opt.ngpu)
 82 | nz = int(opt.nz)
 83 | ngf = int(opt.ngf)
 84 | ndf = int(opt.ndf)
 85 | nc = 3
 86 | 
 87 | # custom weights initialization called on netG and netD
 88 | def weights_init(m):
 89 |     classname = m.__class__.__name__
 90 |     if classname.find('Conv') != -1:
 91 |         m.weight.data.normal_(0.0, 0.02)
 92 |     elif classname.find('BatchNorm') != -1:
 93 |         m.weight.data.normal_(1.0, 0.02)
 94 |         m.bias.data.fill_(0)
 95 | 
 96 | class _netG(nn.Module):
 97 |     def __init__(self, ngpu):
 98 |         super(_netG, self).__init__()
 99 |         self.ngpu = ngpu
100 |         self.main = nn.Sequential(
101 |             # input is Z, going into a convolution
102 |             nn.ConvTranspose2d(     nz, ngf * 8, 4, 1, 0, bias=False),
103 |             nn.BatchNorm2d(ngf * 8),
104 |             nn.ReLU(True),
105 |             # state size. (ngf*8) x 4 x 4
106 |             nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
107 |             nn.BatchNorm2d(ngf * 4),
108 |             nn.ReLU(True),
109 |             # state size. (ngf*4) x 8 x 8
110 |             nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
111 |             nn.BatchNorm2d(ngf * 2),
112 |             nn.ReLU(True),
113 |             # state size. (ngf*2) x 16 x 16
114 |             nn.ConvTranspose2d(ngf * 2,     ngf, 4, 2, 1, bias=False),
115 |             nn.BatchNorm2d(ngf),
116 |             nn.ReLU(True),
117 |             # state size. (ngf) x 32 x 32
118 |             nn.ConvTranspose2d(    ngf,      nc, 4, 2, 1, bias=False),
119 |             nn.Tanh()
120 |             # state size. (nc) x 64 x 64
121 |         )
122 |     def forward(self, input):
123 |         gpu_ids = None
124 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
125 |             gpu_ids = range(self.ngpu)
126 |         return nn.parallel.data_parallel(self.main, input, gpu_ids)
127 | 
# Build the generator, apply the custom initialisation, and optionally
# restore weights from the file given with --netG before printing the layout.
netG = _netG(ngpu)
netG.apply(weights_init)
if opt.netG:
    netG.load_state_dict(torch.load(opt.netG))
print(netG)
133 | 
134 | class _netD(nn.Module):
135 |     def __init__(self, ngpu):
136 |         super(_netD, self).__init__()
137 |         self.ngpu = ngpu
138 |         self.main = nn.Sequential(
139 |             # input is (nc) x 64 x 64
140 |             nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
141 |             nn.LeakyReLU(0.2, inplace=True),
142 |             # state size. (ndf) x 32 x 32
143 |             nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
144 |             nn.BatchNorm2d(ndf * 2),
145 |             nn.LeakyReLU(0.2, inplace=True),
146 |             # state size. (ndf*2) x 16 x 16
147 |             nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
148 |             nn.BatchNorm2d(ndf * 4),
149 |             nn.LeakyReLU(0.2, inplace=True),
150 |             # state size. (ndf*4) x 8 x 8
151 |             nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
152 |             nn.BatchNorm2d(ndf * 8),
153 |             nn.LeakyReLU(0.2, inplace=True),
154 |             # state size. (ndf*8) x 4 x 4
155 |             nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
156 |             nn.Sigmoid()
157 |         )
158 |     def forward(self, input):
159 |         gpu_ids = None
160 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
161 |             gpu_ids = range(self.ngpu)
162 |         output = nn.parallel.data_parallel(self.main, input, gpu_ids)
163 |         return output.view(-1, 1)
164 | 
# Build the discriminator, apply the custom initialisation, and optionally
# restore weights from the file given with --netD.
netD = _netD(ngpu)
netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

criterion = nn.BCELoss()

# Buffers allocated once for a full batch; the training loop resizes `input`,
# `noise` and `label` in place to the actual batch size of every iteration.
# `fixed_noise` is sampled once and reused for the periodic sample images.
input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
# Target values written into `label` for real and generated images.
real_label = 1
fake_label = 0

# Move networks, loss and buffers to the GPU when --cuda was given.
if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

input = Variable(input)
label = Variable(label)
noise = Variable(noise)
fixed_noise = Variable(fixed_noise)

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))

for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu, _ = data
        batch_size = real_cpu.size(0)
        # copy the batch into the shared buffer, resizing it to this batch
        input.data.resize_(real_cpu.size()).copy_(real_cpu)
        label.data.resize_(batch_size).fill_(real_label)

        output = netD(input)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()

        # train with fake
        noise.data.resize_(batch_size, nz, 1, 1)
        noise.data.normal_(0, 1)
        fake = netG(noise)
        label.data.fill_(fake_label)
        # detach() keeps this backward pass from reaching the generator
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.data.mean()
        # the two backward() calls accumulated both gradients; errD is only reported
        errD = errD_real + errD_fake
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.data.fill_(real_label) # fake labels are real for generator cost
        # same `fake` batch, not detached this time, scored by the updated D
        output = netD(fake)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
              % (epoch, opt.niter, i, len(dataloader),
                 errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2))
        # every 100 iterations dump the current real batch and the generator's
        # output on fixed_noise; both files are overwritten within an epoch
        if i % 100 == 0:
            vutils.save_image(real_cpu,
                    '%s/real_samples.png' % opt.outf)
            fake = netG(fixed_noise)
            vutils.save_image(fake.data,
                    '%s/fake_samples_epoch_%03d.png' % (opt.outf, epoch))

    # do checkpointing
    torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.outf, epoch))
    torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.outf, epoch))
249 | 


--------------------------------------------------------------------------------
/imagenet/main.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import shutil
  4 | import time
  5 | 
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.parallel
  9 | import torch.backends.cudnn as cudnn
 10 | import torch.optim
 11 | import torch.utils.data
 12 | import torchvision.transforms as transforms
 13 | import torchvision.datasets as datasets
 14 | import torchvision.models as models
 15 | 
 16 | 
 17 | model_names = sorted(name for name in models.__dict__
 18 |     if name.islower() and not name.startswith("__")
 19 |     and callable(models.__dict__[name]))
 20 | 
 21 | 
 22 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
 23 | parser.add_argument('data', metavar='DIR',
 24 |                     help='path to dataset')
 25 | parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
 26 |                     choices=model_names,
 27 |                     help='model architecture: ' +
 28 |                         ' | '.join(model_names) +
 29 |                         ' (default: resnet18)')
 30 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
 31 |                     help='number of data loading workers (default: 4)')
 32 | parser.add_argument('--epochs', default=90, type=int, metavar='N',
 33 |                     help='number of total epochs to run')
 34 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
 35 |                     help='manual epoch number (useful on restarts)')
 36 | parser.add_argument('-b', '--batch-size', default=256, type=int,
 37 |                     metavar='N', help='mini-batch size (default: 256)')
 38 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
 39 |                     metavar='LR', help='initial learning rate')
 40 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
 41 |                     help='momentum')
 42 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
 43 |                     metavar='W', help='weight decay (default: 1e-4)')
 44 | parser.add_argument('--print-freq', '-p', default=10, type=int,
 45 |                     metavar='N', help='print frequency (default: 10)')
 46 | parser.add_argument('--resume', default='', type=str, metavar='PATH',
 47 |                     help='path to latest checkpoint (default: none)')
 48 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
 49 |                     help='evaluate model on validation set')
 50 | parser.add_argument('--pretrained', dest='pretrained', action='store_true',
 51 |                     help='use pre-trained model')
 52 | 
 53 | best_prec1 = 0
 54 | 
 55 | 
 56 | def main():
 57 |     global args, best_prec1
 58 |     args = parser.parse_args()
 59 | 
 60 |     # create model
 61 |     if args.pretrained:
 62 |         print("=> using pre-trained model '{}'".format(args.arch))
 63 |         model = models.__dict__[args.arch](pretrained=True)
 64 |     else:
 65 |         print("=> creating model '{}'".format(args.arch))
 66 |         model = models.__dict__[args.arch]()
 67 | 
 68 |     if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
 69 |         model.features = torch.nn.DataParallel(model.features)
 70 |         model.cuda()
 71 |     else:
 72 |         model = torch.nn.DataParallel(model).cuda()
 73 | 
 74 |     # optionally resume from a checkpoint
 75 |     if args.resume:
 76 |         if os.path.isfile(args.resume):
 77 |             print("=> loading checkpoint '{}'".format(args.resume))
 78 |             checkpoint = torch.load(args.resume)
 79 |             args.start_epoch = checkpoint['epoch']
 80 |             best_prec1 = checkpoint['best_prec1']
 81 |             model.load_state_dict(checkpoint['state_dict'])
 82 |             print("=> loaded checkpoint '{}' (epoch {})"
 83 |                   .format(args.resume, checkpoint['epoch']))
 84 |         else:
 85 |             print("=> no checkpoint found at '{}'".format(args.resume))
 86 | 
 87 |     cudnn.benchmark = True
 88 | 
 89 |     # Data loading code
 90 |     traindir = os.path.join(args.data, 'train')
 91 |     valdir = os.path.join(args.data, 'val')
 92 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
 93 |                                      std=[0.229, 0.224, 0.225])
 94 | 
 95 |     train_loader = torch.utils.data.DataLoader(
 96 |         datasets.ImageFolder(traindir, transforms.Compose([
 97 |             transforms.RandomSizedCrop(224),
 98 |             transforms.RandomHorizontalFlip(),
 99 |             transforms.ToTensor(),
100 |             normalize,
101 |         ])),
102 |         batch_size=args.batch_size, shuffle=True,
103 |         num_workers=args.workers, pin_memory=True)
104 | 
105 |     val_loader = torch.utils.data.DataLoader(
106 |         datasets.ImageFolder(valdir, transforms.Compose([
107 |             transforms.Scale(256),
108 |             transforms.CenterCrop(224),
109 |             transforms.ToTensor(),
110 |             normalize,
111 |         ])),
112 |         batch_size=args.batch_size, shuffle=False,
113 |         num_workers=args.workers, pin_memory=True)
114 | 
115 |     # define loss function (criterion) and pptimizer
116 |     criterion = nn.CrossEntropyLoss().cuda()
117 | 
118 |     optimizer = torch.optim.SGD(model.parameters(), args.lr,
119 |                                 momentum=args.momentum,
120 |                                 weight_decay=args.weight_decay)
121 | 
122 |     if args.evaluate:
123 |         validate(val_loader, model, criterion)
124 |         return
125 | 
126 |     for epoch in range(args.start_epoch, args.epochs):
127 |         adjust_learning_rate(optimizer, epoch)
128 | 
129 |         # train for one epoch
130 |         train(train_loader, model, criterion, optimizer, epoch)
131 | 
132 |         # evaluate on validation set
133 |         prec1 = validate(val_loader, model, criterion)
134 | 
135 |         # remember best prec@1 and save checkpoint
136 |         is_best = prec1 > best_prec1
137 |         best_prec1 = max(prec1, best_prec1)
138 |         save_checkpoint({
139 |             'epoch': epoch + 1,
140 |             'arch': args.arch,
141 |             'state_dict': model.state_dict(),
142 |             'best_prec1': best_prec1,
143 |         }, is_best)
144 | 
145 | 
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Every batch does a forward pass, a backward pass and one ``optimizer``
    step.  Running averages of batch time, data-loading time, loss and
    precision@1/@5 are printed every ``args.print_freq`` batches.  ``epoch``
    is used only in the progress line.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # `async` is a reserved word since Python 3.7, so `cuda(async=True)`
        # no longer parses; `non_blocking` is the replacement keyword.
        target = target.cuda(non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        # store plain Python floats in the meters; float() accepts any
        # single-element tensor, whatever its number of dimensions
        losses.update(loss.item(), images.size(0))
        top1.update(float(prec1), images.size(0))
        top5.update(float(prec5), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))
193 | 
194 | 
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the mean precision@1.

    The model is switched to eval mode and every batch is run with gradient
    tracking disabled.  Running averages of batch time, loss and
    precision@1/@5 are printed every ``args.print_freq`` batches, followed by
    a summary line.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # torch.no_grad() takes the place of the removed `volatile=True` flag
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # `async` is a reserved word since Python 3.7; `non_blocking`
            # is the replacement keyword.
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            # store plain Python floats in the meters; float() accepts any
            # single-element tensor, whatever its number of dimensions
            losses.update(loss.item(), images.size(0))
            top1.update(float(prec1), images.size(0))
            top5.update(float(prec5), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg
237 | 
238 | 
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Write ``state`` to ``filename`` with ``torch.save``.

    When ``is_best`` is true the file just written is additionally copied to
    ``model_best.pth.tar`` in the current working directory.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')
243 | 
244 | 
class AverageMeter(object):
    """Computes and stores the average and current value.

    ``val`` is the last value passed to ``update``, ``sum`` the weighted sum
    of all values, ``count`` the total weight and ``avg`` their ratio.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times (e.g. a mean over ``n`` samples).

        While the accumulated count is still zero (only zero-weight updates
        so far) ``avg`` stays at 0 instead of raising ``ZeroDivisionError``.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count else 0
261 | 
262 | 
def adjust_learning_rate(optimizer, epoch):
    """Write the step-decayed learning rate into every parameter group.

    The rate is ``args.lr`` multiplied by 0.1 once for each completed block
    of 30 epochs.
    """
    completed_steps = epoch // 30
    new_lr = args.lr * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = new_lr
268 | 
269 | 
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: 2-D tensor of scores; the top-k is taken along dimension 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per k, holding the percentage
        of rows whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the maxk largest scores per row, transposed to (maxk, batch)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct[:k]` may be non-contiguous, which makes view(-1) raise;
        # reshape() copies when needed.  keepdim=True keeps the result a
        # one-element tensor so callers can index it with [0].
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
284 | 
285 | 
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
288 | 


--------------------------------------------------------------------------------
/OpenNMT/train.py:
--------------------------------------------------------------------------------
  1 | import onmt
  2 | import argparse
  3 | import torch
  4 | import torch.nn as nn
  5 | from torch import cuda
  6 | from torch.autograd import Variable
  7 | import math
  8 | import time
  9 | 
 10 | parser = argparse.ArgumentParser(description='train.py')
 11 | 
 12 | ## Data options
 13 | 
 14 | parser.add_argument('-data', required=True,
 15 |                     help='Path to the *-train.pt file from preprocess.py')
 16 | parser.add_argument('-save_model', default='model',
 17 |                     help="""Model filename (the model will be saved as
 18 |                     <save_model>_epochN_PPL.pt where PPL is the
 19 |                     validation perplexity""")
 20 | parser.add_argument('-train_from',
 21 |                     help="""If training from a checkpoint then this is the
 22 |                     path to the pretrained model.""")
 23 | 
 24 | ## Model options
 25 | 
 26 | parser.add_argument('-layers', type=int, default=2,
 27 |                     help='Number of layers in the LSTM encoder/decoder')
 28 | parser.add_argument('-rnn_size', type=int, default=500,
 29 |                     help='Size of LSTM hidden states')
 30 | parser.add_argument('-word_vec_size', type=int, default=500,
 31 |                     help='Word embedding sizes')
 32 | parser.add_argument('-input_feed', type=int, default=1,
 33 |                     help="""Feed the context vector at each time step as
 34 |                     additional input (via concatenation with the word
 35 |                     embeddings) to the decoder.""")
 36 | # parser.add_argument('-residual',   action="store_true",
 37 | #                     help="Add residual connections between RNN layers.")
 38 | parser.add_argument('-brnn', action='store_true',
 39 |                     help='Use a bidirectional encoder')
 40 | parser.add_argument('-brnn_merge', default='concat',
 41 |                     help="""Merge action for the bidirectional hidden states:
 42 |                     [concat|sum]""")
 43 | 
 44 | ## Optimization options
 45 | 
 46 | parser.add_argument('-batch_size', type=int, default=64,
 47 |                     help='Maximum batch size')
 48 | parser.add_argument('-max_generator_batches', type=int, default=32,
 49 |                     help="""Maximum batches of words in a sequence to run
 50 |                     the generator on in parallel. Higher is faster, but uses
 51 |                     more memory.""")
 52 | parser.add_argument('-epochs', type=int, default=13,
 53 |                     help='Number of training epochs')
 54 | parser.add_argument('-start_epoch', type=int, default=1,
 55 |                     help='The epoch from which to start')
 56 | parser.add_argument('-param_init', type=float, default=0.1,
 57 |                     help="""Parameters are initialized over uniform distribution
 58 |                     with support (-param_init, param_init)""")
 59 | parser.add_argument('-optim', default='sgd',
 60 |                     help="Optimization method. [sgd|adagrad|adadelta|adam]")
 61 | parser.add_argument('-learning_rate', type=float, default=1.0,
 62 |                     help="""Starting learning rate. If adagrad/adadelta/adam is
 63 |                     used, then this is the global learning rate. Recommended
 64 |                     settings: sgd = 1, adagrad = 0.1, adadelta = 1, adam = 0.1""")
 65 | parser.add_argument('-max_grad_norm', type=float, default=5,
 66 |                     help="""If the norm of the gradient vector exceeds this,
 67 |                     renormalize it to have the norm equal to max_grad_norm""")
 68 | parser.add_argument('-dropout', type=float, default=0.3,
 69 |                     help='Dropout probability; applied between LSTM stacks.')
 70 | parser.add_argument('-learning_rate_decay', type=float, default=0.5,
 71 |                     help="""Decay learning rate by this much if (i) perplexity
 72 |                     does not decrease on the validation set or (ii) epoch has
 73 |                     gone past the start_decay_at_limit""")
 74 | parser.add_argument('-start_decay_at', default=8,
 75 |                     help="Start decay after this epoch")
 76 | parser.add_argument('-curriculum', action="store_true",
 77 |                     help="""For this many epochs, order the minibatches based
 78 |                     on source sequence length. Sometimes setting this to 1 will
 79 |                     increase convergence speed.""")
 80 | parser.add_argument('-pre_word_vecs_enc',
 81 |                     help="""If a valid path is specified, then this will load
 82 |                     pretrained word embeddings on the encoder side.
 83 |                     See README for specific formatting instructions.""")
 84 | parser.add_argument('-pre_word_vecs_dec',
 85 |                     help="""If a valid path is specified, then this will load
 86 |                     pretrained word embeddings on the decoder side.
 87 |                     See README for specific formatting instructions.""")
 88 | 
 89 | # GPU
 90 | parser.add_argument('-gpus', default=[], nargs='+', type=int,
 91 |                     help="Use CUDA")
 92 | 
 93 | parser.add_argument('-log_interval', type=int, default=50,
 94 |                     help="Print stats at this interval.")
 95 | # parser.add_argument('-seed', type=int, default=3435,
 96 | #                     help="Seed for random initialization")
 97 | 
 98 | opt = parser.parse_args()
 99 | opt.cuda = len(opt.gpus)
100 | 
101 | print(opt)
102 | 
103 | if torch.cuda.is_available() and not opt.cuda:
104 |     print("WARNING: You have a CUDA device, so you should probably run with -cuda")
105 | 
106 | if opt.cuda:
107 |     cuda.set_device(opt.gpus[0])
108 | 
def NMTCriterion(vocabSize):
    """Build the NLL criterion used for training and evaluation.

    Every one of the ``vocabSize`` classes has weight 1 except the padding
    index ``onmt.Constants.PAD``, which has weight 0.  Losses are summed, not
    averaged (``size_average=False``).  The criterion is moved to the GPU
    when ``opt.cuda`` is set.
    """
    classWeights = torch.ones(vocabSize)
    classWeights[onmt.Constants.PAD] = 0
    criterion = nn.NLLLoss(classWeights, size_average=False)
    if opt.cuda:
        criterion.cuda()
    return criterion
116 | 
117 | 
def memoryEfficientLoss(outputs, targets, generator, crit, eval=False):
    """Compute the loss piece by piece to bound the generator's memory use.

    ``outputs`` is re-wrapped as a fresh Variable and split, together with
    ``targets``, into chunks of ``opt.max_generator_batches`` along the first
    dimension.  Each chunk goes through ``generator`` and ``crit``.  Unless
    ``eval`` is set, each chunk's loss divided by ``outputs.size(1)`` is
    back-propagated immediately.

    Returns:
        ``(loss, grad_output)``: the summed loss as a number, and the
        gradient accumulated on the re-wrapped outputs, or ``None`` when no
        gradient was produced.
    """
    training = not eval
    # re-wrapping the data cuts the graph here
    detached = Variable(outputs.data, requires_grad=training, volatile=eval).contiguous()

    batch_size = detached.size(1)
    chunk = opt.max_generator_batches
    pieces = zip(torch.split(detached, chunk),
                 torch.split(targets.contiguous(), chunk))

    total = 0
    for out_piece, targ_piece in pieces:
        flat = out_piece.view(-1, out_piece.size(2))
        piece_loss = crit(generator(flat), targ_piece.view(-1))
        total += piece_loss.data[0]
        if training:
            piece_loss.div(batch_size).backward()

    grad = detached.grad
    return total, (grad.data if grad is not None else None)
136 | 
137 | 
def eval(model, criterion, data):
    """Return the summed loss divided by the non-padding target count over `data`.

    The model is put in evaluation mode for the pass and switched back to
    training mode before returning. No gradients are back-propagated
    (memoryEfficientLoss is called with eval=True).
    """
    model.eval()

    loss_sum = 0
    word_count = 0
    for idx in range(len(data)):
        # must be batch first for gather/scatter in DataParallel
        batch = [tensor.transpose(0, 1) for tensor in data[idx]]
        outputs = model(batch)  # FIXME volatile
        targets = batch[1][:, 1:]  # exclude <s> from targets
        batch_loss, _ = memoryEfficientLoss(
            outputs, targets, model.generator, criterion, eval=True)
        loss_sum += batch_loss
        # only non-padding target positions count as words
        word_count += targets.data.ne(onmt.Constants.PAD).sum()

    model.train()
    return loss_sum / word_count
154 | 
155 | 
def trainModel(model, trainData, validData, dataset, optim):
    """Train `model` from opt.start_epoch through opt.epochs, inclusive.

    After each epoch the validation loss is computed with eval(), the
    learning rate is possibly updated (only when opt.optim == 'sgd'), and a
    checkpoint is written to
    '<opt.save_model>_e<epoch>_<validation perplexity>.pt'.

    model:      the network; must expose a `generator` attribute.
    trainData:  indexable collection of training batches (supports len()).
    validData:  validation batches, passed to eval().
    dataset:    loaded data dict; dataset['dicts'] is stored in checkpoints
                and dataset['dicts']['tgt'].size() sizes the criterion.
    optim:      optimizer wrapper providing step(), updateLearningRate()
                and a `last_ppl` attribute.
    """
    print(model)
    model.train()
    # (Re)initialise parameters uniformly when the optimizer has no recorded
    # perplexity. Presumably that means training starts from scratch rather
    # than from a checkpoint -- confirm in onmt.Optim.
    if optim.last_ppl is None:
        for p in model.parameters():
            p.data.uniform_(-opt.param_init, opt.param_init)

    # loss over the target vocabulary
    criterion = NMTCriterion(dataset['dicts']['tgt'].size())

    start_time = time.time()

    def trainEpoch(epoch):
        """Run one pass over trainData and return loss per non-padding target."""

        # shuffle mini batch order
        batchOrder = torch.randperm(len(trainData))

        total_loss, report_loss = 0, 0
        total_words, report_words = 0, 0
        start = time.time()
        for i in range(len(trainData)):

            # epochs before opt.curriculum visit the batches in stored order
            batchIdx = batchOrder[i] if epoch >= opt.curriculum else i
            batch = trainData[batchIdx]
            # must be batch first for gather/scatter in DataParallel
            batch = [x.transpose(0, 1) for x in batch]

            model.zero_grad()
            outputs = model(batch)
            targets = batch[1][:, 1:]  # exclude <s> from targets
            loss, gradOutput = memoryEfficientLoss(
                    outputs, targets, model.generator, criterion)

            # push the gradient gathered at the generator input back
            # through the rest of the model
            outputs.backward(gradOutput)

            # update the parameters
            optim.step()

            report_loss += loss
            total_loss += loss
            num_words = targets.data.ne(onmt.Constants.PAD).sum()
            total_words += num_words
            report_words += num_words
            if i % opt.log_interval == 0 and i > 0:
                # Clamp the exponent like the per-epoch summaries do, so a
                # diverging loss cannot raise OverflowError in math.exp.
                print("Epoch %2d, %5d/%5d batches; perplexity: %6.2f; %3.0f tokens/s; %6.0f s elapsed" %
                      (epoch, i, len(trainData),
                       math.exp(min(report_loss / report_words, 100)),
                       report_words / (time.time() - start),
                       time.time() - start_time))

                # start a new reporting window
                report_loss = report_words = 0
                start = time.time()

        return total_loss / total_words

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('')

        #  (1) train for one epoch on the training set
        train_loss = trainEpoch(epoch)
        print('Train perplexity: %g' % math.exp(min(train_loss, 100)))

        #  (2) evaluate on the validation set
        valid_loss = eval(model, criterion, validData)
        valid_ppl = math.exp(min(valid_loss, 100))
        print('Validation perplexity: %g' % valid_ppl)

        #  (3) maybe update the learning rate
        if opt.optim == 'sgd':
            optim.updateLearningRate(valid_loss, epoch)

        #  (4) drop a checkpoint
        checkpoint = {
            'model': model,
            'dicts': dataset['dicts'],
            'opt': opt,
            'epoch': epoch,
            'optim': optim,
        }
        torch.save(checkpoint,
                   '%s_e%d_%.2f.pt' % (opt.save_model, epoch, valid_ppl))
235 | 
236 | 
def main():
    """Load the data, build or restore the model and optimizer, then train.

    With opt.train_from unset, a new encoder/decoder/generator model and a
    new onmt.Optim are created. Otherwise the model, the optimizer and the
    epoch to resume from are taken from the checkpoint file.
    """
    print("Loading data from '%s'" % opt.data)

    dataset = torch.load(opt.data)
    train_split = dataset['train']
    valid_split = dataset['valid']

    trainData = onmt.Dataset(train_split['src'],
                             train_split['tgt'], opt.batch_size, opt.cuda)
    validData = onmt.Dataset(valid_split['src'],
                             valid_split['tgt'], opt.batch_size, opt.cuda)

    dicts = dataset['dicts']
    print(' * vocabulary size. source = %d; target = %d' %
          (dicts['src'].size(), dicts['tgt'].size()))
    print(' * number of training sentences. %d' %
          len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Building model...')

    if opt.train_from is not None:
        # resume: the checkpoint carries the model, optimizer and last epoch
        print('Loading from checkpoint at %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from)
        model = checkpoint['model']
        if opt.cuda:
            model.cuda()
        else:
            model.cpu()
        optim = checkpoint['optim']
        opt.start_epoch = checkpoint['epoch'] + 1
    else:
        # opt.cuda is the GPU count; more than one means data parallelism
        multi_gpu = opt.cuda > 1

        encoder = onmt.Models.Encoder(opt, dicts['src'])
        decoder = onmt.Models.Decoder(opt, dicts['tgt'])
        # projects decoder states to log-probabilities over the target vocabulary
        generator = nn.Sequential(
            nn.Linear(opt.rnn_size, dicts['tgt'].size()),
            nn.LogSoftmax())
        if multi_gpu:
            generator = nn.DataParallel(generator, device_ids=opt.gpus)

        model = onmt.Models.NMTModel(encoder, decoder, generator)
        if multi_gpu:
            model = nn.DataParallel(model, device_ids=opt.gpus)

        if opt.cuda:
            model.cuda()
        else:
            model.cpu()

        # keep the generator reachable on the outermost object; the training
        # and evaluation loops read model.generator
        model.generator = generator

        for param in model.parameters():
            param.data.uniform_(-opt.param_init, opt.param_init)

        optim = onmt.Optim(
            model.parameters(), opt.optim, opt.learning_rate, opt.max_grad_norm,
            lr_decay=opt.learning_rate_decay,
            start_decay_at=opt.start_decay_at
        )

    nParams = sum(param.nelement() for param in model.parameters())
    print('* number of parameters: %d' % nParams)

    trainModel(model, trainData, validData, dataset, optim)
298 | 
299 | 
# Call main() only when this file is executed as a script.
if __name__ == "__main__":
    main()
302 | 


--------------------------------------------------------------------------------