├── vae ├── results │ └── .gitignore ├── requirements.txt ├── README.md └── main.py ├── fast_neural_style ├── neural_style │ ├── __init__.py │ ├── utils.py │ ├── vgg.py │ ├── transformer_net.py │ └── neural_style.py ├── images │ ├── style-images │ │ ├── candy.jpg │ │ ├── mosaic.jpg │ │ ├── udnie.jpg │ │ ├── rain-princess.jpg │ │ └── rain-princess-cropped.jpg │ ├── content-images │ │ └── amber.jpg │ └── output-images │ │ ├── amber-candy.jpg │ │ ├── amber-mosaic.jpg │ │ ├── amber-udnie.jpg │ │ └── amber-rain-princess.jpg ├── download_saved_models.py └── README.md ├── mnist ├── requirements.txt ├── README.md └── main.py ├── snli ├── requirements.txt ├── util.py ├── model.py └── train.py ├── word_language_model ├── requirements.txt ├── data │ └── wikitext-2 │ │ └── README ├── data.py ├── model.py ├── generate.py ├── README.md ├── main.py └── notebooks │ └── 02_Inference.ipynb ├── imagenet ├── requirements.txt ├── README.md └── main.py ├── dcgan ├── requirements.txt ├── README.md └── main.py ├── mnist_hogwild ├── requirements.txt ├── train.py └── main.py ├── .gitignore ├── reinforcement_learning ├── requirements.txt ├── README.md ├── reinforce.py └── actor_critic.py ├── regression ├── README.md └── main.py ├── time_sequence_prediction ├── generate_sine_wave.py ├── README.md └── train.py ├── README.md ├── super_resolution ├── model.py ├── dataset.py ├── super_resolve.py ├── README.md ├── data.py └── main.py └── LICENSE /vae/results/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mnist/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /snli/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchtext 3 | -------------------------------------------------------------------------------- /word_language_model/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | -------------------------------------------------------------------------------- /imagenet/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /dcgan/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | lmdb 4 | -------------------------------------------------------------------------------- /mnist_hogwild/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dcgan/data 2 | data 3 | *.pyc 4 | OpenNMT/data 5 | -------------------------------------------------------------------------------- /vae/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | tqdm 4 | six 5 | 
-------------------------------------------------------------------------------- /reinforcement_learning/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | gym 4 | -------------------------------------------------------------------------------- /regression/README.md: -------------------------------------------------------------------------------- 1 | # Linear regression example 2 | 3 | Trains a single fully-connected layer to fit a 4th degree polynomial. 4 | -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/candy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/candy.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/mosaic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/mosaic.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/udnie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/udnie.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/content-images/amber.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/content-images/amber.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-candy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-candy.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-mosaic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-mosaic.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-udnie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-udnie.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/style-images/rain-princess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/rain-princess.jpg -------------------------------------------------------------------------------- /fast_neural_style/images/output-images/amber-rain-princess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/output-images/amber-rain-princess.jpg 
-------------------------------------------------------------------------------- /fast_neural_style/images/style-images/rain-princess-cropped.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceshine/examples/HEAD/fast_neural_style/images/style-images/rain-princess-cropped.jpg -------------------------------------------------------------------------------- /mnist/README.md: -------------------------------------------------------------------------------- 1 | # Basic MNIST Example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | python main.py 6 | # CUDA_VISIBLE_DEVICES=2 python main.py # to run on a specific GPU, e.g. GPU 2 7 | ``` 8 | -------------------------------------------------------------------------------- /word_language_model/data/wikitext-2/README: -------------------------------------------------------------------------------- 1 | This is raw data from the wikitext-2 dataset. 2 | 3 | See https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/ 4 | -------------------------------------------------------------------------------- /reinforcement_learning/README.md: -------------------------------------------------------------------------------- 1 | # Reinforcement learning training example 2 | 3 | ```bash 4 | pip install -r requirements.txt 5 | # For REINFORCE: 6 | python reinforce.py 7 | # For actor-critic: 8 | python actor_critic.py 9 | ``` 10 | -------------------------------------------------------------------------------- /time_sequence_prediction/generate_sine_wave.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | np.random.seed(2) 5 | 6 | T = 20 7 | L = 1000 8 | N = 100 9 | 10 | x = np.empty((N, L), 'int64') 11 | x[:] = np.array(range(L)) + np.random.randint(-4 * T, 4 * T, N).reshape(N, 1) 12 | data = np.sin(x / 1.0 / T).astype('float64') 13 | torch.save(data, open('traindata.pt', 'wb')) 14 | -------------------------------------------------------------------------------- /vae/README.md: -------------------------------------------------------------------------------- 1 | # Basic VAE Example 2 | 3 | This is an improved implementation of the paper [Stochastic Gradient VB and the 4 | Variational Auto-Encoder](http://arxiv.org/abs/1312.6114) by Kingma and Welling. 5 | It uses ReLUs and the Adam optimizer instead of sigmoids and Adagrad. These changes make the network converge much faster.
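For readers new to VAEs, the objective this kind of example optimizes is a reconstruction term plus the KL divergence between the approximate posterior and a unit Gaussian. The sketch below shows the standard closed-form loss for flattened 28×28 MNIST inputs; it is illustrative only and is not copied from this example's `main.py`.

```python
import torch
import torch.nn.functional as F

def vae_loss(recon_x, x, mu, logvar):
    # Reconstruction term: binary cross-entropy over the flattened 28x28 image.
    bce = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')
    # KL divergence between N(mu, sigma^2) and N(0, 1), in closed form
    # (Appendix B of Kingma & Welling, 2013).
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return bce + kld
```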
6 | 7 | ```bash 8 | pip install -r requirements.txt 9 | python main.py 10 | ``` 11 | -------------------------------------------------------------------------------- /fast_neural_style/download_saved_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | from torch.utils.model_zoo import _download_url_to_file 5 | 6 | 7 | def unzip(source_filename, dest_dir): 8 | with zipfile.ZipFile(source_filename) as zf: 9 | zf.extractall(path=dest_dir) 10 | 11 | 12 | if __name__ == '__main__': 13 | _download_url_to_file('https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1', 'saved_models.zip', None, True) 14 | unzip('saved_models.zip', '.') 15 | -------------------------------------------------------------------------------- /time_sequence_prediction/README.md: -------------------------------------------------------------------------------- 1 | # Time Sequence Prediction 2 | This is a toy example for beginners to start with. It is helpful for learning both PyTorch and time sequence prediction. Two LSTMCell units are used in this example to learn some sine wave signals starting at different phases. After learning the sine waves, the network tries to predict the signal values in the future. The results are shown in the picture below. 3 | 4 | ## Usage 5 | 6 | ``` 7 | python generate_sine_wave.py 8 | python train.py 9 | ``` 10 | 11 | ## Result 12 | The initial signal and the predicted results are shown in the image. We first give some initial signals (solid lines). The network then gives some predicted results (dashed lines). It can be concluded that the network can generate new sine waves. 13 | ![image](https://cloud.githubusercontent.com/assets/1419566/24184438/e24f5280-0f08-11e7-8f8b-4d972b527a81.png) 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Examples 2 | 3 | A repository showcasing examples of using [PyTorch](https://github.com/pytorch/pytorch). 4 | 5 | - MNIST Convnets 6 | - Word-level Language Modeling using LSTM RNNs 7 | - Training ImageNet Classifiers with Residual Networks 8 | - Generative Adversarial Networks (DCGAN) 9 | - Variational Auto-Encoders 10 | - Superresolution using an efficient sub-pixel convolutional neural network 11 | - Hogwild training of shared ConvNets across multiple processes on MNIST 12 | - Training a CartPole to balance in OpenAI Gym with actor-critic 13 | - Natural Language Inference (SNLI) with GloVe vectors, LSTMs, and torchtext 14 | - Time sequence prediction - create an LSTM to learn sine waves 15 | 16 | Additionally, a list of good examples hosted in their own repositories: 17 | 18 | - [Neural Machine Translation using sequence-to-sequence RNN with attention (OpenNMT)](https://github.com/OpenNMT/OpenNMT-py) 19 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | 4 | 5 | def load_image(filename, size=None, scale=None): 6 | img = Image.open(filename) 7 | if size is not None: 8 | img = img.resize((size, size), Image.ANTIALIAS) 9 | elif scale is not None: 10 | img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS) 11 | return img 12 | 13 | 14 | def save_image(filename, data): 15 | img =
data.clone().clamp(0, 255).numpy() 16 | img = img.transpose(1, 2, 0).astype("uint8") 17 | img = Image.fromarray(img) 18 | img.save(filename) 19 | 20 | 21 | def gram_matrix(y): 22 | (b, ch, h, w) = y.size() 23 | features = y.view(b, ch, w * h) 24 | features_t = features.transpose(1, 2) 25 | gram = features.bmm(features_t) / (ch * h * w) 26 | return gram 27 | 28 | 29 | def normalize_batch(batch): 30 | # normalize using imagenet mean and std 31 | mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1) 32 | std = batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1) 33 | batch = batch.div_(255.0) 34 | return (batch - mean) / std 35 | -------------------------------------------------------------------------------- /super_resolution/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | 5 | 6 | class Net(nn.Module): 7 | def __init__(self, upscale_factor): 8 | super(Net, self).__init__() 9 | 10 | self.relu = nn.ReLU() 11 | self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2)) 12 | self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)) 13 | self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1)) 14 | self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1)) 15 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 16 | 17 | self._initialize_weights() 18 | 19 | def forward(self, x): 20 | x = self.relu(self.conv1(x)) 21 | x = self.relu(self.conv2(x)) 22 | x = self.relu(self.conv3(x)) 23 | x = self.pixel_shuffle(self.conv4(x)) 24 | return x 25 | 26 | def _initialize_weights(self): 27 | init.orthogonal_(self.conv1.weight, init.calculate_gain('relu')) 28 | init.orthogonal_(self.conv2.weight, init.calculate_gain('relu')) 29 | init.orthogonal_(self.conv3.weight, init.calculate_gain('relu')) 30 | init.orthogonal_(self.conv4.weight) 31 | -------------------------------------------------------------------------------- /super_resolution/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from os import listdir 4 | from os.path import join 5 | from PIL import Image 6 | 7 | 8 | def is_image_file(filename): 9 | return any(filename.endswith(extension) for extension in [".png", ".jpg", ".jpeg"]) 10 | 11 | 12 | def load_img(filepath): 13 | img = Image.open(filepath).convert('YCbCr') 14 | y, _, _ = img.split() 15 | return y 16 | 17 | 18 | class DatasetFromFolder(data.Dataset): 19 | def __init__(self, image_dir, input_transform=None, target_transform=None): 20 | super(DatasetFromFolder, self).__init__() 21 | self.image_filenames = [join(image_dir, x) for x in listdir(image_dir) if is_image_file(x)] 22 | 23 | self.input_transform = input_transform 24 | self.target_transform = target_transform 25 | 26 | def __getitem__(self, index): 27 | input = load_img(self.image_filenames[index]) 28 | target = input.copy() 29 | if self.input_transform: 30 | input = self.input_transform(input) 31 | if self.target_transform: 32 | target = self.target_transform(target) 33 | 34 | return input, target 35 | 36 | def __len__(self): 37 | return len(self.image_filenames) 38 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/vgg.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | import torch 4 | from torchvision import models 5 | 6 | 7 | class Vgg16(torch.nn.Module): 8 | def 
__init__(self, requires_grad=False): 9 | super(Vgg16, self).__init__() 10 | vgg_pretrained_features = models.vgg16(pretrained=True).features 11 | self.slice1 = torch.nn.Sequential() 12 | self.slice2 = torch.nn.Sequential() 13 | self.slice3 = torch.nn.Sequential() 14 | self.slice4 = torch.nn.Sequential() 15 | for x in range(4): 16 | self.slice1.add_module(str(x), vgg_pretrained_features[x]) 17 | for x in range(4, 9): 18 | self.slice2.add_module(str(x), vgg_pretrained_features[x]) 19 | for x in range(9, 16): 20 | self.slice3.add_module(str(x), vgg_pretrained_features[x]) 21 | for x in range(16, 23): 22 | self.slice4.add_module(str(x), vgg_pretrained_features[x]) 23 | if not requires_grad: 24 | for param in self.parameters(): 25 | param.requires_grad = False 26 | 27 | def forward(self, X): 28 | h = self.slice1(X) 29 | h_relu1_2 = h 30 | h = self.slice2(h) 31 | h_relu2_2 = h 32 | h = self.slice3(h) 33 | h_relu3_3 = h 34 | h = self.slice4(h) 35 | h_relu4_3 = h 36 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3']) 37 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3) 38 | return out 39 | -------------------------------------------------------------------------------- /super_resolution/super_resolve.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | from torch.autograd import Variable 5 | from PIL import Image 6 | from torchvision.transforms import ToTensor 7 | 8 | import numpy as np 9 | 10 | # Training settings 11 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example') 12 | parser.add_argument('--input_image', type=str, required=True, help='input image to use') 13 | parser.add_argument('--model', type=str, required=True, help='model file to use') 14 | parser.add_argument('--output_filename', type=str, help='where to save the output image') 15 | parser.add_argument('--cuda', action='store_true', help='use cuda') 16 | opt = parser.parse_args() 17 | 18 | print(opt) 19 | img = Image.open(opt.input_image).convert('YCbCr') 20 | y, cb, cr = img.split() 21 | 22 | model = torch.load(opt.model) 23 | img_to_tensor = ToTensor() 24 | input = img_to_tensor(y).view(1, -1, y.size[1], y.size[0]) 25 | 26 | if opt.cuda: 27 | model = model.cuda() 28 | input = input.cuda() 29 | 30 | out = model(input) 31 | out = out.cpu() 32 | out_img_y = out[0].detach().numpy() 33 | out_img_y *= 255.0 34 | out_img_y = out_img_y.clip(0, 255) 35 | out_img_y = Image.fromarray(np.uint8(out_img_y[0]), mode='L') 36 | 37 | out_img_cb = cb.resize(out_img_y.size, Image.BICUBIC) 38 | out_img_cr = cr.resize(out_img_y.size, Image.BICUBIC) 39 | out_img = Image.merge('YCbCr', [out_img_y, out_img_cb, out_img_cr]).convert('RGB') 40 | 41 | out_img.save(opt.output_filename) 42 | print('output image saved to ', opt.output_filename) 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /word_language_model/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | class Dictionary(object): 5 | def __init__(self): 6 | self.word2idx = {} 7 | self.idx2word = [] 8 | 9 | def add_word(self, word): 10 | if word not in self.word2idx: 11 | self.idx2word.append(word) 12 | self.word2idx[word] = len(self.idx2word) - 1 13 | return self.word2idx[word] 14 | 15 | def __len__(self): 16 | return len(self.idx2word) 17 | 18 | 19 | class Corpus(object): 20 | def __init__(self, path): 21 | self.dictionary = Dictionary() 22 | self.train = self.tokenize(os.path.join(path, 'train.txt')) 23 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 24 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 25 | 26 | def tokenize(self, path): 27 | """Tokenizes a text file.""" 28 | assert os.path.exists(path) 29 | # Add words to the dictionary 30 | with open(path, 'r', encoding="utf8") as f: 31 | tokens = 0 32 | for line in f: 33 | if len(line.strip()) == 0: 34 | continue 35 | words = line.strip().split() + [''] 36 | tokens += len(words) 37 | for word in words: 38 | self.dictionary.add_word(word) 39 | 40 | # Tokenize file content 41 | with open(path, 'r', encoding="utf8") as f: 42 | ids = torch.LongTensor(tokens) 43 | token = 0 44 | for line in f: 45 | if len(line.strip()) == 0: 46 | continue 47 | words = line.strip().split() + [''] 48 | for word in words: 49 | ids[token] = self.dictionary.word2idx[word] 50 | token += 1 51 | 52 | return ids 53 | -------------------------------------------------------------------------------- /super_resolution/README.md: -------------------------------------------------------------------------------- 1 | # Superresolution using an efficient sub-pixel convolutional neural network 2 | 3 | This example illustrates how to use the efficient sub-pixel convolution layer described in ["Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network" - Shi et al.](https://arxiv.org/abs/1609.05158) for increasing spatial resolution within your network for tasks such as superresolution. 
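For intuition, the sub-pixel (pixel shuffle) layer takes a feature map with `upscale_factor**2` channels per output channel and rearranges it into a spatially larger map. A minimal illustration of the rearrangement (not part of this example's scripts):

```python
import torch
import torch.nn as nn

upscale_factor = 3
pixel_shuffle = nn.PixelShuffle(upscale_factor)
# 9 low-resolution channels are rearranged into one channel at 3x the resolution.
x = torch.randn(1, upscale_factor ** 2, 8, 8)
y = pixel_shuffle(x)
print(x.shape, '->', y.shape)  # torch.Size([1, 9, 8, 8]) -> torch.Size([1, 1, 24, 24])
```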
4 | 5 | ``` 6 | usage: main.py [-h] --upscale_factor UPSCALE_FACTOR [--batchSize BATCHSIZE] 7 | [--testBatchSize TESTBATCHSIZE] [--nEpochs NEPOCHS] [--lr LR] 8 | [--cuda] [--threads THREADS] [--seed SEED] 9 | 10 | PyTorch Super Res Example 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | --upscale_factor super resolution upscale factor 15 | --batchSize training batch size 16 | --testBatchSize testing batch size 17 | --nEpochs number of epochs to train for 18 | --lr Learning Rate. Default=0.01 19 | --cuda use cuda 20 | --threads number of threads for data loader to use Default=4 21 | --seed random seed to use. Default=123 22 | ``` 23 | This example trains a super-resolution network on the [BSD300 dataset](https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/), using crops from the 200 training images, and evaluating on crops of the 100 test images. A snapshot of the model is saved after every epoch with the filename `model_epoch_<epoch_number>.pth`. 24 | 25 | ## Example Usage: 26 | 27 | ### Train 28 | 29 | `python main.py --upscale_factor 3 --batchSize 4 --testBatchSize 100 --nEpochs 30 --lr 0.001` 30 | 31 | ### Super Resolve 32 | `python super_resolve.py --input_image dataset/BSDS300/images/test/16077.jpg --model model_epoch_30.pth --output_filename out.png` 33 | -------------------------------------------------------------------------------- /regression/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from itertools import count 4 | 5 | import torch 6 | import torch.autograd 7 | import torch.nn.functional as F 8 | 9 | POLY_DEGREE = 4 10 | W_target = torch.randn(POLY_DEGREE, 1) * 5 11 | b_target = torch.randn(1) * 5 12 | 13 | 14 | def make_features(x): 15 | """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4].""" 16 | x = x.unsqueeze(1) 17 | return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1) 18 | 19 | 20 | def f(x): 21 | """Approximated function.""" 22 | return x.mm(W_target) + b_target.item() 23 | 24 | 25 | def poly_desc(W, b): 26 | """Creates a string description of a polynomial.""" 27 | result = 'y = ' 28 | for i, w in enumerate(W): 29 | result += '{:+.2f} x^{} '.format(w, len(W) - i) 30 | result += '{:+.2f}'.format(b[0]) 31 | return result 32 | 33 | 34 | def get_batch(batch_size=32): 35 | """Builds a batch i.e.
(x, f(x)) pair.""" 36 | random = torch.randn(batch_size) 37 | x = make_features(random) 38 | y = f(x) 39 | return x, y 40 | 41 | 42 | # Define model 43 | fc = torch.nn.Linear(W_target.size(0), 1) 44 | 45 | for batch_idx in count(1): 46 | # Get data 47 | batch_x, batch_y = get_batch() 48 | 49 | # Reset gradients 50 | fc.zero_grad() 51 | 52 | # Forward pass 53 | output = F.smooth_l1_loss(fc(batch_x), batch_y) 54 | loss = output.item() 55 | 56 | # Backward pass 57 | output.backward() 58 | 59 | # Apply gradients 60 | for param in fc.parameters(): 61 | param.data.add_(-0.1 * param.grad.data) 62 | 63 | # Stop criterion 64 | if loss < 1e-3: 65 | break 66 | 67 | print('Loss: {:.6f} after {} batches'.format(loss, batch_idx)) 68 | print('==> Learned function:\t' + poly_desc(fc.weight.view(-1), fc.bias)) 69 | print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target)) 70 | -------------------------------------------------------------------------------- /dcgan/README.md: -------------------------------------------------------------------------------- 1 | # Deep Convolution Generative Adversarial Networks 2 | 3 | This example implements the paper [Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](http://arxiv.org/abs/1511.06434) 4 | 5 | The implementation is very close to the Torch implementation [dcgan.torch](https://github.com/soumith/dcgan.torch) 6 | 7 | After every 100 training iterations, the files `real_samples.png` and `fake_samples.png` are written to disk 8 | with the samples from the generative model. 9 | 10 | After every epoch, models are saved to: `netG_epoch_%d.pth` and `netD_epoch_%d.pth` 11 | 12 | ## Downloading the dataset 13 | You can download the LSUN dataset by cloning [this repo](https://github.com/fyu/lsun) and running 14 | ``` 15 | python download.py -c bedroom 16 | ``` 17 | 18 | ## Usage 19 | ``` 20 | usage: main.py [-h] --dataset DATASET --dataroot DATAROOT [--workers WORKERS] 21 | [--batchSize BATCHSIZE] [--imageSize IMAGESIZE] [--nz NZ] 22 | [--ngf NGF] [--ndf NDF] [--niter NITER] [--lr LR] 23 | [--beta1 BETA1] [--cuda] [--ngpu NGPU] [--netG NETG] 24 | [--netD NETD] 25 | 26 | optional arguments: 27 | -h, --help show this help message and exit 28 | --dataset DATASET cifar10 | lsun | imagenet | folder | lfw 29 | --dataroot DATAROOT path to dataset 30 | --workers WORKERS number of data loading workers 31 | --batchSize BATCHSIZE 32 | input batch size 33 | --imageSize IMAGESIZE 34 | the height / width of the input image to network 35 | --nz NZ size of the latent z vector 36 | --ngf NGF 37 | --ndf NDF 38 | --niter NITER number of epochs to train for 39 | --lr LR learning rate, default=0.0002 40 | --beta1 BETA1 beta1 for adam. 
default=0.5 41 | --cuda enables cuda 42 | --ngpu NGPU number of GPUs to use 43 | --netG NETG path to netG (to continue training) 44 | --netD NETD path to netD (to continue training) 45 | ``` 46 | -------------------------------------------------------------------------------- /snli/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser 3 | 4 | def makedirs(name): 5 | """helper function for python 2 and 3 to call os.makedirs() 6 | avoiding an error if the directory to be created already exists""" 7 | 8 | import os, errno 9 | 10 | try: 11 | os.makedirs(name) 12 | except OSError as ex: 13 | if ex.errno == errno.EEXIST and os.path.isdir(name): 14 | # ignore existing directory 15 | pass 16 | else: 17 | # a different error happened 18 | raise 19 | 20 | 21 | def get_args(): 22 | parser = ArgumentParser(description='PyTorch/torchtext SNLI example') 23 | parser.add_argument('--epochs', type=int, default=50) 24 | parser.add_argument('--batch_size', type=int, default=128) 25 | parser.add_argument('--d_embed', type=int, default=100) 26 | parser.add_argument('--d_proj', type=int, default=300) 27 | parser.add_argument('--d_hidden', type=int, default=300) 28 | parser.add_argument('--n_layers', type=int, default=1) 29 | parser.add_argument('--log_every', type=int, default=50) 30 | parser.add_argument('--lr', type=float, default=.001) 31 | parser.add_argument('--dev_every', type=int, default=1000) 32 | parser.add_argument('--save_every', type=int, default=1000) 33 | parser.add_argument('--dp_ratio', type=int, default=0.2) 34 | parser.add_argument('--no-bidirectional', action='store_false', dest='birnn') 35 | parser.add_argument('--preserve-case', action='store_false', dest='lower') 36 | parser.add_argument('--no-projection', action='store_false', dest='projection') 37 | parser.add_argument('--train_embed', action='store_false', dest='fix_emb') 38 | parser.add_argument('--gpu', type=int, default=0) 39 | parser.add_argument('--save_path', type=str, default='results') 40 | parser.add_argument('--vector_cache', type=str, default=os.path.join(os.getcwd(), '.vector_cache/input_vectors.pt')) 41 | parser.add_argument('--word_vectors', type=str, default='glove.6B.100d') 42 | parser.add_argument('--resume_snapshot', type=str, default='') 43 | args = parser.parse_args() 44 | return args 45 | -------------------------------------------------------------------------------- /super_resolution/data.py: -------------------------------------------------------------------------------- 1 | from os.path import exists, join, basename 2 | from os import makedirs, remove 3 | from six.moves import urllib 4 | import tarfile 5 | from torchvision.transforms import Compose, CenterCrop, ToTensor, Resize 6 | 7 | from dataset import DatasetFromFolder 8 | 9 | 10 | def download_bsd300(dest="dataset"): 11 | output_image_dir = join(dest, "BSDS300/images") 12 | 13 | if not exists(output_image_dir): 14 | makedirs(dest) 15 | url = "http://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/BSDS300-images.tgz" 16 | print("downloading url ", url) 17 | 18 | data = urllib.request.urlopen(url) 19 | 20 | file_path = join(dest, basename(url)) 21 | with open(file_path, 'wb') as f: 22 | f.write(data.read()) 23 | 24 | print("Extracting data") 25 | with tarfile.open(file_path) as tar: 26 | for item in tar: 27 | tar.extract(item, dest) 28 | 29 | remove(file_path) 30 | 31 | return output_image_dir 32 | 33 | 34 | def calculate_valid_crop_size(crop_size, 
upscale_factor): 35 | return crop_size - (crop_size % upscale_factor) 36 | 37 | 38 | def input_transform(crop_size, upscale_factor): 39 | return Compose([ 40 | CenterCrop(crop_size), 41 | Resize(crop_size // upscale_factor), 42 | ToTensor(), 43 | ]) 44 | 45 | 46 | def target_transform(crop_size): 47 | return Compose([ 48 | CenterCrop(crop_size), 49 | ToTensor(), 50 | ]) 51 | 52 | 53 | def get_training_set(upscale_factor): 54 | root_dir = download_bsd300() 55 | train_dir = join(root_dir, "train") 56 | crop_size = calculate_valid_crop_size(256, upscale_factor) 57 | 58 | return DatasetFromFolder(train_dir, 59 | input_transform=input_transform(crop_size, upscale_factor), 60 | target_transform=target_transform(crop_size)) 61 | 62 | 63 | def get_test_set(upscale_factor): 64 | root_dir = download_bsd300() 65 | test_dir = join(root_dir, "test") 66 | crop_size = calculate_valid_crop_size(256, upscale_factor) 67 | 68 | return DatasetFromFolder(test_dir, 69 | input_transform=input_transform(crop_size, upscale_factor), 70 | target_transform=target_transform(crop_size)) 71 | -------------------------------------------------------------------------------- /mnist_hogwild/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | from torchvision import datasets, transforms 6 | 7 | def train(rank, args, model): 8 | torch.manual_seed(args.seed + rank) 9 | 10 | train_loader = torch.utils.data.DataLoader( 11 | datasets.MNIST('../data', train=True, download=True, 12 | transform=transforms.Compose([ 13 | transforms.ToTensor(), 14 | transforms.Normalize((0.1307,), (0.3081,)) 15 | ])), 16 | batch_size=args.batch_size, shuffle=True, num_workers=1) 17 | test_loader = torch.utils.data.DataLoader( 18 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 19 | transforms.ToTensor(), 20 | transforms.Normalize((0.1307,), (0.3081,)) 21 | ])), 22 | batch_size=args.batch_size, shuffle=True, num_workers=1) 23 | 24 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 25 | for epoch in range(1, args.epochs + 1): 26 | train_epoch(epoch, args, model, train_loader, optimizer) 27 | test_epoch(model, test_loader) 28 | 29 | 30 | def train_epoch(epoch, args, model, data_loader, optimizer): 31 | model.train() 32 | pid = os.getpid() 33 | for batch_idx, (data, target) in enumerate(data_loader): 34 | optimizer.zero_grad() 35 | output = model(data) 36 | loss = F.nll_loss(output, target) 37 | loss.backward() 38 | optimizer.step() 39 | if batch_idx % args.log_interval == 0: 40 | print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 41 | pid, epoch, batch_idx * len(data), len(data_loader.dataset), 42 | 100. * batch_idx / len(data_loader), loss.item())) 43 | 44 | 45 | def test_epoch(model, data_loader): 46 | model.eval() 47 | test_loss = 0 48 | correct = 0 49 | with torch.no_grad(): 50 | for data, target in data_loader: 51 | output = model(data) 52 | test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss 53 | pred = output.max(1)[1] # get the index of the max log-probability 54 | correct += pred.eq(target).sum().item() 55 | 56 | test_loss /= len(data_loader.dataset) 57 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 58 | test_loss, correct, len(data_loader.dataset), 59 | 100. 
* correct / len(data_loader.dataset))) 60 | -------------------------------------------------------------------------------- /imagenet/README.md: -------------------------------------------------------------------------------- 1 | # ImageNet training in PyTorch 2 | 3 | This implements training of popular model architectures, such as ResNet, AlexNet, and VGG on the ImageNet dataset. 4 | 5 | ## Requirements 6 | 7 | - Install PyTorch ([pytorch.org](http://pytorch.org)) 8 | - `pip install -r requirements.txt` 9 | - Download the ImageNet dataset and move validation images to labeled subfolders 10 | - To do this, you can use the following script: https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 11 | 12 | ## Training 13 | 14 | To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset: 15 | 16 | ```bash 17 | python main.py -a resnet18 [imagenet-folder with train and val folders] 18 | ``` 19 | 20 | The default learning rate schedule starts at 0.1 and decays by a factor of 10 every 30 epochs. This is appropriate for ResNet and models with batch normalization, but too high for AlexNet and VGG. Use 0.01 as the initial learning rate for AlexNet or VGG: 21 | 22 | ```bash 23 | python main.py -a alexnet --lr 0.01 [imagenet-folder with train and val folders] 24 | ``` 25 | 26 | ## Usage 27 | 28 | ``` 29 | usage: main.py [-h] [--arch ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N] 30 | [--lr LR] [--momentum M] [--weight-decay W] [--print-freq N] 31 | [--resume PATH] [-e] [--pretrained] 32 | DIR 33 | 34 | PyTorch ImageNet Training 35 | 36 | positional arguments: 37 | DIR path to dataset 38 | 39 | optional arguments: 40 | -h, --help show this help message and exit 41 | --arch ARCH, -a ARCH model architecture: alexnet | resnet | resnet101 | 42 | resnet152 | resnet18 | resnet34 | resnet50 | vgg | 43 | vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn 44 | | vgg19 | vgg19_bn (default: resnet18) 45 | -j N, --workers N number of data loading workers (default: 4) 46 | --epochs N number of total epochs to run 47 | --start-epoch N manual epoch number (useful on restarts) 48 | -b N, --batch-size N mini-batch size (default: 256) 49 | --lr LR, --learning-rate LR 50 | initial learning rate 51 | --momentum M momentum 52 | --weight-decay W, --wd W 53 | weight decay (default: 1e-4) 54 | --print-freq N, -p N print frequency (default: 10) 55 | --resume PATH path to latest checkpoint (default: none) 56 | -e, --evaluate evaluate model on validation set 57 | --pretrained use pre-trained model 58 | ``` 59 | -------------------------------------------------------------------------------- /mnist_hogwild/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.multiprocessing as mp 7 | 8 | from train import train 9 | 10 | # Training settings 11 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 12 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 13 | help='input batch size for training (default: 64)') 14 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 15 | help='input batch size for testing (default: 1000)') 16 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 17 | help='number of epochs to train (default: 10)') 18 | parser.add_argument('--lr', type=float, 
default=0.01, metavar='LR', 19 | help='learning rate (default: 0.01)') 20 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 21 | help='SGD momentum (default: 0.5)') 22 | parser.add_argument('--seed', type=int, default=1, metavar='S', 23 | help='random seed (default: 1)') 24 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 25 | help='how many batches to wait before logging training status') 26 | parser.add_argument('--num-processes', type=int, default=2, metavar='N', 27 | help='how many training processes to use (default: 2)') 28 | 29 | class Net(nn.Module): 30 | def __init__(self): 31 | super(Net, self).__init__() 32 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 33 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 34 | self.conv2_drop = nn.Dropout2d() 35 | self.fc1 = nn.Linear(320, 50) 36 | self.fc2 = nn.Linear(50, 10) 37 | 38 | def forward(self, x): 39 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 40 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 41 | x = x.view(-1, 320) 42 | x = F.relu(self.fc1(x)) 43 | x = F.dropout(x, training=self.training) 44 | x = self.fc2(x) 45 | return F.log_softmax(x, dim=1) 46 | 47 | if __name__ == '__main__': 48 | args = parser.parse_args() 49 | 50 | torch.manual_seed(args.seed) 51 | 52 | model = Net() 53 | model.share_memory() # gradients are allocated lazily, so they are not shared here 54 | 55 | processes = [] 56 | for rank in range(args.num_processes): 57 | p = mp.Process(target=train, args=(rank, args, model)) 58 | p.start() 59 | processes.append(p) 60 | for p in processes: 61 | p.join() 62 | -------------------------------------------------------------------------------- /word_language_model/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class RNNModel(nn.Module): 4 | """Container module with an encoder, a recurrent module, and a decoder.""" 5 | 6 | def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False): 7 | super(RNNModel, self).__init__() 8 | self.drop = nn.Dropout(dropout) 9 | self.encoder = nn.Embedding(ntoken, ninp) 10 | if rnn_type in ['LSTM', 'GRU']: 11 | self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout) 12 | else: 13 | try: 14 | nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type] 15 | except KeyError: 16 | raise ValueError( """An invalid option for `--model` was supplied, 17 | options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""") 18 | self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout) 19 | self.decoder = nn.Linear(nhid, ntoken) 20 | 21 | # Optionally tie weights as in: 22 | # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016) 23 | # https://arxiv.org/abs/1608.05859 24 | # and 25 | # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 
2016) 26 | # https://arxiv.org/abs/1611.01462 27 | if tie_weights: 28 | if nhid != ninp: 29 | raise ValueError('When using the tied flag, nhid must be equal to emsize') 30 | self.decoder.weight = self.encoder.weight 31 | 32 | self.init_weights() 33 | 34 | self.rnn_type = rnn_type 35 | self.nhid = nhid 36 | self.nlayers = nlayers 37 | 38 | def init_weights(self): 39 | initrange = 0.1 40 | self.encoder.weight.data.uniform_(-initrange, initrange) 41 | self.decoder.bias.data.zero_() 42 | self.decoder.weight.data.uniform_(-initrange, initrange) 43 | 44 | def forward(self, input, hidden): 45 | emb = self.drop(self.encoder(input)) 46 | output, hidden = self.rnn(emb, hidden) 47 | output = self.drop(output) 48 | decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2))) 49 | return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden 50 | 51 | def init_hidden(self, bsz): 52 | weight = next(self.parameters()) 53 | if self.rnn_type == 'LSTM': 54 | return (weight.new_zeros(self.nlayers, bsz, self.nhid), 55 | weight.new_zeros(self.nlayers, bsz, self.nhid)) 56 | else: 57 | return weight.new_zeros(self.nlayers, bsz, self.nhid) 58 | -------------------------------------------------------------------------------- /word_language_model/generate.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Language Modeling on Penn Tree Bank 3 | # 4 | # This file generates new sentences sampled from the language model 5 | # 6 | ############################################################################### 7 | 8 | import argparse 9 | 10 | import torch 11 | from torch.autograd import Variable 12 | 13 | import data 14 | 15 | parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 Language Model') 16 | 17 | # Model parameters. 18 | parser.add_argument('--data', type=str, default='./data/wikitext-2', 19 | help='location of the data corpus') 20 | parser.add_argument('--checkpoint', type=str, default='./model.pt', 21 | help='model checkpoint to use') 22 | parser.add_argument('--outf', type=str, default='generated.txt', 23 | help='output file for generated text') 24 | parser.add_argument('--words', type=int, default='1000', 25 | help='number of words to generate') 26 | parser.add_argument('--seed', type=int, default=1111, 27 | help='random seed') 28 | parser.add_argument('--cuda', action='store_true', 29 | help='use CUDA') 30 | parser.add_argument('--temperature', type=float, default=1.0, 31 | help='temperature - higher will increase diversity') 32 | parser.add_argument('--log-interval', type=int, default=100, 33 | help='reporting interval') 34 | args = parser.parse_args() 35 | 36 | # Set the random seed manually for reproducibility. 
37 | torch.manual_seed(args.seed) 38 | if torch.cuda.is_available(): 39 | if not args.cuda: 40 | print("WARNING: You have a CUDA device, so you should probably run with --cuda") 41 | 42 | device = torch.device("cuda" if args.cuda else "cpu") 43 | 44 | if args.temperature < 1e-3: 45 | parser.error("--temperature has to be greater or equal 1e-3") 46 | 47 | with open(args.checkpoint, 'rb') as f: 48 | model = torch.load(f).to(device) 49 | model.eval() 50 | 51 | corpus = data.Corpus(args.data) 52 | ntokens = len(corpus.dictionary) 53 | hidden = model.init_hidden(1) 54 | input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device) 55 | 56 | with open(args.outf, 'w') as outf: 57 | with torch.no_grad(): # no tracking history 58 | for i in range(args.words): 59 | output, hidden = model(input, hidden) 60 | word_weights = output.squeeze().div(args.temperature).exp().cpu() 61 | word_idx = torch.multinomial(word_weights, 1)[0] 62 | input.fill_(word_idx) 63 | word = corpus.dictionary.idx2word[word_idx] 64 | 65 | outf.write(word + ('\n' if i % 20 == 19 else ' ')) 66 | 67 | if i % args.log_interval == 0: 68 | print('| Generated {}/{} words'.format(i, args.words)) 69 | -------------------------------------------------------------------------------- /snli/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Bottle(nn.Module): 6 | 7 | def forward(self, input): 8 | if len(input.size()) <= 2: 9 | return super(Bottle, self).forward(input) 10 | size = input.size()[:2] 11 | out = super(Bottle, self).forward(input.view(size[0]*size[1], -1)) 12 | return out.view(size[0], size[1], -1) 13 | 14 | 15 | class Linear(Bottle, nn.Linear): 16 | pass 17 | 18 | 19 | class Encoder(nn.Module): 20 | 21 | def __init__(self, config): 22 | super(Encoder, self).__init__() 23 | self.config = config 24 | input_size = config.d_proj if config.projection else config.d_embed 25 | dropout = 0 if config.n_layers == 1 else config.dp_ratio 26 | self.rnn = nn.LSTM(input_size=input_size, hidden_size=config.d_hidden, 27 | num_layers=config.n_layers, dropout=dropout, 28 | bidirectional=config.birnn) 29 | 30 | def forward(self, inputs): 31 | batch_size = inputs.size()[1] 32 | state_shape = self.config.n_cells, batch_size, self.config.d_hidden 33 | h0 = c0 = inputs.new_zeros(state_shape) 34 | outputs, (ht, ct) = self.rnn(inputs, (h0, c0)) 35 | return ht[-1] if not self.config.birnn else ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1) 36 | 37 | 38 | class SNLIClassifier(nn.Module): 39 | 40 | def __init__(self, config): 41 | super(SNLIClassifier, self).__init__() 42 | self.config = config 43 | self.embed = nn.Embedding(config.n_embed, config.d_embed) 44 | self.projection = Linear(config.d_embed, config.d_proj) 45 | self.encoder = Encoder(config) 46 | self.dropout = nn.Dropout(p=config.dp_ratio) 47 | self.relu = nn.ReLU() 48 | seq_in_size = 2*config.d_hidden 49 | if self.config.birnn: 50 | seq_in_size *= 2 51 | lin_config = [seq_in_size]*2 52 | self.out = nn.Sequential( 53 | Linear(*lin_config), 54 | self.relu, 55 | self.dropout, 56 | Linear(*lin_config), 57 | self.relu, 58 | self.dropout, 59 | Linear(*lin_config), 60 | self.relu, 61 | self.dropout, 62 | Linear(seq_in_size, config.d_out)) 63 | 64 | def forward(self, batch): 65 | prem_embed = self.embed(batch.premise) 66 | hypo_embed = self.embed(batch.hypothesis) 67 | if self.config.fix_emb: 68 | prem_embed =prem_embed.detach() 69 | hypo_embed =hypo_embed.detach() 70 | if 
self.config.projection: 71 | prem_embed = self.relu(self.projection(prem_embed)) 72 | hypo_embed = self.relu(self.projection(hypo_embed)) 73 | premise = self.encoder(prem_embed) 74 | hypothesis = self.encoder(hypo_embed) 75 | scores = self.out(torch.cat([premise, hypothesis], 1)) 76 | return scores 77 | -------------------------------------------------------------------------------- /word_language_model/README.md: -------------------------------------------------------------------------------- 1 | # Word-level language modeling RNN 2 | 3 | This example trains a multi-layer RNN (Elman, GRU, or LSTM) on a language modeling task. 4 | By default, the training script uses the Wikitext-2 dataset, which is provided with the example. 5 | The trained model can then be used by the generate script to generate new text. 6 | 7 | ```bash 8 | python main.py --cuda --epochs 6 # Train an LSTM on Wikitext-2 with CUDA, reaching perplexity of 117.61 9 | python main.py --cuda --epochs 6 --tied # Train a tied LSTM on Wikitext-2 with CUDA, reaching perplexity of 110.44 10 | python main.py --cuda --tied # Train a tied LSTM on Wikitext-2 with CUDA for 40 epochs, reaching perplexity of 87.17 11 | python generate.py # Generate samples from the trained LSTM model. 12 | ``` 13 | 14 | The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`) 15 | which will automatically use the cuDNN backend if run on CUDA with cuDNN installed. 16 | 17 | During training, if a keyboard interrupt (Ctrl-C) is received, 18 | training is stopped and the current model is evaluated against the test dataset. 19 | 20 | The `main.py` script accepts the following arguments: 21 | 22 | ```bash 23 | optional arguments: 24 | -h, --help show this help message and exit 25 | --data DATA location of the data corpus 26 | --model MODEL type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU) 27 | --emsize EMSIZE size of word embeddings 28 | --nhid NHID number of hidden units per layer 29 | --nlayers NLAYERS number of layers 30 | --lr LR initial learning rate 31 | --clip CLIP gradient clipping 32 | --epochs EPOCHS upper epoch limit 33 | --batch-size N batch size 34 | --bptt BPTT sequence length 35 | --dropout DROPOUT dropout applied to layers (0 = no dropout) 36 | --decay DECAY learning rate decay per epoch 37 | --tied tie the word embedding and softmax weights 38 | --seed SEED random seed 39 | --cuda use CUDA 40 | --log-interval N report interval 41 | --save SAVE path to save the final model 42 | ``` 43 | 44 | With these arguments, a variety of models can be tested. 45 | As an example, the following arguments produce slower but better models: 46 | 47 | ```bash 48 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 # Test perplexity of 80.97 49 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 --tied # Test perplexity of 75.96 50 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 # Test perplexity of 77.42 51 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 --tied # Test perplexity of 72.30 52 | ``` 53 | 54 | Perplexities on PTB are equal to or better than 55 | [Recurrent Neural Network Regularization (Zaremba et al. 2014)](https://arxiv.org/pdf/1409.2329.pdf) 56 | and are similar to [Using the Output Embedding to Improve Language Models (Press & Wolf 2016)](https://arxiv.org/abs/1608.05859) and [Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling (Inan et al.
2016)](https://arxiv.org/pdf/1611.01462.pdf), though both of these papers have improved perplexities by using a form of recurrent dropout [(variational dropout)](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks). 57 | -------------------------------------------------------------------------------- /super_resolution/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | from math import log10 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.utils.data import DataLoader 9 | from model import Net 10 | from data import get_training_set, get_test_set 11 | 12 | # Training settings 13 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example') 14 | parser.add_argument('--upscale_factor', type=int, required=True, help="super resolution upscale factor") 15 | parser.add_argument('--batchSize', type=int, default=64, help='training batch size') 16 | parser.add_argument('--testBatchSize', type=int, default=10, help='testing batch size') 17 | parser.add_argument('--nEpochs', type=int, default=2, help='number of epochs to train for') 18 | parser.add_argument('--lr', type=float, default=0.01, help='Learning Rate. Default=0.01') 19 | parser.add_argument('--cuda', action='store_true', help='use cuda?') 20 | parser.add_argument('--threads', type=int, default=4, help='number of threads for data loader to use') 21 | parser.add_argument('--seed', type=int, default=123, help='random seed to use. Default=123') 22 | opt = parser.parse_args() 23 | 24 | print(opt) 25 | 26 | if opt.cuda and not torch.cuda.is_available(): 27 | raise Exception("No GPU found, please run without --cuda") 28 | 29 | torch.manual_seed(opt.seed) 30 | 31 | device = torch.device("cuda" if opt.cuda else "cpu") 32 | 33 | print('===> Loading datasets') 34 | train_set = get_training_set(opt.upscale_factor) 35 | test_set = get_test_set(opt.upscale_factor) 36 | training_data_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True) 37 | testing_data_loader = DataLoader(dataset=test_set, num_workers=opt.threads, batch_size=opt.testBatchSize, shuffle=False) 38 | 39 | print('===> Building model') 40 | model = Net(upscale_factor=opt.upscale_factor).to(device) 41 | criterion = nn.MSELoss() 42 | 43 | optimizer = optim.Adam(model.parameters(), lr=opt.lr) 44 | 45 | 46 | def train(epoch): 47 | epoch_loss = 0 48 | for iteration, batch in enumerate(training_data_loader, 1): 49 | input, target = batch[0].to(device), batch[1].to(device) 50 | 51 | optimizer.zero_grad() 52 | loss = criterion(model(input), target) 53 | epoch_loss += loss.item() 54 | loss.backward() 55 | optimizer.step() 56 | 57 | print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, len(training_data_loader), loss.item())) 58 | 59 | print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader))) 60 | 61 | 62 | def test(): 63 | avg_psnr = 0 64 | with torch.no_grad(): 65 | for batch in testing_data_loader: 66 | input, target = batch[0].to(device), batch[1].to(device) 67 | 68 | prediction = model(input) 69 | mse = criterion(prediction, target) 70 | psnr = 10 * log10(1 / mse.item()) 71 | avg_psnr += psnr 72 | print("===> Avg. 
PSNR: {:.4f} dB".format(avg_psnr / len(testing_data_loader))) 73 | 74 | 75 | def checkpoint(epoch): 76 | model_out_path = "model_epoch_{}.pth".format(epoch) 77 | torch.save(model, model_out_path) 78 | print("Checkpoint saved to {}".format(model_out_path)) 79 | 80 | for epoch in range(1, opt.nEpochs + 1): 81 | train(epoch) 82 | test() 83 | checkpoint(epoch) 84 | -------------------------------------------------------------------------------- /time_sequence_prediction/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import numpy as np 6 | import matplotlib 7 | matplotlib.use('Agg') 8 | import matplotlib.pyplot as plt 9 | 10 | class Sequence(nn.Module): 11 | def __init__(self): 12 | super(Sequence, self).__init__() 13 | self.lstm1 = nn.LSTMCell(1, 51) 14 | self.lstm2 = nn.LSTMCell(51, 51) 15 | self.linear = nn.Linear(51, 1) 16 | 17 | def forward(self, input, future = 0): 18 | outputs = [] 19 | h_t = torch.zeros(input.size(0), 51, dtype=torch.double) 20 | c_t = torch.zeros(input.size(0), 51, dtype=torch.double) 21 | h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double) 22 | c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double) 23 | 24 | for i, input_t in enumerate(input.chunk(input.size(1), dim=1)): 25 | h_t, c_t = self.lstm1(input_t, (h_t, c_t)) 26 | h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2)) 27 | output = self.linear(h_t2) 28 | outputs += [output] 29 | for i in range(future):# if we should predict the future 30 | h_t, c_t = self.lstm1(output, (h_t, c_t)) 31 | h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2)) 32 | output = self.linear(h_t2) 33 | outputs += [output] 34 | outputs = torch.stack(outputs, 1).squeeze(2) 35 | return outputs 36 | 37 | 38 | if __name__ == '__main__': 39 | # set random seed to 0 40 | np.random.seed(0) 41 | torch.manual_seed(0) 42 | # load data and make training set 43 | data = torch.load('traindata.pt') 44 | input = torch.from_numpy(data[3:, :-1]) 45 | target = torch.from_numpy(data[3:, 1:]) 46 | test_input = torch.from_numpy(data[:3, :-1]) 47 | test_target = torch.from_numpy(data[:3, 1:]) 48 | # build the model 49 | seq = Sequence() 50 | seq.double() 51 | criterion = nn.MSELoss() 52 | # use LBFGS as optimizer since we can load the whole data to train 53 | optimizer = optim.LBFGS(seq.parameters(), lr=0.8) 54 | #begin to train 55 | for i in range(15): 56 | print('STEP: ', i) 57 | def closure(): 58 | optimizer.zero_grad() 59 | out = seq(input) 60 | loss = criterion(out, target) 61 | print('loss:', loss.item()) 62 | loss.backward() 63 | return loss 64 | optimizer.step(closure) 65 | # begin to predict, no need to track gradient here 66 | with torch.no_grad(): 67 | future = 1000 68 | pred = seq(test_input, future=future) 69 | loss = criterion(pred[:, :-future], test_target) 70 | print('test loss:', loss.item()) 71 | y = pred.detach().numpy() 72 | # draw the result 73 | plt.figure(figsize=(30,10)) 74 | plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30) 75 | plt.xlabel('x', fontsize=20) 76 | plt.ylabel('y', fontsize=20) 77 | plt.xticks(fontsize=20) 78 | plt.yticks(fontsize=20) 79 | def draw(yi, color): 80 | plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0) 81 | plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0) 82 | draw(y[0], 'r') 83 | draw(y[1], 'g') 84 | draw(y[2], 'b') 85 | 
plt.savefig('predict%d.pdf'%i) 86 | plt.close() 87 | -------------------------------------------------------------------------------- /reinforcement_learning/reinforce.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | import numpy as np 4 | from itertools import count 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | from torch.distributions import Categorical 11 | 12 | 13 | parser = argparse.ArgumentParser(description='PyTorch REINFORCE example') 14 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 15 | help='discount factor (default: 0.99)') 16 | parser.add_argument('--seed', type=int, default=543, metavar='N', 17 | help='random seed (default: 543)') 18 | parser.add_argument('--render', action='store_true', 19 | help='render the environment') 20 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 21 | help='interval between training status logs (default: 10)') 22 | args = parser.parse_args() 23 | 24 | 25 | env = gym.make('CartPole-v0') 26 | env.seed(args.seed) 27 | torch.manual_seed(args.seed) 28 | 29 | 30 | class Policy(nn.Module): 31 | def __init__(self): 32 | super(Policy, self).__init__() 33 | self.affine1 = nn.Linear(4, 128) 34 | self.affine2 = nn.Linear(128, 2) 35 | 36 | self.saved_log_probs = [] 37 | self.rewards = [] 38 | 39 | def forward(self, x): 40 | x = F.relu(self.affine1(x)) 41 | action_scores = self.affine2(x) 42 | return F.softmax(action_scores, dim=1) 43 | 44 | 45 | policy = Policy() 46 | optimizer = optim.Adam(policy.parameters(), lr=1e-2) 47 | eps = np.finfo(np.float32).eps.item() 48 | 49 | 50 | def select_action(state): 51 | state = torch.from_numpy(state).float().unsqueeze(0) 52 | probs = policy(state) 53 | m = Categorical(probs) 54 | action = m.sample() 55 | policy.saved_log_probs.append(m.log_prob(action)) 56 | return action.item() 57 | 58 | 59 | def finish_episode(): 60 | R = 0 61 | policy_loss = [] 62 | rewards = [] 63 | for r in policy.rewards[::-1]: 64 | R = r + args.gamma * R 65 | rewards.insert(0, R) 66 | rewards = torch.tensor(rewards) 67 | rewards = (rewards - rewards.mean()) / (rewards.std() + eps) 68 | for log_prob, reward in zip(policy.saved_log_probs, rewards): 69 | policy_loss.append(-log_prob * reward) 70 | optimizer.zero_grad() 71 | policy_loss = torch.cat(policy_loss).sum() 72 | policy_loss.backward() 73 | optimizer.step() 74 | del policy.rewards[:] 75 | del policy.saved_log_probs[:] 76 | 77 | 78 | def main(): 79 | running_reward = 10 80 | for i_episode in count(1): 81 | state = env.reset() 82 | for t in range(10000): # Don't infinite loop while learning 83 | action = select_action(state) 84 | state, reward, done, _ = env.step(action) 85 | if args.render: 86 | env.render() 87 | policy.rewards.append(reward) 88 | if done: 89 | break 90 | 91 | running_reward = running_reward * 0.99 + t * 0.01 92 | finish_episode() 93 | if i_episode % args.log_interval == 0: 94 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format( 95 | i_episode, t, running_reward)) 96 | if running_reward > env.spec.reward_threshold: 97 | print("Solved! 
Running reward is now {} and " 98 | "the last episode runs to {} time steps!".format(running_reward, t)) 99 | break 100 | 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /fast_neural_style/README.md: -------------------------------------------------------------------------------- 1 | # fast-neural-style :city_sunrise: :rocket: 2 | This repository contains a pytorch implementation of an algorithm for artistic style transfer. The algorithm can be used to mix the content of an image with the style of another image. For example, here is a photograph of a door arch rendered in the style of a stained glass painting. 3 | 4 | The model uses the method described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155) along with [Instance Normalization](https://arxiv.org/pdf/1607.08022.pdf). The saved-models for examples shown in the README can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0). 5 | 6 |

7 | *(example images omitted: a photograph of a door arch rendered in a stained-glass style)* 8 | 9 | 10 |
11 | 12 | ## Requirements 13 | The program is written in Python and uses [pytorch](http://pytorch.org/) and [scipy](https://www.scipy.org). A GPU is not necessary, but it can provide a significant speed-up, especially when training a new model. Regular-sized images can be styled on a laptop or desktop using the saved models. 14 | 15 | ## Usage 16 | Stylize an image 17 | ``` 18 | python neural_style/neural_style.py eval --content-image </path/to/content/image> --model </path/to/saved/model> --output-image </path/to/output/image> --cuda 0 19 | ``` 20 | * `--content-image`: path to the content image you want to stylize. 21 | * `--model`: saved model to be used for stylizing the image (e.g. `mosaic.pth`). 22 | * `--output-image`: path for saving the output image. 23 | * `--content-scale`: factor for scaling down the content image if memory is an issue (e.g. a value of 2 will halve the height and width of the content image). 24 | * `--cuda`: set it to 1 for running on GPU, 0 for CPU. 25 | 26 | Train a model 27 | ```bash 28 | python neural_style/neural_style.py train --dataset </path/to/train-dataset> --style-image </path/to/style/image> --save-model-dir </path/to/save-model/folder> --epochs 2 --cuda 1 29 | ``` 30 | 31 | There are several command-line arguments; the important ones are listed below: 32 | * `--dataset`: path to the training dataset; the path should point to a folder containing another folder with all the training images. I used the COCO 2014 Training images dataset [80K/13GB] [(download)](http://mscoco.org/dataset/#download). 33 | * `--style-image`: path to the style image. 34 | * `--save-model-dir`: path to the folder where the trained model will be saved. 35 | * `--cuda`: set it to 1 for running on GPU, 0 for CPU. 36 | 37 | Refer to ``neural_style/neural_style.py`` for the other command-line arguments. For training new models you might have to tune the values of `--content-weight` and `--style-weight`. The mosaic style model shown above was trained with `--content-weight 1e5` and `--style-weight 1e10`. The remaining three models were trained with weight parameters of a similar order of magnitude, with slight variations in `--style-weight` (`5e10` or `1e11`). A full example invocation is shown at the end of this README. 38 | 39 | ## Models 40 | 41 | Models for the examples shown below can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0) or by running the script ``download_saved_models.py``. 42 | 43 |
44 | *(style images and the corresponding stylized output images for the downloadable models omitted)*
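For a concrete end-to-end illustration of the training and stylization commands above, a session might look like the sketch below. The dataset location, the model output directory, and the output image name are placeholders, and `saved_models/mosaic.pth` assumes the saved-models archive has been extracted into a `saved_models` folder; only the bundled `images/...` paths are part of the repository.

```bash
# Hypothetical example; paths marked as placeholders are not part of the repository.

# Train a new style model with the weights reported for the mosaic model.
# The --dataset folder must contain a sub-folder holding the training images.
python neural_style/neural_style.py train \
  --dataset /data/coco2014 \
  --style-image images/style-images/mosaic.jpg \
  --save-model-dir /tmp/style-models \
  --content-weight 1e5 --style-weight 1e10 \
  --epochs 2 --cuda 1

# Stylize the bundled content image with a downloaded (or freshly trained) model.
python neural_style/neural_style.py eval \
  --content-image images/content-images/amber.jpg \
  --model saved_models/mosaic.pth \
  --output-image amber-mosaic-output.jpg \
  --cuda 0
```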
58 | -------------------------------------------------------------------------------- /reinforcement_learning/actor_critic.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | import numpy as np 4 | from itertools import count 5 | from collections import namedtuple 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.distributions import Categorical 12 | 13 | 14 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example') 15 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 16 | help='discount factor (default: 0.99)') 17 | parser.add_argument('--seed', type=int, default=543, metavar='N', 18 | help='random seed (default: 1)') 19 | parser.add_argument('--render', action='store_true', 20 | help='render the environment') 21 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 22 | help='interval between training status logs (default: 10)') 23 | args = parser.parse_args() 24 | 25 | 26 | env = gym.make('CartPole-v0') 27 | env.seed(args.seed) 28 | torch.manual_seed(args.seed) 29 | 30 | 31 | SavedAction = namedtuple('SavedAction', ['log_prob', 'value']) 32 | 33 | 34 | class Policy(nn.Module): 35 | def __init__(self): 36 | super(Policy, self).__init__() 37 | self.affine1 = nn.Linear(4, 128) 38 | self.action_head = nn.Linear(128, 2) 39 | self.value_head = nn.Linear(128, 1) 40 | 41 | self.saved_actions = [] 42 | self.rewards = [] 43 | 44 | def forward(self, x): 45 | x = F.relu(self.affine1(x)) 46 | action_scores = self.action_head(x) 47 | state_values = self.value_head(x) 48 | return F.softmax(action_scores, dim=-1), state_values 49 | 50 | 51 | model = Policy() 52 | optimizer = optim.Adam(model.parameters(), lr=3e-2) 53 | eps = np.finfo(np.float32).eps.item() 54 | 55 | 56 | def select_action(state): 57 | state = torch.from_numpy(state).float() 58 | probs, state_value = model(state) 59 | m = Categorical(probs) 60 | action = m.sample() 61 | model.saved_actions.append(SavedAction(m.log_prob(action), state_value)) 62 | return action.item() 63 | 64 | 65 | def finish_episode(): 66 | R = 0 67 | saved_actions = model.saved_actions 68 | policy_losses = [] 69 | value_losses = [] 70 | rewards = [] 71 | for r in model.rewards[::-1]: 72 | R = r + args.gamma * R 73 | rewards.insert(0, R) 74 | rewards = torch.tensor(rewards) 75 | rewards = (rewards - rewards.mean()) / (rewards.std() + eps) 76 | for (log_prob, value), r in zip(saved_actions, rewards): 77 | reward = r - value.item() 78 | policy_losses.append(-log_prob * reward) 79 | value_losses.append(F.smooth_l1_loss(value, torch.tensor([r]))) 80 | optimizer.zero_grad() 81 | loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum() 82 | loss.backward() 83 | optimizer.step() 84 | del model.rewards[:] 85 | del model.saved_actions[:] 86 | 87 | 88 | def main(): 89 | running_reward = 10 90 | for i_episode in count(1): 91 | state = env.reset() 92 | for t in range(10000): # Don't infinite loop while learning 93 | action = select_action(state) 94 | state, reward, done, _ = env.step(action) 95 | if args.render: 96 | env.render() 97 | model.rewards.append(reward) 98 | if done: 99 | break 100 | 101 | running_reward = running_reward * 0.99 + t * 0.01 102 | finish_episode() 103 | if i_episode % args.log_interval == 0: 104 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format( 105 | i_episode, t, running_reward)) 106 | if running_reward 
> env.spec.reward_threshold: 107 | print("Solved! Running reward is now {} and " 108 | "the last episode runs to {} time steps!".format(running_reward, t)) 109 | break 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/transformer_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TransformerNet(torch.nn.Module): 5 | def __init__(self): 6 | super(TransformerNet, self).__init__() 7 | # Initial convolution layers 8 | self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1) 9 | self.in1 = torch.nn.InstanceNorm2d(32, affine=True) 10 | self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2) 11 | self.in2 = torch.nn.InstanceNorm2d(64, affine=True) 12 | self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2) 13 | self.in3 = torch.nn.InstanceNorm2d(128, affine=True) 14 | # Residual layers 15 | self.res1 = ResidualBlock(128) 16 | self.res2 = ResidualBlock(128) 17 | self.res3 = ResidualBlock(128) 18 | self.res4 = ResidualBlock(128) 19 | self.res5 = ResidualBlock(128) 20 | # Upsampling Layers 21 | self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2) 22 | self.in4 = torch.nn.InstanceNorm2d(64, affine=True) 23 | self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2) 24 | self.in5 = torch.nn.InstanceNorm2d(32, affine=True) 25 | self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1) 26 | # Non-linearities 27 | self.relu = torch.nn.ReLU() 28 | 29 | def forward(self, X): 30 | y = self.relu(self.in1(self.conv1(X))) 31 | y = self.relu(self.in2(self.conv2(y))) 32 | y = self.relu(self.in3(self.conv3(y))) 33 | y = self.res1(y) 34 | y = self.res2(y) 35 | y = self.res3(y) 36 | y = self.res4(y) 37 | y = self.res5(y) 38 | y = self.relu(self.in4(self.deconv1(y))) 39 | y = self.relu(self.in5(self.deconv2(y))) 40 | y = self.deconv3(y) 41 | return y 42 | 43 | 44 | class ConvLayer(torch.nn.Module): 45 | def __init__(self, in_channels, out_channels, kernel_size, stride): 46 | super(ConvLayer, self).__init__() 47 | reflection_padding = kernel_size // 2 48 | self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) 49 | self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) 50 | 51 | def forward(self, x): 52 | out = self.reflection_pad(x) 53 | out = self.conv2d(out) 54 | return out 55 | 56 | 57 | class ResidualBlock(torch.nn.Module): 58 | """ResidualBlock 59 | introduced in: https://arxiv.org/abs/1512.03385 60 | recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html 61 | """ 62 | 63 | def __init__(self, channels): 64 | super(ResidualBlock, self).__init__() 65 | self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1) 66 | self.in1 = torch.nn.InstanceNorm2d(channels, affine=True) 67 | self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1) 68 | self.in2 = torch.nn.InstanceNorm2d(channels, affine=True) 69 | self.relu = torch.nn.ReLU() 70 | 71 | def forward(self, x): 72 | residual = x 73 | out = self.relu(self.in1(self.conv1(x))) 74 | out = self.in2(self.conv2(out)) 75 | out = out + residual 76 | return out 77 | 78 | 79 | class UpsampleConvLayer(torch.nn.Module): 80 | """UpsampleConvLayer 81 | Upsamples the input and then does a convolution. This method gives better results 82 | compared to ConvTranspose2d. 
83 | ref: http://distill.pub/2016/deconv-checkerboard/ 84 | """ 85 | 86 | def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None): 87 | super(UpsampleConvLayer, self).__init__() 88 | self.upsample = upsample 89 | if upsample: 90 | self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample) 91 | reflection_padding = kernel_size // 2 92 | self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding) 93 | self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride) 94 | 95 | def forward(self, x): 96 | x_in = x 97 | if self.upsample: 98 | x_in = self.upsample_layer(x_in) 99 | out = self.reflection_pad(x_in) 100 | out = self.conv2d(out) 101 | return out 102 | -------------------------------------------------------------------------------- /mnist/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.optim as optim 7 | from torchvision import datasets, transforms 8 | 9 | class Net(nn.Module): 10 | def __init__(self): 11 | super(Net, self).__init__() 12 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 13 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 14 | self.conv2_drop = nn.Dropout2d() 15 | self.fc1 = nn.Linear(320, 50) 16 | self.fc2 = nn.Linear(50, 10) 17 | 18 | def forward(self, x): 19 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 20 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 21 | x = x.view(-1, 320) 22 | x = F.relu(self.fc1(x)) 23 | x = F.dropout(x, training=self.training) 24 | x = self.fc2(x) 25 | return F.log_softmax(x, dim=1) 26 | 27 | def train(args, model, device, train_loader, optimizer, epoch): 28 | model.train() 29 | for batch_idx, (data, target) in enumerate(train_loader): 30 | data, target = data.to(device), target.to(device) 31 | optimizer.zero_grad() 32 | output = model(data) 33 | loss = F.nll_loss(output, target) 34 | loss.backward() 35 | optimizer.step() 36 | if batch_idx % args.log_interval == 0: 37 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 38 | epoch, batch_idx * len(data), len(train_loader.dataset), 39 | 100. * batch_idx / len(train_loader), loss.item())) 40 | 41 | def test(args, model, device, test_loader): 42 | model.eval() 43 | test_loss = 0 44 | correct = 0 45 | with torch.no_grad(): 46 | for data, target in test_loader: 47 | data, target = data.to(device), target.to(device) 48 | output = model(data) 49 | test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss 50 | pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability 51 | correct += pred.eq(target.view_as(pred)).sum().item() 52 | 53 | test_loss /= len(test_loader.dataset) 54 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 55 | test_loss, correct, len(test_loader.dataset), 56 | 100. 
* correct / len(test_loader.dataset))) 57 | 58 | def main(): 59 | # Training settings 60 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 61 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 62 | help='input batch size for training (default: 64)') 63 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 64 | help='input batch size for testing (default: 1000)') 65 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 66 | help='number of epochs to train (default: 10)') 67 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 68 | help='learning rate (default: 0.01)') 69 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 70 | help='SGD momentum (default: 0.5)') 71 | parser.add_argument('--no-cuda', action='store_true', default=False, 72 | help='disables CUDA training') 73 | parser.add_argument('--seed', type=int, default=1, metavar='S', 74 | help='random seed (default: 1)') 75 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 76 | help='how many batches to wait before logging training status') 77 | args = parser.parse_args() 78 | use_cuda = not args.no_cuda and torch.cuda.is_available() 79 | 80 | torch.manual_seed(args.seed) 81 | 82 | device = torch.device("cuda" if use_cuda else "cpu") 83 | 84 | kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} 85 | train_loader = torch.utils.data.DataLoader( 86 | datasets.MNIST('../data', train=True, download=True, 87 | transform=transforms.Compose([ 88 | transforms.ToTensor(), 89 | transforms.Normalize((0.1307,), (0.3081,)) 90 | ])), 91 | batch_size=args.batch_size, shuffle=True, **kwargs) 92 | test_loader = torch.utils.data.DataLoader( 93 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 94 | transforms.ToTensor(), 95 | transforms.Normalize((0.1307,), (0.3081,)) 96 | ])), 97 | batch_size=args.test_batch_size, shuffle=True, **kwargs) 98 | 99 | 100 | model = Net().to(device) 101 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) 102 | 103 | for epoch in range(1, args.epochs + 1): 104 | train(args, model, device, train_loader, optimizer, epoch) 105 | test(args, model, device, test_loader) 106 | 107 | 108 | if __name__ == '__main__': 109 | main() -------------------------------------------------------------------------------- /vae/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.utils.data 5 | from torch import nn, optim 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | 11 | parser = argparse.ArgumentParser(description='VAE MNIST Example') 12 | parser.add_argument('--batch-size', type=int, default=128, metavar='N', 13 | help='input batch size for training (default: 128)') 14 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 15 | help='number of epochs to train (default: 10)') 16 | parser.add_argument('--no-cuda', action='store_true', default=False, 17 | help='enables CUDA training') 18 | parser.add_argument('--seed', type=int, default=1, metavar='S', 19 | help='random seed (default: 1)') 20 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 21 | help='how many batches to wait before logging training status') 22 | args = parser.parse_args() 23 | args.cuda = not args.no_cuda 
and torch.cuda.is_available() 24 | 25 | torch.manual_seed(args.seed) 26 | 27 | device = torch.device("cuda" if args.cuda else "cpu") 28 | 29 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 30 | train_loader = torch.utils.data.DataLoader( 31 | datasets.MNIST('../data', train=True, download=True, 32 | transform=transforms.ToTensor()), 33 | batch_size=args.batch_size, shuffle=True, **kwargs) 34 | test_loader = torch.utils.data.DataLoader( 35 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()), 36 | batch_size=args.batch_size, shuffle=True, **kwargs) 37 | 38 | 39 | class VAE(nn.Module): 40 | def __init__(self): 41 | super(VAE, self).__init__() 42 | 43 | self.fc1 = nn.Linear(784, 400) 44 | self.fc21 = nn.Linear(400, 20) 45 | self.fc22 = nn.Linear(400, 20) 46 | self.fc3 = nn.Linear(20, 400) 47 | self.fc4 = nn.Linear(400, 784) 48 | 49 | def encode(self, x): 50 | h1 = F.relu(self.fc1(x)) 51 | return self.fc21(h1), self.fc22(h1) 52 | 53 | def reparameterize(self, mu, logvar): 54 | std = torch.exp(0.5*logvar) 55 | eps = torch.randn_like(std) 56 | return eps.mul(std).add_(mu) 57 | 58 | def decode(self, z): 59 | h3 = F.relu(self.fc3(z)) 60 | return torch.sigmoid(self.fc4(h3)) 61 | 62 | def forward(self, x): 63 | mu, logvar = self.encode(x.view(-1, 784)) 64 | z = self.reparameterize(mu, logvar) 65 | return self.decode(z), mu, logvar 66 | 67 | 68 | model = VAE().to(device) 69 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 70 | 71 | 72 | # Reconstruction + KL divergence losses summed over all elements and batch 73 | def loss_function(recon_x, x, mu, logvar): 74 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum') 75 | 76 | # see Appendix B from VAE paper: 77 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 78 | # https://arxiv.org/abs/1312.6114 79 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 80 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 81 | 82 | return BCE + KLD 83 | 84 | 85 | def train(epoch): 86 | model.train() 87 | train_loss = 0 88 | for batch_idx, (data, _) in enumerate(train_loader): 89 | data = data.to(device) 90 | optimizer.zero_grad() 91 | recon_batch, mu, logvar = model(data) 92 | loss = loss_function(recon_batch, data, mu, logvar) 93 | loss.backward() 94 | train_loss += loss.item() 95 | optimizer.step() 96 | if batch_idx % args.log_interval == 0: 97 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 98 | epoch, batch_idx * len(data), len(train_loader.dataset), 99 | 100. 
* batch_idx / len(train_loader), 100 | loss.item() / len(data))) 101 | 102 | print('====> Epoch: {} Average loss: {:.4f}'.format( 103 | epoch, train_loss / len(train_loader.dataset))) 104 | 105 | 106 | def test(epoch): 107 | model.eval() 108 | test_loss = 0 109 | with torch.no_grad(): 110 | for i, (data, _) in enumerate(test_loader): 111 | data = data.to(device) 112 | recon_batch, mu, logvar = model(data) 113 | test_loss += loss_function(recon_batch, data, mu, logvar).item() 114 | if i == 0: 115 | n = min(data.size(0), 8) 116 | comparison = torch.cat([data[:n], 117 | recon_batch.view(args.batch_size, 1, 28, 28)[:n]]) 118 | save_image(comparison.cpu(), 119 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 120 | 121 | test_loss /= len(test_loader.dataset) 122 | print('====> Test set loss: {:.4f}'.format(test_loss)) 123 | 124 | if __name__ == "__main__": 125 | for epoch in range(1, args.epochs + 1): 126 | train(epoch) 127 | test(epoch) 128 | with torch.no_grad(): 129 | sample = torch.randn(64, 20).to(device) 130 | sample = model.decode(sample).cpu() 131 | save_image(sample.view(64, 1, 28, 28), 132 | 'results/sample_' + str(epoch) + '.png') 133 | -------------------------------------------------------------------------------- /snli/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | 5 | import torch 6 | import torch.optim as O 7 | import torch.nn as nn 8 | 9 | from torchtext import data 10 | from torchtext import datasets 11 | 12 | from model import SNLIClassifier 13 | from util import get_args, makedirs 14 | 15 | 16 | args = get_args() 17 | torch.cuda.set_device(args.gpu) 18 | device = torch.device('cuda:{}'.format(args.gpu)) 19 | 20 | inputs = data.Field(lower=args.lower, tokenize='spacy') 21 | answers = data.Field(sequential=False) 22 | 23 | train, dev, test = datasets.SNLI.splits(inputs, answers) 24 | 25 | inputs.build_vocab(train, dev, test) 26 | if args.word_vectors: 27 | if os.path.isfile(args.vector_cache): 28 | inputs.vocab.vectors = torch.load(args.vector_cache) 29 | else: 30 | inputs.vocab.load_vectors(args.word_vectors) 31 | makedirs(os.path.dirname(args.vector_cache)) 32 | torch.save(inputs.vocab.vectors, args.vector_cache) 33 | answers.build_vocab(train) 34 | 35 | train_iter, dev_iter, test_iter = data.BucketIterator.splits( 36 | (train, dev, test), batch_size=args.batch_size, device=device) 37 | 38 | config = args 39 | config.n_embed = len(inputs.vocab) 40 | config.d_out = len(answers.vocab) 41 | config.n_cells = config.n_layers 42 | 43 | # double the number of cells for bidirectional networks 44 | if config.birnn: 45 | config.n_cells *= 2 46 | 47 | if args.resume_snapshot: 48 | model = torch.load(args.resume_snapshot, map_location=device) 49 | else: 50 | model = SNLIClassifier(config) 51 | if args.word_vectors: 52 | model.embed.weight.data.copy_(inputs.vocab.vectors) 53 | model.to(device) 54 | 55 | criterion = nn.CrossEntropyLoss() 56 | opt = O.Adam(model.parameters(), lr=args.lr) 57 | 58 | iterations = 0 59 | start = time.time() 60 | best_dev_acc = -1 61 | train_iter.repeat = False 62 | header = ' Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss Accuracy Dev/Accuracy' 63 | dev_log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'.split(',')) 64 | log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'.split(',')) 65 | makedirs(args.save_path) 66 | print(header) 67 | 
68 | for epoch in range(args.epochs): 69 | train_iter.init_epoch() 70 | n_correct, n_total = 0, 0 71 | for batch_idx, batch in enumerate(train_iter): 72 | 73 | # switch model to training mode, clear gradient accumulators 74 | model.train(); opt.zero_grad() 75 | 76 | iterations += 1 77 | 78 | # forward pass 79 | answer = model(batch) 80 | 81 | # calculate accuracy of predictions in the current batch 82 | n_correct += (torch.max(answer, 1)[1].view(batch.label.size()) == batch.label).sum().item() 83 | n_total += batch.batch_size 84 | train_acc = 100. * n_correct/n_total 85 | 86 | # calculate loss of the network output with respect to training labels 87 | loss = criterion(answer, batch.label) 88 | 89 | # backpropagate and update optimizer learning rate 90 | loss.backward(); opt.step() 91 | 92 | # checkpoint model periodically 93 | if iterations % args.save_every == 0: 94 | snapshot_prefix = os.path.join(args.save_path, 'snapshot') 95 | snapshot_path = snapshot_prefix + '_acc_{:.4f}_loss_{:.6f}_iter_{}_model.pt'.format(train_acc, loss.item(), iterations) 96 | torch.save(model, snapshot_path) 97 | for f in glob.glob(snapshot_prefix + '*'): 98 | if f != snapshot_path: 99 | os.remove(f) 100 | 101 | # evaluate performance on validation set periodically 102 | if iterations % args.dev_every == 0: 103 | 104 | # switch model to evaluation mode 105 | model.eval(); dev_iter.init_epoch() 106 | 107 | # calculate accuracy on validation set 108 | n_dev_correct, dev_loss = 0, 0 109 | with torch.no_grad(): 110 | for dev_batch_idx, dev_batch in enumerate(dev_iter): 111 | answer = model(dev_batch) 112 | n_dev_correct += (torch.max(answer, 1)[1].view(dev_batch.label.size()) == dev_batch.label).sum().item() 113 | dev_loss = criterion(answer, dev_batch.label) 114 | dev_acc = 100. * n_dev_correct / len(dev) 115 | 116 | print(dev_log_template.format(time.time()-start, 117 | epoch, iterations, 1+batch_idx, len(train_iter), 118 | 100. * (1+batch_idx) / len(train_iter), loss.item(), dev_loss.item(), train_acc, dev_acc)) 119 | 120 | # update best valiation set accuracy 121 | if dev_acc > best_dev_acc: 122 | 123 | # found a model with better validation set accuracy 124 | 125 | best_dev_acc = dev_acc 126 | snapshot_prefix = os.path.join(args.save_path, 'best_snapshot') 127 | snapshot_path = snapshot_prefix + '_devacc_{}_devloss_{}__iter_{}_model.pt'.format(dev_acc, dev_loss.item(), iterations) 128 | 129 | # save model, delete previous 'best_snapshot' files 130 | torch.save(model, snapshot_path) 131 | for f in glob.glob(snapshot_prefix + '*'): 132 | if f != snapshot_path: 133 | os.remove(f) 134 | 135 | elif iterations % args.log_every == 0: 136 | 137 | # print progress message 138 | print(log_template.format(time.time()-start, 139 | epoch, iterations, 1+batch_idx, len(train_iter), 140 | 100. 
* (1+batch_idx) / len(train_iter), loss.item(), ' '*8, n_correct/n_total*100, ' '*12)) 141 | 142 | 143 | -------------------------------------------------------------------------------- /word_language_model/main.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import argparse 3 | import time 4 | import math 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | import torch.onnx 9 | 10 | import data 11 | import model 12 | 13 | parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 RNN/LSTM Language Model') 14 | parser.add_argument('--data', type=str, default='./data/wikitext-2', 15 | help='location of the data corpus') 16 | parser.add_argument('--model', type=str, default='LSTM', 17 | help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)') 18 | parser.add_argument('--emsize', type=int, default=200, 19 | help='size of word embeddings') 20 | parser.add_argument('--nhid', type=int, default=200, 21 | help='number of hidden units per layer') 22 | parser.add_argument('--nlayers', type=int, default=2, 23 | help='number of layers') 24 | parser.add_argument('--lr', type=float, default=20, 25 | help='initial learning rate') 26 | parser.add_argument('--clip', type=float, default=0.25, 27 | help='gradient clipping') 28 | parser.add_argument('--epochs', type=int, default=40, 29 | help='upper epoch limit') 30 | parser.add_argument('--batch_size', type=int, default=20, metavar='N', 31 | help='batch size') 32 | parser.add_argument('--bptt', type=int, default=35, 33 | help='sequence length') 34 | parser.add_argument('--dropout', type=float, default=0.2, 35 | help='dropout applied to layers (0 = no dropout)') 36 | parser.add_argument('--tied', action='store_true', 37 | help='tie the word embedding and softmax weights') 38 | parser.add_argument('--seed', type=int, default=1111, 39 | help='random seed') 40 | parser.add_argument('--cuda', action='store_true', 41 | help='use CUDA') 42 | parser.add_argument('--log-interval', type=int, default=200, metavar='N', 43 | help='report interval') 44 | parser.add_argument('--save', type=str, default='model.pt', 45 | help='path to save the final model') 46 | parser.add_argument('--onnx-export', type=str, default='', 47 | help='path to export the final model in onnx format') 48 | args = parser.parse_args() 49 | 50 | # Set the random seed manually for reproducibility. 51 | torch.manual_seed(args.seed) 52 | if torch.cuda.is_available(): 53 | if not args.cuda: 54 | print("WARNING: You have a CUDA device, so you should probably run with --cuda") 55 | 56 | device = torch.device("cuda" if args.cuda else "cpu") 57 | 58 | ############################################################################### 59 | # Load data 60 | ############################################################################### 61 | 62 | corpus = data.Corpus(args.data) 63 | 64 | print("Number of tokens:") 65 | print("Train: ", len(corpus.train)) 66 | print("Valid: ", len(corpus.valid)) 67 | print("Test: ", len(corpus.test)) 68 | 69 | # Starting from sequential data, batchify arranges the dataset into columns. 70 | # For instance, with the alphabet as the sequence and batch size 4, we'd get 71 | # ┌ a g m s ┐ 72 | # │ b h n t │ 73 | # │ c i o u │ 74 | # │ d j p v │ 75 | # │ e k q w │ 76 | # └ f l r x ┘. 77 | # These columns are treated as independent by the model, which means that the 78 | # dependence of e. g. 'g' on 'f' can not be learned, but allows more efficient 79 | # batch processing. 
80 | 81 | def batchify(data, bsz): 82 | # Work out how cleanly we can divide the dataset into bsz parts. 83 | nbatch = data.size(0) // bsz 84 | # Trim off any extra elements that wouldn't cleanly fit (remainders). 85 | data = data.narrow(0, 0, nbatch * bsz) 86 | # Evenly divide the data across the bsz batches. 87 | data = data.view(bsz, -1).t().contiguous() 88 | return data.to(device) 89 | 90 | eval_batch_size = 10 91 | train_data = batchify(corpus.train, args.batch_size) 92 | val_data = batchify(corpus.valid, eval_batch_size) 93 | test_data = batchify(corpus.test, eval_batch_size) 94 | 95 | ############################################################################### 96 | # Build the model 97 | ############################################################################### 98 | 99 | ntokens = len(corpus.dictionary) 100 | model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied).to(device) 101 | 102 | criterion = nn.CrossEntropyLoss() 103 | 104 | ############################################################################### 105 | # Training code 106 | ############################################################################### 107 | 108 | def repackage_hidden(h): 109 | """Wraps hidden states in new Tensors, to detach them from their history.""" 110 | if isinstance(h, torch.Tensor): 111 | return h.detach() 112 | else: 113 | return tuple(repackage_hidden(v) for v in h) 114 | 115 | 116 | # get_batch subdivides the source data into chunks of length args.bptt. 117 | # If source is equal to the example output of the batchify function, with 118 | # a bptt-limit of 2, we'd get the following two Variables for i = 0: 119 | # ┌ a g m s ┐ ┌ b h n t ┐ 120 | # └ b h n t ┘ └ c i o u ┘ 121 | # Note that despite the name of the function, the subdivison of data is not 122 | # done along the batch dimension (i.e. dimension 1), since that was handled 123 | # by the batchify function. The chunks are along dimension 0, corresponding 124 | # to the seq_len dimension in the LSTM. 125 | 126 | def get_batch(source, i): 127 | seq_len = min(args.bptt, len(source) - 1 - i) 128 | data = source[i:i+seq_len] 129 | target = source[i+1:i+1+seq_len].view(-1) 130 | return data, target 131 | 132 | 133 | def evaluate(data_source): 134 | # Turn on evaluation mode which disables dropout. 135 | model.eval() 136 | total_loss = 0. 137 | ntokens = len(corpus.dictionary) 138 | hidden = model.init_hidden(eval_batch_size) 139 | with torch.no_grad(): 140 | for i in range(0, data_source.size(0) - 1, args.bptt): 141 | data, targets = get_batch(data_source, i) 142 | output, hidden = model(data, hidden) 143 | output_flat = output.view(-1, ntokens) 144 | total_loss += len(data) * criterion(output_flat, targets).item() 145 | hidden = repackage_hidden(hidden) 146 | return total_loss / len(data_source) 147 | 148 | 149 | def train(): 150 | # Turn on training mode which enables dropout. 151 | model.train() 152 | total_loss = 0. 153 | start_time = time.time() 154 | ntokens = len(corpus.dictionary) 155 | hidden = model.init_hidden(args.batch_size) 156 | for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)): 157 | data, targets = get_batch(train_data, i) 158 | # Starting each batch, we detach the hidden state from how it was previously produced. 159 | # If we didn't, the model would try backpropagating all the way to start of the dataset. 
160 | hidden = repackage_hidden(hidden) 161 | model.zero_grad() 162 | output, hidden = model(data, hidden) 163 | loss = criterion(output.view(-1, ntokens), targets) 164 | loss.backward() 165 | 166 | # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. 167 | torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip) 168 | for p in model.parameters(): 169 | p.data.add_(-lr, p.grad.data) 170 | 171 | total_loss += loss.item() 172 | 173 | if batch % args.log_interval == 0 and batch > 0: 174 | cur_loss = total_loss / args.log_interval 175 | elapsed = time.time() - start_time 176 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | ' 177 | 'loss {:5.2f} | ppl {:8.2f}'.format( 178 | epoch, batch, len(train_data) // args.bptt, lr, 179 | elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss))) 180 | total_loss = 0 181 | start_time = time.time() 182 | 183 | 184 | def export_onnx(path, batch_size, seq_len): 185 | print('The model is also exported in ONNX format at {}'. 186 | format(os.path.realpath(args.onnx_export))) 187 | model.eval() 188 | dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device) 189 | hidden = model.init_hidden(batch_size) 190 | torch.onnx.export(model, (dummy_input, hidden), path) 191 | 192 | 193 | # Loop over epochs. 194 | lr = args.lr 195 | best_val_loss = None 196 | 197 | # At any point you can hit Ctrl + C to break out of training early. 198 | try: 199 | for epoch in range(1, args.epochs+1): 200 | epoch_start_time = time.time() 201 | train() 202 | val_loss = evaluate(val_data) 203 | print('-' * 89) 204 | print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' 205 | 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time), 206 | val_loss, math.exp(val_loss))) 207 | print('-' * 89) 208 | # Save the model if the validation loss is the best we've seen so far. 209 | if not best_val_loss or val_loss < best_val_loss: 210 | with open(args.save, 'wb') as f: 211 | torch.save(model, f) 212 | best_val_loss = val_loss 213 | else: 214 | # Anneal the learning rate if no improvement has been seen in the validation dataset. 215 | lr /= 4.0 216 | except KeyboardInterrupt: 217 | print('-' * 89) 218 | print('Exiting from training early') 219 | 220 | # Load the best saved model. 221 | with open(args.save, 'rb') as f: 222 | model = torch.load(f) 223 | # after load the rnn params are not a continuous chunk of memory 224 | # this makes them a continuous chunk, and will speed up forward pass 225 | model.rnn.flatten_parameters() 226 | 227 | # Run on test data. 228 | test_loss = evaluate(test_data) 229 | print('=' * 89) 230 | print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format( 231 | test_loss, math.exp(test_loss))) 232 | print('=' * 89) 233 | 234 | if len(args.onnx_export) > 0: 235 | # Export the model in ONNX format. 
236 | export_onnx(args.onnx_export, batch_size=1, seq_len=args.bptt) 237 | -------------------------------------------------------------------------------- /dcgan/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os 4 | import random 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim as optim 10 | import torch.utils.data 11 | import torchvision.datasets as dset 12 | import torchvision.transforms as transforms 13 | import torchvision.utils as vutils 14 | 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--dataset', required=True, help='cifar10 | lsun | imagenet | folder | lfw | fake') 18 | parser.add_argument('--dataroot', required=True, help='path to dataset') 19 | parser.add_argument('--workers', type=int, help='number of data loading workers', default=2) 20 | parser.add_argument('--batchSize', type=int, default=64, help='input batch size') 21 | parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network') 22 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector') 23 | parser.add_argument('--ngf', type=int, default=64) 24 | parser.add_argument('--ndf', type=int, default=64) 25 | parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for') 26 | parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002') 27 | parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5') 28 | parser.add_argument('--cuda', action='store_true', help='enables cuda') 29 | parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use') 30 | parser.add_argument('--netG', default='', help="path to netG (to continue training)") 31 | parser.add_argument('--netD', default='', help="path to netD (to continue training)") 32 | parser.add_argument('--outf', default='.', help='folder to output images and model checkpoints') 33 | parser.add_argument('--manualSeed', type=int, help='manual seed') 34 | 35 | opt = parser.parse_args() 36 | print(opt) 37 | 38 | try: 39 | os.makedirs(opt.outf) 40 | except OSError: 41 | pass 42 | 43 | if opt.manualSeed is None: 44 | opt.manualSeed = random.randint(1, 10000) 45 | print("Random Seed: ", opt.manualSeed) 46 | random.seed(opt.manualSeed) 47 | torch.manual_seed(opt.manualSeed) 48 | 49 | cudnn.benchmark = True 50 | 51 | if torch.cuda.is_available() and not opt.cuda: 52 | print("WARNING: You have a CUDA device, so you should probably run with --cuda") 53 | 54 | if opt.dataset in ['imagenet', 'folder', 'lfw']: 55 | # folder dataset 56 | dataset = dset.ImageFolder(root=opt.dataroot, 57 | transform=transforms.Compose([ 58 | transforms.Resize(opt.imageSize), 59 | transforms.CenterCrop(opt.imageSize), 60 | transforms.ToTensor(), 61 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 62 | ])) 63 | elif opt.dataset == 'lsun': 64 | dataset = dset.LSUN(root=opt.dataroot, classes=['bedroom_train'], 65 | transform=transforms.Compose([ 66 | transforms.Resize(opt.imageSize), 67 | transforms.CenterCrop(opt.imageSize), 68 | transforms.ToTensor(), 69 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 70 | ])) 71 | elif opt.dataset == 'cifar10': 72 | dataset = dset.CIFAR10(root=opt.dataroot, download=True, 73 | transform=transforms.Compose([ 74 | transforms.Resize(opt.imageSize), 75 | 
transforms.ToTensor(), 76 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), 77 | ])) 78 | elif opt.dataset == 'fake': 79 | dataset = dset.FakeData(image_size=(3, opt.imageSize, opt.imageSize), 80 | transform=transforms.ToTensor()) 81 | assert dataset 82 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize, 83 | shuffle=True, num_workers=int(opt.workers)) 84 | 85 | device = torch.device("cuda:0" if opt.cuda else "cpu") 86 | ngpu = int(opt.ngpu) 87 | nz = int(opt.nz) 88 | ngf = int(opt.ngf) 89 | ndf = int(opt.ndf) 90 | nc = 3 91 | 92 | 93 | # custom weights initialization called on netG and netD 94 | def weights_init(m): 95 | classname = m.__class__.__name__ 96 | if classname.find('Conv') != -1: 97 | m.weight.data.normal_(0.0, 0.02) 98 | elif classname.find('BatchNorm') != -1: 99 | m.weight.data.normal_(1.0, 0.02) 100 | m.bias.data.fill_(0) 101 | 102 | 103 | class Generator(nn.Module): 104 | def __init__(self, ngpu): 105 | super(Generator, self).__init__() 106 | self.ngpu = ngpu 107 | self.main = nn.Sequential( 108 | # input is Z, going into a convolution 109 | nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), 110 | nn.BatchNorm2d(ngf * 8), 111 | nn.ReLU(True), 112 | # state size. (ngf*8) x 4 x 4 113 | nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), 114 | nn.BatchNorm2d(ngf * 4), 115 | nn.ReLU(True), 116 | # state size. (ngf*4) x 8 x 8 117 | nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), 118 | nn.BatchNorm2d(ngf * 2), 119 | nn.ReLU(True), 120 | # state size. (ngf*2) x 16 x 16 121 | nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), 122 | nn.BatchNorm2d(ngf), 123 | nn.ReLU(True), 124 | # state size. (ngf) x 32 x 32 125 | nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), 126 | nn.Tanh() 127 | # state size. (nc) x 64 x 64 128 | ) 129 | 130 | def forward(self, input): 131 | if input.is_cuda and self.ngpu > 1: 132 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 133 | else: 134 | output = self.main(input) 135 | return output 136 | 137 | 138 | netG = Generator(ngpu).to(device) 139 | netG.apply(weights_init) 140 | if opt.netG != '': 141 | netG.load_state_dict(torch.load(opt.netG)) 142 | print(netG) 143 | 144 | 145 | class Discriminator(nn.Module): 146 | def __init__(self, ngpu): 147 | super(Discriminator, self).__init__() 148 | self.ngpu = ngpu 149 | self.main = nn.Sequential( 150 | # input is (nc) x 64 x 64 151 | nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), 152 | nn.LeakyReLU(0.2, inplace=True), 153 | # state size. (ndf) x 32 x 32 154 | nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), 155 | nn.BatchNorm2d(ndf * 2), 156 | nn.LeakyReLU(0.2, inplace=True), 157 | # state size. (ndf*2) x 16 x 16 158 | nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), 159 | nn.BatchNorm2d(ndf * 4), 160 | nn.LeakyReLU(0.2, inplace=True), 161 | # state size. (ndf*4) x 8 x 8 162 | nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), 163 | nn.BatchNorm2d(ndf * 8), 164 | nn.LeakyReLU(0.2, inplace=True), 165 | # state size. 
(ndf*8) x 4 x 4 166 | nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), 167 | nn.Sigmoid() 168 | ) 169 | 170 | def forward(self, input): 171 | if input.is_cuda and self.ngpu > 1: 172 | output = nn.parallel.data_parallel(self.main, input, range(self.ngpu)) 173 | else: 174 | output = self.main(input) 175 | 176 | return output.view(-1, 1).squeeze(1) 177 | 178 | 179 | netD = Discriminator(ngpu).to(device) 180 | netD.apply(weights_init) 181 | if opt.netD != '': 182 | netD.load_state_dict(torch.load(opt.netD)) 183 | print(netD) 184 | 185 | criterion = nn.BCELoss() 186 | 187 | fixed_noise = torch.randn(opt.batchSize, nz, 1, 1, device=device) 188 | real_label = 1 189 | fake_label = 0 190 | 191 | # setup optimizer 192 | optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) 193 | optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) 194 | 195 | for epoch in range(opt.niter): 196 | for i, data in enumerate(dataloader, 0): 197 | ############################ 198 | # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) 199 | ########################### 200 | # train with real 201 | netD.zero_grad() 202 | real_cpu = data[0].to(device) 203 | batch_size = real_cpu.size(0) 204 | label = torch.full((batch_size,), real_label, device=device) 205 | 206 | output = netD(real_cpu) 207 | errD_real = criterion(output, label) 208 | errD_real.backward() 209 | D_x = output.mean().item() 210 | 211 | # train with fake 212 | noise = torch.randn(batch_size, nz, 1, 1, device=device) 213 | fake = netG(noise) 214 | label.fill_(fake_label) 215 | output = netD(fake.detach()) 216 | errD_fake = criterion(output, label) 217 | errD_fake.backward() 218 | D_G_z1 = output.mean().item() 219 | errD = errD_real + errD_fake 220 | optimizerD.step() 221 | 222 | ############################ 223 | # (2) Update G network: maximize log(D(G(z))) 224 | ########################### 225 | netG.zero_grad() 226 | label.fill_(real_label) # fake labels are real for generator cost 227 | output = netD(fake) 228 | errG = criterion(output, label) 229 | errG.backward() 230 | D_G_z2 = output.mean().item() 231 | optimizerG.step() 232 | 233 | print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f' 234 | % (epoch, opt.niter, i, len(dataloader), 235 | errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) 236 | if i % 100 == 0: 237 | vutils.save_image(real_cpu, 238 | '%s/real_samples.png' % opt.outf, 239 | normalize=True) 240 | fake = netG(fixed_noise) 241 | vutils.save_image(fake.detach(), 242 | '%s/fake_samples_epoch_%03d.png' % (opt.outf, epoch), 243 | normalize=True) 244 | 245 | # do checkpointing 246 | torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.outf, epoch)) 247 | torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.outf, epoch)) 248 | -------------------------------------------------------------------------------- /fast_neural_style/neural_style/neural_style.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | import time 5 | import re 6 | 7 | import numpy as np 8 | import torch 9 | from torch.optim import Adam 10 | from torch.utils.data import DataLoader 11 | from torchvision import datasets 12 | from torchvision import transforms 13 | import torch.onnx 14 | 15 | import utils 16 | from transformer_net import TransformerNet 17 | from vgg import Vgg16 18 | 19 | 20 | def check_paths(args): 21 | try: 22 | if not os.path.exists(args.save_model_dir): 23 | 
os.makedirs(args.save_model_dir) 24 | if args.checkpoint_model_dir is not None and not (os.path.exists(args.checkpoint_model_dir)): 25 | os.makedirs(args.checkpoint_model_dir) 26 | except OSError as e: 27 | print(e) 28 | sys.exit(1) 29 | 30 | 31 | def train(args): 32 | device = torch.device("cuda" if args.cuda else "cpu") 33 | 34 | np.random.seed(args.seed) 35 | torch.manual_seed(args.seed) 36 | 37 | transform = transforms.Compose([ 38 | transforms.Resize(args.image_size), 39 | transforms.CenterCrop(args.image_size), 40 | transforms.ToTensor(), 41 | transforms.Lambda(lambda x: x.mul(255)) 42 | ]) 43 | train_dataset = datasets.ImageFolder(args.dataset, transform) 44 | train_loader = DataLoader(train_dataset, batch_size=args.batch_size) 45 | 46 | transformer = TransformerNet().to(device) 47 | optimizer = Adam(transformer.parameters(), args.lr) 48 | mse_loss = torch.nn.MSELoss() 49 | 50 | vgg = Vgg16(requires_grad=False).to(device) 51 | style_transform = transforms.Compose([ 52 | transforms.ToTensor(), 53 | transforms.Lambda(lambda x: x.mul(255)) 54 | ]) 55 | style = utils.load_image(args.style_image, size=args.style_size) 56 | style = style_transform(style) 57 | style = style.repeat(args.batch_size, 1, 1, 1).to(device) 58 | 59 | features_style = vgg(utils.normalize_batch(style)) 60 | gram_style = [utils.gram_matrix(y) for y in features_style] 61 | 62 | for e in range(args.epochs): 63 | transformer.train() 64 | agg_content_loss = 0. 65 | agg_style_loss = 0. 66 | count = 0 67 | for batch_id, (x, _) in enumerate(train_loader): 68 | n_batch = len(x) 69 | count += n_batch 70 | optimizer.zero_grad() 71 | 72 | x = x.to(device) 73 | y = transformer(x) 74 | 75 | y = utils.normalize_batch(y) 76 | x = utils.normalize_batch(x) 77 | 78 | features_y = vgg(y) 79 | features_x = vgg(x) 80 | 81 | content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2) 82 | 83 | style_loss = 0. 
84 | for ft_y, gm_s in zip(features_y, gram_style): 85 | gm_y = utils.gram_matrix(ft_y) 86 | style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :]) 87 | style_loss *= args.style_weight 88 | 89 | total_loss = content_loss + style_loss 90 | total_loss.backward() 91 | optimizer.step() 92 | 93 | agg_content_loss += content_loss.item() 94 | agg_style_loss += style_loss.item() 95 | 96 | if (batch_id + 1) % args.log_interval == 0: 97 | mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format( 98 | time.ctime(), e + 1, count, len(train_dataset), 99 | agg_content_loss / (batch_id + 1), 100 | agg_style_loss / (batch_id + 1), 101 | (agg_content_loss + agg_style_loss) / (batch_id + 1) 102 | ) 103 | print(mesg) 104 | 105 | if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0: 106 | transformer.eval().cpu() 107 | ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth" 108 | ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename) 109 | torch.save(transformer.state_dict(), ckpt_model_path) 110 | transformer.to(device).train() 111 | 112 | # save model 113 | transformer.eval().cpu() 114 | save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str( 115 | args.content_weight) + "_" + str(args.style_weight) + ".model" 116 | save_model_path = os.path.join(args.save_model_dir, save_model_filename) 117 | torch.save(transformer.state_dict(), save_model_path) 118 | 119 | print("\nDone, trained model saved at", save_model_path) 120 | 121 | 122 | def stylize(args): 123 | device = torch.device("cuda" if args.cuda else "cpu") 124 | 125 | content_image = utils.load_image(args.content_image, scale=args.content_scale) 126 | content_transform = transforms.Compose([ 127 | transforms.ToTensor(), 128 | transforms.Lambda(lambda x: x.mul(255)) 129 | ]) 130 | content_image = content_transform(content_image) 131 | content_image = content_image.unsqueeze(0).to(device) 132 | 133 | if args.model.endswith(".onnx"): 134 | output = stylize_onnx_caffe2(content_image, args) 135 | else: 136 | with torch.no_grad(): 137 | style_model = TransformerNet() 138 | state_dict = torch.load(args.model) 139 | # remove saved deprecated running_* keys in InstanceNorm from the checkpoint 140 | for k in list(state_dict.keys()): 141 | if re.search(r'in\d+\.running_(mean|var)$', k): 142 | del state_dict[k] 143 | style_model.load_state_dict(state_dict) 144 | style_model.to(device) 145 | if args.export_onnx: 146 | assert args.export_onnx.endswith(".onnx"), "Export model file should end with .onnx" 147 | output = torch.onnx._export(style_model, content_image, args.export_onnx).cpu() 148 | else: 149 | output = style_model(content_image).cpu() 150 | utils.save_image(args.output_image, output[0]) 151 | 152 | 153 | def stylize_onnx_caffe2(content_image, args): 154 | """ 155 | Read ONNX model and run it using Caffe2 156 | """ 157 | 158 | assert not args.export_onnx 159 | 160 | import onnx 161 | import onnx_caffe2.backend 162 | 163 | model = onnx.load(args.model) 164 | 165 | prepared_backend = onnx_caffe2.backend.prepare(model, device='CUDA' if args.cuda else 'CPU') 166 | inp = {model.graph.input[0].name: content_image.numpy()} 167 | c2_out = prepared_backend.run(inp)[0] 168 | 169 | return torch.from_numpy(c2_out) 170 | 171 | 172 | def main(): 173 | main_arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style") 174 | subparsers = 
main_arg_parser.add_subparsers(title="subcommands", dest="subcommand") 175 | 176 | train_arg_parser = subparsers.add_parser("train", help="parser for training arguments") 177 | train_arg_parser.add_argument("--epochs", type=int, default=2, 178 | help="number of training epochs, default is 2") 179 | train_arg_parser.add_argument("--batch-size", type=int, default=4, 180 | help="batch size for training, default is 4") 181 | train_arg_parser.add_argument("--dataset", type=str, required=True, 182 | help="path to training dataset, the path should point to a folder " 183 | "containing another folder with all the training images") 184 | train_arg_parser.add_argument("--style-image", type=str, default="images/style-images/mosaic.jpg", 185 | help="path to style-image") 186 | train_arg_parser.add_argument("--save-model-dir", type=str, required=True, 187 | help="path to folder where trained model will be saved.") 188 | train_arg_parser.add_argument("--checkpoint-model-dir", type=str, default=None, 189 | help="path to folder where checkpoints of trained models will be saved") 190 | train_arg_parser.add_argument("--image-size", type=int, default=256, 191 | help="size of training images, default is 256 X 256") 192 | train_arg_parser.add_argument("--style-size", type=int, default=None, 193 | help="size of style-image, default is the original size of style image") 194 | train_arg_parser.add_argument("--cuda", type=int, required=True, 195 | help="set it to 1 for running on GPU, 0 for CPU") 196 | train_arg_parser.add_argument("--seed", type=int, default=42, 197 | help="random seed for training") 198 | train_arg_parser.add_argument("--content-weight", type=float, default=1e5, 199 | help="weight for content-loss, default is 1e5") 200 | train_arg_parser.add_argument("--style-weight", type=float, default=1e10, 201 | help="weight for style-loss, default is 1e10") 202 | train_arg_parser.add_argument("--lr", type=float, default=1e-3, 203 | help="learning rate, default is 1e-3") 204 | train_arg_parser.add_argument("--log-interval", type=int, default=500, 205 | help="number of images after which the training loss is logged, default is 500") 206 | train_arg_parser.add_argument("--checkpoint-interval", type=int, default=2000, 207 | help="number of batches after which a checkpoint of the trained model will be created") 208 | 209 | eval_arg_parser = subparsers.add_parser("eval", help="parser for evaluation/stylizing arguments") 210 | eval_arg_parser.add_argument("--content-image", type=str, required=True, 211 | help="path to content image you want to stylize") 212 | eval_arg_parser.add_argument("--content-scale", type=float, default=None, 213 | help="factor for scaling down the content image") 214 | eval_arg_parser.add_argument("--output-image", type=str, required=True, 215 | help="path for saving the output image") 216 | eval_arg_parser.add_argument("--model", type=str, required=True, 217 | help="saved model to be used for stylizing the image. 
If file ends in .pth - PyTorch path is used, if in .onnx - Caffe2 path") 218 | eval_arg_parser.add_argument("--cuda", type=int, required=True, 219 | help="set it to 1 for running on GPU, 0 for CPU") 220 | eval_arg_parser.add_argument("--export_onnx", type=str, 221 | help="export ONNX model to a given file") 222 | 223 | args = main_arg_parser.parse_args() 224 | 225 | if args.subcommand is None: 226 | print("ERROR: specify either train or eval") 227 | sys.exit(1) 228 | if args.cuda and not torch.cuda.is_available(): 229 | print("ERROR: cuda is not available, try running on CPU") 230 | sys.exit(1) 231 | 232 | if args.subcommand == "train": 233 | check_paths(args) 234 | train(args) 235 | else: 236 | stylize(args) 237 | 238 | 239 | if __name__ == "__main__": 240 | main() 241 | -------------------------------------------------------------------------------- /imagenet/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.distributed as dist 13 | import torch.optim 14 | import torch.utils.data 15 | import torch.utils.data.distributed 16 | import torchvision.transforms as transforms 17 | import torchvision.datasets as datasets 18 | import torchvision.models as models 19 | 20 | model_names = sorted(name for name in models.__dict__ 21 | if name.islower() and not name.startswith("__") 22 | and callable(models.__dict__[name])) 23 | 24 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 25 | parser.add_argument('data', metavar='DIR', 26 | help='path to dataset') 27 | parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', 28 | choices=model_names, 29 | help='model architecture: ' + 30 | ' | '.join(model_names) + 31 | ' (default: resnet18)') 32 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', 33 | help='number of data loading workers (default: 4)') 34 | parser.add_argument('--epochs', default=90, type=int, metavar='N', 35 | help='number of total epochs to run') 36 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 37 | help='manual epoch number (useful on restarts)') 38 | parser.add_argument('-b', '--batch-size', default=256, type=int, 39 | metavar='N', help='mini-batch size (default: 256)') 40 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, 41 | metavar='LR', help='initial learning rate') 42 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 43 | help='momentum') 44 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 45 | metavar='W', help='weight decay (default: 1e-4)') 46 | parser.add_argument('--print-freq', '-p', default=10, type=int, 47 | metavar='N', help='print frequency (default: 10)') 48 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 49 | help='path to latest checkpoint (default: none)') 50 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 51 | help='evaluate model on validation set') 52 | parser.add_argument('--pretrained', dest='pretrained', action='store_true', 53 | help='use pre-trained model') 54 | parser.add_argument('--world-size', default=1, type=int, 55 | help='number of distributed processes') 56 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, 57 | help='url used to 
set up distributed training') 58 | parser.add_argument('--dist-backend', default='gloo', type=str, 59 | help='distributed backend') 60 | parser.add_argument('--seed', default=None, type=int, 61 | help='seed for initializing training. ') 62 | parser.add_argument('--gpu', default=None, type=int, 63 | help='GPU id to use.') 64 | 65 | best_prec1 = 0 66 | 67 | 68 | def main(): 69 | global args, best_prec1 70 | args = parser.parse_args() 71 | 72 | if args.seed is not None: 73 | random.seed(args.seed) 74 | torch.manual_seed(args.seed) 75 | cudnn.deterministic = True 76 | warnings.warn('You have chosen to seed training. ' 77 | 'This will turn on the CUDNN deterministic setting, ' 78 | 'which can slow down your training considerably! ' 79 | 'You may see unexpected behavior when restarting ' 80 | 'from checkpoints.') 81 | 82 | if args.gpu is not None: 83 | warnings.warn('You have chosen a specific GPU. This will completely ' 84 | 'disable data parallelism.') 85 | 86 | args.distributed = args.world_size > 1 87 | 88 | if args.distributed: 89 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 90 | world_size=args.world_size) 91 | 92 | # create model 93 | if args.pretrained: 94 | print("=> using pre-trained model '{}'".format(args.arch)) 95 | model = models.__dict__[args.arch](pretrained=True) 96 | else: 97 | print("=> creating model '{}'".format(args.arch)) 98 | model = models.__dict__[args.arch]() 99 | 100 | if args.gpu is not None: 101 | model = model.cuda(args.gpu) 102 | elif args.distributed: 103 | model.cuda() 104 | model = torch.nn.parallel.DistributedDataParallel(model) 105 | else: 106 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): 107 | model.features = torch.nn.DataParallel(model.features) 108 | model.cuda() 109 | else: 110 | model = torch.nn.DataParallel(model).cuda() 111 | 112 | # define loss function (criterion) and optimizer 113 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 114 | 115 | optimizer = torch.optim.SGD(model.parameters(), args.lr, 116 | momentum=args.momentum, 117 | weight_decay=args.weight_decay) 118 | 119 | # optionally resume from a checkpoint 120 | if args.resume: 121 | if os.path.isfile(args.resume): 122 | print("=> loading checkpoint '{}'".format(args.resume)) 123 | checkpoint = torch.load(args.resume) 124 | args.start_epoch = checkpoint['epoch'] 125 | best_prec1 = checkpoint['best_prec1'] 126 | model.load_state_dict(checkpoint['state_dict']) 127 | optimizer.load_state_dict(checkpoint['optimizer']) 128 | print("=> loaded checkpoint '{}' (epoch {})" 129 | .format(args.resume, checkpoint['epoch'])) 130 | else: 131 | print("=> no checkpoint found at '{}'".format(args.resume)) 132 | 133 | cudnn.benchmark = True 134 | 135 | # Data loading code 136 | traindir = os.path.join(args.data, 'train') 137 | valdir = os.path.join(args.data, 'val') 138 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 139 | std=[0.229, 0.224, 0.225]) 140 | 141 | train_dataset = datasets.ImageFolder( 142 | traindir, 143 | transforms.Compose([ 144 | transforms.RandomResizedCrop(224), 145 | transforms.RandomHorizontalFlip(), 146 | transforms.ToTensor(), 147 | normalize, 148 | ])) 149 | 150 | if args.distributed: 151 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 152 | else: 153 | train_sampler = None 154 | 155 | train_loader = torch.utils.data.DataLoader( 156 | train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), 157 | num_workers=args.workers, pin_memory=True, sampler=train_sampler) 
158 | 159 | val_loader = torch.utils.data.DataLoader( 160 | datasets.ImageFolder(valdir, transforms.Compose([ 161 | transforms.Resize(256), 162 | transforms.CenterCrop(224), 163 | transforms.ToTensor(), 164 | normalize, 165 | ])), 166 | batch_size=args.batch_size, shuffle=False, 167 | num_workers=args.workers, pin_memory=True) 168 | 169 | if args.evaluate: 170 | validate(val_loader, model, criterion) 171 | return 172 | 173 | for epoch in range(args.start_epoch, args.epochs): 174 | if args.distributed: 175 | train_sampler.set_epoch(epoch) 176 | adjust_learning_rate(optimizer, epoch) 177 | 178 | # train for one epoch 179 | train(train_loader, model, criterion, optimizer, epoch) 180 | 181 | # evaluate on validation set 182 | prec1 = validate(val_loader, model, criterion) 183 | 184 | # remember best prec@1 and save checkpoint 185 | is_best = prec1 > best_prec1 186 | best_prec1 = max(prec1, best_prec1) 187 | save_checkpoint({ 188 | 'epoch': epoch + 1, 189 | 'arch': args.arch, 190 | 'state_dict': model.state_dict(), 191 | 'best_prec1': best_prec1, 192 | 'optimizer' : optimizer.state_dict(), 193 | }, is_best) 194 | 195 | 196 | def train(train_loader, model, criterion, optimizer, epoch): 197 | batch_time = AverageMeter() 198 | data_time = AverageMeter() 199 | losses = AverageMeter() 200 | top1 = AverageMeter() 201 | top5 = AverageMeter() 202 | 203 | # switch to train mode 204 | model.train() 205 | 206 | end = time.time() 207 | for i, (input, target) in enumerate(train_loader): 208 | # measure data loading time 209 | data_time.update(time.time() - end) 210 | 211 | if args.gpu is not None: 212 | input = input.cuda(args.gpu, non_blocking=True) 213 | target = target.cuda(args.gpu, non_blocking=True) 214 | 215 | # compute output 216 | output = model(input) 217 | loss = criterion(output, target) 218 | 219 | # measure accuracy and record loss 220 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 221 | losses.update(loss.item(), input.size(0)) 222 | top1.update(prec1[0], input.size(0)) 223 | top5.update(prec5[0], input.size(0)) 224 | 225 | # compute gradient and do SGD step 226 | optimizer.zero_grad() 227 | loss.backward() 228 | optimizer.step() 229 | 230 | # measure elapsed time 231 | batch_time.update(time.time() - end) 232 | end = time.time() 233 | 234 | if i % args.print_freq == 0: 235 | print('Epoch: [{0}][{1}/{2}]\t' 236 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 237 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 238 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 239 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 240 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 241 | epoch, i, len(train_loader), batch_time=batch_time, 242 | data_time=data_time, loss=losses, top1=top1, top5=top5)) 243 | 244 | 245 | def validate(val_loader, model, criterion): 246 | batch_time = AverageMeter() 247 | losses = AverageMeter() 248 | top1 = AverageMeter() 249 | top5 = AverageMeter() 250 | 251 | # switch to evaluate mode 252 | model.eval() 253 | 254 | with torch.no_grad(): 255 | end = time.time() 256 | for i, (input, target) in enumerate(val_loader): 257 | if args.gpu is not None: 258 | input = input.cuda(args.gpu, non_blocking=True) 259 | target = target.cuda(args.gpu, non_blocking=True) 260 | 261 | # compute output 262 | output = model(input) 263 | loss = criterion(output, target) 264 | 265 | # measure accuracy and record loss 266 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 267 | losses.update(loss.item(), input.size(0)) 268 | top1.update(prec1[0], input.size(0)) 269 | 
top5.update(prec5[0], input.size(0)) 270 | 271 | # measure elapsed time 272 | batch_time.update(time.time() - end) 273 | end = time.time() 274 | 275 | if i % args.print_freq == 0: 276 | print('Test: [{0}/{1}]\t' 277 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 278 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 279 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 280 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 281 | i, len(val_loader), batch_time=batch_time, loss=losses, 282 | top1=top1, top5=top5)) 283 | 284 | print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}' 285 | .format(top1=top1, top5=top5)) 286 | 287 | return top1.avg 288 | 289 | 290 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 291 | torch.save(state, filename) 292 | if is_best: 293 | shutil.copyfile(filename, 'model_best.pth.tar') 294 | 295 | 296 | class AverageMeter(object): 297 | """Computes and stores the average and current value""" 298 | def __init__(self): 299 | self.reset() 300 | 301 | def reset(self): 302 | self.val = 0 303 | self.avg = 0 304 | self.sum = 0 305 | self.count = 0 306 | 307 | def update(self, val, n=1): 308 | self.val = val 309 | self.sum += val * n 310 | self.count += n 311 | self.avg = self.sum / self.count 312 | 313 | 314 | def adjust_learning_rate(optimizer, epoch): 315 | """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" 316 | lr = args.lr * (0.1 ** (epoch // 30)) 317 | for param_group in optimizer.param_groups: 318 | param_group['lr'] = lr 319 | 320 | 321 | def accuracy(output, target, topk=(1,)): 322 | """Computes the accuracy over the k top predictions for the specified values of k""" 323 | with torch.no_grad(): 324 | maxk = max(topk) 325 | batch_size = target.size(0) 326 | 327 | _, pred = output.topk(maxk, 1, True, True) 328 | pred = pred.t() 329 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 330 | 331 | res = [] 332 | for k in topk: 333 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 334 | res.append(correct_k.mul_(100.0 / batch_size)) 335 | return res 336 | 337 | 338 | if __name__ == '__main__': 339 | main() 340 | -------------------------------------------------------------------------------- /word_language_model/notebooks/02_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "02_Inference.ipynb", 7 | "version": "0.3.2", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "toc_visible": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "metadata": { 20 | "id": "15yLeJZw8ncp", 21 | "colab_type": "text" 22 | }, 23 | "cell_type": "markdown", 24 | "source": [ 25 | "## Prepare the Environment" 26 | ] 27 | }, 28 | { 29 | "metadata": { 30 | "id": "b2uU_hgOtGur", 31 | "colab_type": "code", 32 | "colab": { 33 | "base_uri": "https://localhost:8080/", 34 | "height": 51 35 | }, 36 | "outputId": "3f490e97-3902-4ebe-e1c6-f5166cc8b6a8" 37 | }, 38 | "cell_type": "code", 39 | "source": [ 40 | "!pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html" 41 | ], 42 | "execution_count": 6, 43 | "outputs": [ 44 | { 45 | "output_type": "stream", 46 | "text": [ 47 | "Looking in links: https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html\n", 48 | "Requirement already satisfied: torch_nightly in /usr/local/lib/python3.6/dist-packages (1.0.0.dev20181011)\n" 49 | ], 
50 | "name": "stdout" 51 | } 52 | ] 53 | }, 54 | { 55 | "metadata": { 56 | "id": "OSsB1s-p4kPc", 57 | "colab_type": "code", 58 | "colab": { 59 | "base_uri": "https://localhost:8080/", 60 | "height": 34 61 | }, 62 | "outputId": "09363dc9-a617-41aa-f61e-1c44efcae94b" 63 | }, 64 | "cell_type": "code", 65 | "source": [ 66 | "!git clone https://github.com/ceshine/examples.git pytorch_examples" 67 | ], 68 | "execution_count": 7, 69 | "outputs": [ 70 | { 71 | "output_type": "stream", 72 | "text": [ 73 | "fatal: destination path 'pytorch_examples' already exists and is not an empty directory.\n" 74 | ], 75 | "name": "stdout" 76 | } 77 | ] 78 | }, 79 | { 80 | "metadata": { 81 | "id": "vtJtfZas4oS6", 82 | "colab_type": "code", 83 | "colab": { 84 | "base_uri": "https://localhost:8080/", 85 | "height": 85 86 | }, 87 | "outputId": "153c2cd0-cb8f-4de7-c22d-67e3ad706086" 88 | }, 89 | "cell_type": "code", 90 | "source": [ 91 | "%cd pytorch_examples/word_language_model\n", 92 | "%ls" 93 | ], 94 | "execution_count": 8, 95 | "outputs": [ 96 | { 97 | "output_type": "stream", 98 | "text": [ 99 | "/content/pytorch_examples/word_language_model\n", 100 | "\u001b[0m\u001b[01;34mdata\u001b[0m/ lm_model.pt model.py requirements.txt\n", 101 | "data.py main.py \u001b[01;34m__pycache__\u001b[0m/ train_new.log\n", 102 | "generate.py model_new.pt README.md\n" 103 | ], 104 | "name": "stdout" 105 | } 106 | ] 107 | }, 108 | { 109 | "metadata": { 110 | "id": "yAAdydfL6vcs", 111 | "colab_type": "text" 112 | }, 113 | "cell_type": "markdown", 114 | "source": [ 115 | "Upload the trained model (from notebook 01_Training.ipynb):" 116 | ] 117 | }, 118 | { 119 | "metadata": { 120 | "id": "PpgaP3x1icla", 121 | "colab_type": "text" 122 | }, 123 | "cell_type": "markdown", 124 | "source": [ 125 | "" 126 | ] 127 | }, 128 | { 129 | "metadata": { 130 | "id": "9hByUNAB6F6_", 131 | "colab_type": "code", 132 | "colab": { 133 | "resources": { 134 | "http://localhost:8080/nbextensions/google.colab/files.js": { 135 | "data": 
"Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZm
Fsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQuYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJlYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZ
S5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=", 136 | "ok": true, 137 | "headers": [ 138 | [ 139 | "content-type", 140 | "application/javascript" 141 | ] 142 | ], 143 | "status": 200, 144 | "status_text": "" 145 | } 146 | }, 147 | "base_uri": "https://localhost:8080/", 148 | "height": 38 149 | }, 150 | "outputId": "02a93afb-8cc1-45cc-bc27-6bb05c1f9260" 151 | }, 152 | "cell_type": "code", 153 | "source": [ 154 | "from google.colab import files\n", 155 | "\n", 156 | "uploaded = files.upload()\n", 157 | "\n", 158 | "for fn in uploaded.keys():\n", 159 | " print('User uploaded file \"{name}\" with length {length} bytes'.format(\n", 160 | " name=fn, length=len(uploaded[fn])))" 161 | ], 162 | "execution_count": 19, 163 | "outputs": [ 164 | { 165 | "output_type": "display_data", 166 | "data": { 167 | "text/html": [ 168 | "\n", 169 | " \n", 170 | " \n", 171 | " Upload widget is only available when the cell has been executed in the\n", 172 | " current browser session. Please rerun this cell to enable.\n", 173 | " \n", 174 | " " 175 | ], 176 | "text/plain": [ 177 | "" 178 | ] 179 | }, 180 | "metadata": { 181 | "tags": [] 182 | } 183 | } 184 | ] 185 | }, 186 | { 187 | "metadata": { 188 | "id": "M5fre8JFEO8R", 189 | "colab_type": "text" 190 | }, 191 | "cell_type": "markdown", 192 | "source": [ 193 | "The above did not work for me (because I constantly failed to download the entire file). Using gsutil instead here:" 194 | ] 195 | }, 196 | { 197 | "metadata": { 198 | "id": "vNhKUTtMEXZZ", 199 | "colab_type": "code", 200 | "colab": { 201 | "base_uri": "https://localhost:8080/", 202 | "height": 34 203 | }, 204 | "outputId": "74ceda78-abfe-4fb7-d278-b62bf0ed35d8" 205 | }, 206 | "cell_type": "code", 207 | "source": [ 208 | "from google.colab import auth\n", 209 | "auth.authenticate_user()\n", 210 | "\n", 211 | "# https://cloud.google.com/resource-manager/docs/creating-managing-projects\n", 212 | "project_id = 'personal-project-196600'\n", 213 | "!gcloud config set project {project_id}" 214 | ], 215 | "execution_count": 138, 216 | "outputs": [ 217 | { 218 | "output_type": "stream", 219 | "text": [ 220 | "Updated property [core/project].\n" 221 | ], 222 | "name": "stdout" 223 | } 224 | ] 225 | }, 226 | { 227 | "metadata": { 228 | "id": "5GIVuGa7Ei4e", 229 | "colab_type": "code", 230 | "colab": { 231 | "base_uri": "https://localhost:8080/", 232 | "height": 68 233 | }, 234 | "outputId": "5e591b04-690d-4e73-cbd9-2e28112624eb" 235 | }, 236 | "cell_type": "code", 237 | "source": [ 238 | "!gsutil cp gs://ceshine-colab-tmp/lm_model.pt lm_model.pt" 239 | ], 240 | "execution_count": 140, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "text": [ 245 | "Copying gs://ceshine-colab-tmp/lm_model.pt...\n", 246 | "\\ [1 files][108.5 MiB/108.5 MiB] \n", 247 | "Operation completed over 1 objects/108.5 MiB. 
\n" 248 | ], 249 | "name": "stdout" 250 | } 251 | ] 252 | }, 253 | { 254 | "metadata": { 255 | "id": "CZAw4xjq9wSH", 256 | "colab_type": "text" 257 | }, 258 | "cell_type": "markdown", 259 | "source": [ 260 | "Import libraries, functions and classes:" 261 | ] 262 | }, 263 | { 264 | "metadata": { 265 | "id": "PX8bxlu_7wYX", 266 | "colab_type": "code", 267 | "colab": {} 268 | }, 269 | "cell_type": "code", 270 | "source": [ 271 | "import torch\n", 272 | "import numpy as np\n", 273 | "import pandas as pd\n", 274 | "\n", 275 | "from model import RNNModel\n", 276 | "from data import Dictionary, Corpus" 277 | ], 278 | "execution_count": 0, 279 | "outputs": [] 280 | }, 281 | { 282 | "metadata": { 283 | "id": "A5FDZG9y46of", 284 | "colab_type": "text" 285 | }, 286 | "cell_type": "markdown", 287 | "source": [ 288 | "## Prepare Dictionary" 289 | ] 290 | }, 291 | { 292 | "metadata": { 293 | "id": "AOUPtIdJ-0qC", 294 | "colab_type": "code", 295 | "colab": { 296 | "base_uri": "https://localhost:8080/", 297 | "height": 102 298 | }, 299 | "outputId": "711f133f-b466-48ff-ae07-56ebd84368dd" 300 | }, 301 | "cell_type": "code", 302 | "source": [ 303 | "DATA_PATH = \"./data/wikitext-2\"\n", 304 | "corpus = Corpus(DATA_PATH)\n", 305 | "\n", 306 | "print(\"Number of tokens:\")\n", 307 | "print(\"Train: \", len(corpus.train))\n", 308 | "print(\"Valid: \", len(corpus.valid))\n", 309 | "print(\"Test: \", len(corpus.test))\n", 310 | "\n", 311 | "print(\"Vocabulary size:\", len(corpus.dictionary.idx2word))" 312 | ], 313 | "execution_count": 161, 314 | "outputs": [ 315 | { 316 | "output_type": "stream", 317 | "text": [ 318 | "Number of tokens:\n", 319 | "Train: 2075677\n", 320 | "Valid: 216347\n", 321 | "Test: 244102\n", 322 | "Vocabulary size: 33278\n" 323 | ], 324 | "name": "stdout" 325 | } 326 | ] 327 | }, 328 | { 329 | "metadata": { 330 | "id": "FU4dIZ68_pxl", 331 | "colab_type": "text" 332 | }, 333 | "cell_type": "markdown", 334 | "source": [ 335 | "## Load Model" 336 | ] 337 | }, 338 | { 339 | "metadata": { 340 | "id": "Yr54aXIS_PZO", 341 | "colab_type": "code", 342 | "colab": {} 343 | }, 344 | "cell_type": "code", 345 | "source": [ 346 | "DEVICE = torch.device(\"cpu\")\n", 347 | "# model = model.RNNModel(\n", 348 | "# \"LSTM\", len(corpus.dictionary), 650,\n", 349 | "# 650, 2, 0.5, True\n", 350 | "# ).to(DEVICE)" 351 | ], 352 | "execution_count": 0, 353 | "outputs": [] 354 | }, 355 | { 356 | "metadata": { 357 | "id": "7QWwNQyPAfSa", 358 | "colab_type": "code", 359 | "colab": {} 360 | }, 361 | "cell_type": "code", 362 | "source": [ 363 | "with open(\"lm_model.pt\", 'rb') as f:\n", 364 | " model = torch.load(f, map_location='cpu')\n", 365 | "model = model.to(DEVICE)" 366 | ], 367 | "execution_count": 0, 368 | "outputs": [] 369 | }, 370 | { 371 | "metadata": { 372 | "id": "BYerbMO0FEwd", 373 | "colab_type": "code", 374 | "colab": { 375 | "base_uri": "https://localhost:8080/", 376 | "height": 119 377 | }, 378 | "outputId": "6d25897e-e2e4-4948-8794-333c98d09324" 379 | }, 380 | "cell_type": "code", 381 | "source": [ 382 | "model.eval()" 383 | ], 384 | "execution_count": 143, 385 | "outputs": [ 386 | { 387 | "output_type": "execute_result", 388 | "data": { 389 | "text/plain": [ 390 | "RNNModel(\n", 391 | " (drop): Dropout(p=0.5)\n", 392 | " (encoder): Embedding(33278, 650)\n", 393 | " (rnn): LSTM(650, 650, num_layers=2, dropout=0.5)\n", 394 | " (decoder): Linear(in_features=650, out_features=33278, bias=True)\n", 395 | ")" 396 | ] 397 | }, 398 | "metadata": { 399 | "tags": [] 400 | }, 401 | "execution_count": 143 402 | 
} 403 | ] 404 | }, 405 | { 406 | "metadata": { 407 | "id": "oJM06o9eFKpt", 408 | "colab_type": "text" 409 | }, 410 | "cell_type": "markdown", 411 | "source": [ 412 | "## Evaluate with Test Documents" 413 | ] 414 | }, 415 | { 416 | "metadata": { 417 | "id": "z7EuOn6idH_7", 418 | "colab_type": "text" 419 | }, 420 | "cell_type": "markdown", 421 | "source": [ 422 | "### Calculate the Perplexity of the Test Predictions\n", 423 | "To confirm we have loaded the correct model." 424 | ] 425 | }, 426 | { 427 | "metadata": { 428 | "id": "eFjgGcqcbLk6", 429 | "colab_type": "code", 430 | "colab": { 431 | "base_uri": "https://localhost:8080/", 432 | "height": 51 433 | }, 434 | "outputId": "0d45ab36-fbdb-43df-93d2-707b9e8933e5" 435 | }, 436 | "cell_type": "code", 437 | "source": [ 438 | "%%time\n", 439 | "BPTT = 50\n", 440 | "CRITERION = torch.nn.CrossEntropyLoss()\n", 441 | "\n", 442 | "def batchify(data, bsz):\n", 443 | " # Work out how cleanly we can divide the dataset into bsz parts.\n", 444 | " nbatch = data.size(0) // bsz\n", 445 | " # Trim off any extra elements that wouldn't cleanly fit (remainders).\n", 446 | " data = data.narrow(0, 0, nbatch * bsz)\n", 447 | " # Evenly divide the data across the bsz batches.\n", 448 | " data = data.view(bsz, -1).t().contiguous()\n", 449 | " return data.to(DEVICE)\n", 450 | "\n", 451 | "def get_batch(source, i):\n", 452 | " seq_len = min(BPTT, len(source) - 1 - i)\n", 453 | " data = source[i:i+seq_len]\n", 454 | " target = source[i+1:i+1+seq_len].view(-1)\n", 455 | " return data, target\n", 456 | "\n", 457 | "def evaluate(data_source):\n", 458 | " # Turn on evaluation mode which disables dropout.\n", 459 | " model.eval()\n", 460 | " total_loss = 0.\n", 461 | " ntokens = len(corpus.dictionary)\n", 462 | " hidden = model.init_hidden(10)\n", 463 | " with torch.no_grad():\n", 464 | " for i in range(0, data_source.size(0) - 1, BPTT):\n", 465 | " data, targets = get_batch(data_source, i)\n", 466 | " output, hidden = model(data, hidden)\n", 467 | " output_flat = output.view(-1, ntokens)\n", 468 | " total_loss += len(data) * CRITERION(output_flat, targets).item()\n", 469 | " hidden = repackage_hidden(hidden)\n", 470 | " return total_loss / len(data_source)\n", 471 | "\n", 472 | "def repackage_hidden(h):\n", 473 | " \"\"\"Wraps hidden states in new Tensors, to detach them from their history.\"\"\"\n", 474 | " if isinstance(h, torch.Tensor):\n", 475 | " return h.detach()\n", 476 | " else:\n", 477 | " return tuple(repackage_hidden(v) for v in h)\n", 478 | " \n", 479 | "test_data = batchify(corpus.test, 10)\n", 480 | "loss = evaluate(test_data)" 481 | ], 482 | "execution_count": 146, 483 | "outputs": [ 484 | { 485 | "output_type": "stream", 486 | "text": [ 487 | "CPU times: user 5min 55s, sys: 1.68 s, total: 5min 57s\n", 488 | "Wall time: 5min 57s\n" 489 | ], 490 | "name": "stdout" 491 | } 492 | ] 493 | }, 494 | { 495 | "metadata": { 496 | "id": "mt5E12qghArC", 497 | "colab_type": "code", 498 | "colab": { 499 | "base_uri": "https://localhost:8080/", 500 | "height": 34 501 | }, 502 | "outputId": "13da0e7d-d3ce-44af-910d-d0a3a4265d90" 503 | }, 504 | "cell_type": "code", 505 | "source": [ 506 | "loss, np.exp(loss)" 507 | ], 508 | "execution_count": 147, 509 | "outputs": [ 510 | { 511 | "output_type": "execute_result", 512 | "data": { 513 | "text/plain": [ 514 | "(4.486460813329338, 88.8065859480267)" 515 | ] 516 | }, 517 | "metadata": { 518 | "tags": [] 519 | }, 520 | "execution_count": 147 521 | } 522 | ] 523 | }, 524 | { 525 | "metadata": { 526 | "id": "mRWnZR7oM9DO", 527 | 
"colab_type": "text" 528 | }, 529 | "cell_type": "markdown", 530 | "source": [ 531 | "### Check the Next Word Predictions" 532 | ] 533 | }, 534 | { 535 | "metadata": { 536 | "id": "UYzjsoksGn59", 537 | "colab_type": "code", 538 | "colab": { 539 | "base_uri": "https://localhost:8080/", 540 | "height": 34 541 | }, 542 | "outputId": "11a12226-12d2-4dcd-86ca-85de4ba11849" 543 | }, 544 | "cell_type": "code", 545 | "source": [ 546 | "test_tokens = corpus.test.numpy()\n", 547 | "eos_pos = np.where(test_tokens == corpus.dictionary.word2idx[\"\"])[0]\n", 548 | "print(\"Number of lines in test:\", len(eos_pos))" 549 | ], 550 | "execution_count": 148, 551 | "outputs": [ 552 | { 553 | "output_type": "stream", 554 | "text": [ 555 | "Number of lines in test: 2891\n" 556 | ], 557 | "name": "stdout" 558 | } 559 | ] 560 | }, 561 | { 562 | "metadata": { 563 | "id": "DJRJZL-SF943", 564 | "colab_type": "code", 565 | "colab": { 566 | "base_uri": "https://localhost:8080/", 567 | "height": 54 568 | }, 569 | "outputId": "9df5a937-86fa-4dae-aafa-65f87b123870" 570 | }, 571 | "cell_type": "code", 572 | "source": [ 573 | "# A random line from test dataset\n", 574 | "print(\" \".join([corpus.dictionary.idx2word[c] for c in test_tokens[eos_pos[28]+1:eos_pos[29]]]))" 575 | ], 576 | "execution_count": 149, 577 | "outputs": [ 578 | { 579 | "output_type": "stream", 580 | "text": [ 581 | "The An Rebellion began in December , and was not completely suppressed for almost eight years . It caused enormous disruption to Chinese society : the census of 754 recorded 52 @.@ 9 million people , but ten years later , the census counted just 16 @.@ 9 million , the remainder having been displaced or killed . During this time , Du Fu led a largely itinerant life by wars , associated and imperial . This period of was the making of Du Fu as a poet : Even Shan Chou has written that , \" What he saw around him — the lives of his family , neighbors , and strangers – what he heard , and what he hoped for or feared from the progress of various campaigns — these became the enduring themes of his poetry \" . Even when he learned of the death of his youngest child , he turned to the suffering of others in his poetry instead of dwelling upon his own . 
Du Fu wrote :\n" 582 | ], 583 | "name": "stdout" 584 | } 585 | ] 586 | }, 587 | { 588 | "metadata": { 589 | "id": "4Lsf43zAFKDT", 590 | "colab_type": "code", 591 | "colab": {} 592 | }, 593 | "cell_type": "code", 594 | "source": [ 595 | "def eval_chunk(start, end):\n", 596 | " token_tensor = corpus.test[eos_pos[start]+1:eos_pos[end]]\n", 597 | " hidden = model.init_hidden(1)\n", 598 | " with torch.no_grad():\n", 599 | " targets = token_tensor[1:]\n", 600 | " output, hidden = model(token_tensor.unsqueeze(1), hidden)\n", 601 | " output_flat = output.squeeze(1)\n", 602 | " loss = CRITERION(output_flat[:-1], targets).item()\n", 603 | " \n", 604 | " sorted_idx = np.argsort(output_flat.numpy(), 1)\n", 605 | " preds = []\n", 606 | " for i in range(1, 4):\n", 607 | " preds.append(list(map(lambda x: corpus.dictionary.idx2word[x], sorted_idx[:, -i])))\n", 608 | " # preds = list(map(lambda x: itos[x], np.argmax(logits.data.cpu().numpy(), 1)))\n", 609 | " return (\n", 610 | " loss,\n", 611 | " pd.DataFrame({\n", 612 | " \"orig\": [corpus.dictionary.idx2word[x] for x in token_tensor.numpy()] + [\" \"], \n", 613 | " \"pred_1\": [\"\"] + preds[0], \"pred_2\": [\"\"] + preds[1], \"pred_3\": [\"\"] + preds[2]\n", 614 | " })\n", 615 | " )" 616 | ], 617 | "execution_count": 0, 618 | "outputs": [] 619 | }, 620 | { 621 | "metadata": { 622 | "id": "zwgGIiA8MwmO", 623 | "colab_type": "text" 624 | }, 625 | "cell_type": "markdown", 626 | "source": [ 627 | "Let's try using only one line:" 628 | ] 629 | }, 630 | { 631 | "metadata": { 632 | "id": "_gbOxcgBLSv8", 633 | "colab_type": "code", 634 | "colab": { 635 | "base_uri": "https://localhost:8080/", 636 | "height": 1616 637 | }, 638 | "outputId": "813549de-18af-4f60-85dd-ae5f9c1ef2ef" 639 | }, 640 | "cell_type": "code", 641 | "source": [ 642 | "loss, df = eval_chunk(28, 29)\n", 643 | "print(\"Loss:\", np.exp(loss))\n", 644 | "df.iloc[-50:]" 645 | ], 646 | "execution_count": 151, 647 | "outputs": [ 648 | { 649 | "output_type": "stream", 650 | "text": [ 651 | "Loss: 163.91555818335866\n" 652 | ], 653 | "name": "stdout" 654 | }, 655 | { 656 | "output_type": "execute_result", 657 | "data": { 658 | "text/html": [ 659 | "
\n", 660 | "\n", 673 | "\n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 
945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | "
origpred_1pred_2pred_3
133progress<unk>worldtime
134ofof.,
135variousthehisa
136campaignspeoplethings<unk>
137.,\"
138theseandthe\"
139becamearewerepeople
140theathemore
141enduringmost<unk>first
142themes<unk>subjectthing
143ofofin.
144histhehisall
145poetrylifeown<unk>
146\".,and
147..,and
148Even<eos>TheHe
149whenthoughinafter
150hehethe,
151learnedwashaddied
152ofofthatabout
153thethehisher
154death<unk>bookdeath
155ofof,he
156hishisthea
157youngestwifefatherbrother
158childwifesonbrother
159,,inand
160hehetheJohn
161turnedwashadwrote
162toouttodown
163thethea<unk>
164suffering<unk>publichouse
165ofofand,
166othershisthea
167in,and.
168histhehisa
169poetrylifeown<unk>
170instead.,and
171ofof.,
172dwellinghisthea
173upon.,and
174hishisthea
175owndeatharrival<unk>
176<unk>death<unk>work
177..,and
178Du<eos>TheHe
179Fu<unk>BraunJarl
180wrote,wasand
181:athethat
182\"<eos>'
\n", 1036 | "
" 1037 | ], 1038 | "text/plain": [ 1039 | " orig pred_1 pred_2 pred_3\n", 1040 | "133 progress world time\n", 1041 | "134 of of . ,\n", 1042 | "135 various the his a\n", 1043 | "136 campaigns people things \n", 1044 | "137 — . , \"\n", 1045 | "138 these and the \"\n", 1046 | "139 became are were people\n", 1047 | "140 the a the more\n", 1048 | "141 enduring most first\n", 1049 | "142 themes subject thing\n", 1050 | "143 of of in .\n", 1051 | "144 his the his all\n", 1052 | "145 poetry life own \n", 1053 | "146 \" . , and\n", 1054 | "147 . . , and\n", 1055 | "148 Even The He\n", 1056 | "149 when though in after\n", 1057 | "150 he he the ,\n", 1058 | "151 learned was had died\n", 1059 | "152 of of that about\n", 1060 | "153 the the his her\n", 1061 | "154 death book death\n", 1062 | "155 of of , he\n", 1063 | "156 his his the a\n", 1064 | "157 youngest wife father brother\n", 1065 | "158 child wife son brother\n", 1066 | "159 , , in and\n", 1067 | "160 he he the John\n", 1068 | "161 turned was had wrote\n", 1069 | "162 to out to down\n", 1070 | "163 the the a \n", 1071 | "164 suffering public house\n", 1072 | "165 of of and ,\n", 1073 | "166 others his the a\n", 1074 | "167 in , and .\n", 1075 | "168 his the his a\n", 1076 | "169 poetry life own \n", 1077 | "170 instead . , and\n", 1078 | "171 of of . ,\n", 1079 | "172 dwelling his the a\n", 1080 | "173 upon . , and\n", 1081 | "174 his his the a\n", 1082 | "175 own death arrival \n", 1083 | "176 death work\n", 1084 | "177 . . , and\n", 1085 | "178 Du The He\n", 1086 | "179 Fu Braun Jarl\n", 1087 | "180 wrote , was and\n", 1088 | "181 : a the that\n", 1089 | "182 \" '" 1090 | ] 1091 | }, 1092 | "metadata": { 1093 | "tags": [] 1094 | }, 1095 | "execution_count": 151 1096 | } 1097 | ] 1098 | }, 1099 | { 1100 | "metadata": { 1101 | "id": "5NT3hejgMzcH", 1102 | "colab_type": "text" 1103 | }, 1104 | "cell_type": "markdown", 1105 | "source": [ 1106 | "Now try providing more context:" 1107 | ] 1108 | }, 1109 | { 1110 | "metadata": { 1111 | "id": "hIw64ToYMUJp", 1112 | "colab_type": "code", 1113 | "colab": { 1114 | "base_uri": "https://localhost:8080/", 1115 | "height": 1616 1116 | }, 1117 | "outputId": "f8b76fb2-dad3-4eb4-cd47-a419ef260f34" 1118 | }, 1119 | "cell_type": "code", 1120 | "source": [ 1121 | "loss, df = eval_chunk(28, 34)\n", 1122 | "print(\"Loss:\", np.exp(loss))\n", 1123 | "df.iloc[-50:]" 1124 | ], 1125 | "execution_count": 152, 1126 | "outputs": [ 1127 | { 1128 | "output_type": "stream", 1129 | "text": [ 1130 | "Loss: 104.32415212207026\n" 1131 | ], 1132 | "name": "stdout" 1133 | }, 1134 | { 1135 | "output_type": "execute_result", 1136 | "data": { 1137 | "text/html": [ 1138 | "
\n", 1139 | "\n", 1152 | "\n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " 
\n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | "
origpred_1pred_2pred_3
489intothea
490thetheahis
491summer<unk>middlemorning
492ofofand,
493<unk>1918the1916
494;,and.
495thishethehis
496haswastimeis
497traditionallybeenaalso
498beenbeenoccurredcome
499ascribeddescribedaused
500totobythe
501faminethehisa
502,.,and
503butandbutas
504<unk>theheit
505believes,<unk>the
506thatthatthehe
507frustrationthehehis
508isinfromwas
509anotthea
510more\"<unk>great
511likelyimportant<unk>powerful
512reason<unk>partsubject
513.forto.
514He<eos>HeThe
515nextwasalsois
516spent,<unk>was
517aroundtheahis
518sixtheahis
519weeksyearsmonthsdays
520in,ofin
521<unk>thehisa
522(,and.
523now<unk>aand
524<unk><unk>thea
525,),<unk>
526Gansu<unk>thenow
527province,)and
528)),and
529,,and.
530whereandwherebut
531hehethehis
532wrotewashaddied
533moretoathe
534thanthanof<unk>
535sixtyaonethe
536poemsyears@-@men
537.,.and
538<eos>HeThe
\n", 1515 | "
" 1516 | ], 1517 | "text/plain": [ 1518 | " orig pred_1 pred_2 pred_3\n", 1519 | "489 in to the a\n", 1520 | "490 the the a his\n", 1521 | "491 summer middle morning\n", 1522 | "492 of of and ,\n", 1523 | "493 1918 the 1916\n", 1524 | "494 ; , and .\n", 1525 | "495 this he the his\n", 1526 | "496 has was time is\n", 1527 | "497 traditionally been a also\n", 1528 | "498 been been occurred come\n", 1529 | "499 ascribed described a used\n", 1530 | "500 to to by the\n", 1531 | "501 famine the his a\n", 1532 | "502 , . , and\n", 1533 | "503 but and but as\n", 1534 | "504 the he it\n", 1535 | "505 believes , the\n", 1536 | "506 that that the he\n", 1537 | "507 frustration the he his\n", 1538 | "508 is in from was\n", 1539 | "509 a not the a\n", 1540 | "510 more \" great\n", 1541 | "511 likely important powerful\n", 1542 | "512 reason part subject\n", 1543 | "513 . for to .\n", 1544 | "514 He He The\n", 1545 | "515 next was also is\n", 1546 | "516 spent , was\n", 1547 | "517 around the a his\n", 1548 | "518 six the a his\n", 1549 | "519 weeks years months days\n", 1550 | "520 in , of in\n", 1551 | "521 the his a\n", 1552 | "522 ( , and .\n", 1553 | "523 now a and\n", 1554 | "524 the a\n", 1555 | "525 , ) , \n", 1556 | "526 Gansu the now\n", 1557 | "527 province , ) and\n", 1558 | "528 ) ) , and\n", 1559 | "529 , , and .\n", 1560 | "530 where and where but\n", 1561 | "531 he he the his\n", 1562 | "532 wrote was had died\n", 1563 | "533 more to a the\n", 1564 | "534 than than of \n", 1565 | "535 sixty a one the\n", 1566 | "536 poems years @-@ men\n", 1567 | "537 . , . and\n", 1568 | "538 He The" 1569 | ] 1570 | }, 1571 | "metadata": { 1572 | "tags": [] 1573 | }, 1574 | "execution_count": 152 1575 | } 1576 | ] 1577 | }, 1578 | { 1579 | "metadata": { 1580 | "id": "n1J3aRTNNCtJ", 1581 | "colab_type": "text" 1582 | }, 1583 | "cell_type": "markdown", 1584 | "source": [ 1585 | "### Try to Generate Texts" 1586 | ] 1587 | }, 1588 | { 1589 | "metadata": { 1590 | "id": "5vZ1f4HVUHRH", 1591 | "colab_type": "code", 1592 | "colab": { 1593 | "base_uri": "https://localhost:8080/", 1594 | "height": 34 1595 | }, 1596 | "outputId": "6e813046-6f40-4626-f68d-6abc8e1dbbb0" 1597 | }, 1598 | "cell_type": "code", 1599 | "source": [ 1600 | "UNK = corpus.dictionary.word2idx[\"\"]\n", 1601 | "UNK" 1602 | ], 1603 | "execution_count": 153, 1604 | "outputs": [ 1605 | { 1606 | "output_type": "execute_result", 1607 | "data": { 1608 | "text/plain": [ 1609 | "9" 1610 | ] 1611 | }, 1612 | "metadata": { 1613 | "tags": [] 1614 | }, 1615 | "execution_count": 153 1616 | } 1617 | ] 1618 | }, 1619 | { 1620 | "metadata": { 1621 | "id": "RcC-UdUZVrD2", 1622 | "colab_type": "text" 1623 | }, 1624 | "cell_type": "markdown", 1625 | "source": [ 1626 | "#### Greedy Selection" 1627 | ] 1628 | }, 1629 | { 1630 | "metadata": { 1631 | "id": "A-1CnFWVNGTX", 1632 | "colab_type": "code", 1633 | "colab": {} 1634 | }, 1635 | "cell_type": "code", 1636 | "source": [ 1637 | "def generate_text_from_chunk(start, end, target_length=20):\n", 1638 | " \"\"\"Greedy selection of the next token.\"\"\"\n", 1639 | " token_tensor = corpus.test[eos_pos[start]+1:eos_pos[end]]\n", 1640 | " return generate_text_from_tensor(token_tensor, target_length)\n", 1641 | " \n", 1642 | "def generate_text_from_tensor(token_tensor, target_length):\n", 1643 | " hidden = model.init_hidden(1)\n", 1644 | " output, hidden = model(token_tensor.unsqueeze(1), hidden)\n", 1645 | " index = output[-1, -0, :].argmax()\n", 1646 | " res = [index.numpy()]\n", 1647 | " with torch.no_grad(): \n", 1648 
| " for i in range(target_length):\n", 1649 | " output, hidden = model(index.unsqueeze(0).unsqueeze(0), hidden)\n", 1650 | " index = output[-1, 0, ].argmax()\n", 1651 | " res.append(index.numpy())\n", 1652 | " return [\n", 1653 | " [\n", 1654 | " corpus.dictionary.idx2word[x] for x in arr \n", 1655 | " ] for arr in (token_tensor.numpy(), res)\n", 1656 | " ]" 1657 | ], 1658 | "execution_count": 0, 1659 | "outputs": [] 1660 | }, 1661 | { 1662 | "metadata": { 1663 | "id": "QceMDqccPTNl", 1664 | "colab_type": "code", 1665 | "colab": { 1666 | "base_uri": "https://localhost:8080/", 1667 | "height": 51 1668 | }, 1669 | "outputId": "e4a60054-a406-4e24-cbdc-cecf13d0782e" 1670 | }, 1671 | "cell_type": "code", 1672 | "source": [ 1673 | "context, new_texts = generate_text_from_chunk(28, 29)\n", 1674 | "print(\" \".join(context[-10:]))\n", 1675 | "print(\" \".join(new_texts))" 1676 | ], 1677 | "execution_count": 156, 1678 | "outputs": [ 1679 | { 1680 | "output_type": "stream", 1681 | "text": [ 1682 | "dwelling upon his own . Du Fu wrote :\n", 1683 | "\" I 'm not going to be a , and I am not going to be a . \"\n" 1684 | ], 1685 | "name": "stdout" 1686 | } 1687 | ] 1688 | }, 1689 | { 1690 | "metadata": { 1691 | "id": "L_eca7VhS76z", 1692 | "colab_type": "code", 1693 | "colab": { 1694 | "base_uri": "https://localhost:8080/", 1695 | "height": 51 1696 | }, 1697 | "outputId": "e5b88ba5-295a-42ec-91f9-27d78b917065" 1698 | }, 1699 | "cell_type": "code", 1700 | "source": [ 1701 | "context, new_texts = generate_text_from_chunk(28, 38)\n", 1702 | "print(\" \".join(context[-10:]))\n", 1703 | "print(\" \".join(new_texts))" 1704 | ], 1705 | "execution_count": 162, 1706 | "outputs": [ 1707 | { 1708 | "output_type": "stream", 1709 | "text": [ 1710 | "Fu financially and employed him as his unofficial secretary .\n", 1711 | "The Latin chronicler John C. also described him as his \" liberal @-@ confident \" . 
He described them\n" 1712 | ], 1713 | "name": "stdout" 1714 | } 1715 | ] 1716 | }, 1717 | { 1718 | "metadata": { 1719 | "id": "A7LXS_pnVvnY", 1720 | "colab_type": "text" 1721 | }, 1722 | "cell_type": "markdown", 1723 | "source": [ 1724 | "#### Sampling from the Predicted Distribution with a Temeperature Knob" 1725 | ] 1726 | }, 1727 | { 1728 | "metadata": { 1729 | "id": "kSBkyqogV-M7", 1730 | "colab_type": "code", 1731 | "colab": {} 1732 | }, 1733 | "cell_type": "code", 1734 | "source": [ 1735 | "def generate_text_from_chunk(start, end, target_length=20, temperature=1.0):\n", 1736 | " token_tensor = corpus.test[eos_pos[start]+1:eos_pos[end]]\n", 1737 | " return generate_text_from_tensor(token_tensor, target_length, temperature)\n", 1738 | " \n", 1739 | "\n", 1740 | "def generate_text_from_tensor(token_tensor, target_length, temperature):\n", 1741 | " \"\"\"Sampling from the softmax distribution.\"\"\" \n", 1742 | " hidden = model.init_hidden(1)\n", 1743 | " _, hidden = model(token_tensor[:-1].unsqueeze(1), hidden)\n", 1744 | " input_tensor = torch.zeros((1, 1)).long().to(DEVICE)\n", 1745 | " input_tensor[0, 0].fill_(token_tensor[-1])\n", 1746 | " res = []\n", 1747 | " with torch.no_grad(): \n", 1748 | " for i in range(target_length): \n", 1749 | " output, hidden = model(input_tensor, hidden)\n", 1750 | " word_weights = output.squeeze().div(temperature).exp()\n", 1751 | " word_idx = torch.multinomial(word_weights, 1)[0]\n", 1752 | " input_tensor[0, 0].fill_(word_idx)\n", 1753 | " res.append(word_idx.item())\n", 1754 | " return [\n", 1755 | " [\n", 1756 | " corpus.dictionary.idx2word[x] for x in arr \n", 1757 | " ] for arr in (token_tensor.numpy(), res)\n", 1758 | " ]" 1759 | ], 1760 | "execution_count": 0, 1761 | "outputs": [] 1762 | }, 1763 | { 1764 | "metadata": { 1765 | "id": "qpZVvNGpXHxj", 1766 | "colab_type": "code", 1767 | "colab": { 1768 | "base_uri": "https://localhost:8080/", 1769 | "height": 119 1770 | }, 1771 | "outputId": "4861f329-bb55-45d4-b5ea-2d822065194f" 1772 | }, 1773 | "cell_type": "code", 1774 | "source": [ 1775 | "context, new_texts = generate_text_from_chunk(28, 33, target_length=50)\n", 1776 | "print(\" \".join(context[-10:]))\n", 1777 | "for i in range(0, len(new_texts), 10):\n", 1778 | " print(\" \".join(new_texts[i:i+10]))" 1779 | ], 1780 | "execution_count": 172, 1781 | "outputs": [ 1782 | { 1783 | "output_type": "stream", 1784 | "text": [ 1785 | "bring more papers to pile higher on my desk .\n", 1786 | "\" ( two ) and Cristina 's army in\n", 1787 | " where all historians discovered that the German sniper was\n", 1788 | "still from and one out of the Sisler children\n", 1789 | ". 
A brother , the friend of Richard ,\n", 1790 | "senior of the island , was therefore procured in the\n" 1791 | ], 1792 | "name": "stdout" 1793 | } 1794 | ] 1795 | }, 1796 | { 1797 | "metadata": { 1798 | "id": "uataLNWWYqK8", 1799 | "colab_type": "code", 1800 | "colab": {} 1801 | }, 1802 | "cell_type": "code", 1803 | "source": [ 1804 | "def generate_text_from_texts(texts, target_length=20, temperature=1.0):\n", 1805 | " \"\"\"texts needs to be tokens seperated by space characters.\"\"\"\n", 1806 | " token_tensor = torch.LongTensor([\n", 1807 | " corpus.dictionary.word2idx[x] for x in texts.split(\" \")\n", 1808 | " ]).to(DEVICE)\n", 1809 | " return generate_text_from_tensor(token_tensor, target_length, temperature)" 1810 | ], 1811 | "execution_count": 0, 1812 | "outputs": [] 1813 | }, 1814 | { 1815 | "metadata": { 1816 | "id": "4DNHQVUQjE2w", 1817 | "colab_type": "code", 1818 | "colab": { 1819 | "base_uri": "https://localhost:8080/", 1820 | "height": 204 1821 | }, 1822 | "outputId": "577a9287-dc35-42bb-ccca-8d8f17fc5332" 1823 | }, 1824 | "cell_type": "code", 1825 | "source": [ 1826 | "context, new_texts = generate_text_from_texts(\"In the fall of 1944 , enrolled at the University of Michigan . The United Press syndicate\", target_length=100)\n", 1827 | "print(\" \".join(context[-10:]))\n", 1828 | "for i in range(0, len(new_texts), 10):\n", 1829 | " print(\" \".join(new_texts[i:i+10]))" 1830 | ], 1831 | "execution_count": 173, 1832 | "outputs": [ 1833 | { 1834 | "output_type": "stream", 1835 | "text": [ 1836 | "at the University of Michigan . The United Press syndicate\n", 1837 | "and officials was interpreted by the searing complaints being used\n", 1838 | "as the musician by another mixed review , but expressed\n", 1839 | "concern that the laws would be found out in the\n", 1840 | "United States and during a transmission control of the same\n", 1841 | "second landscapes . Lisa that he managed to visit the\n", 1842 | "relationship with Carey and Marvel 's general president for food\n", 1843 | "was \" desperate and looking , based on their own\n", 1844 | "wing . \" Asked in this , the company was\n", 1845 | "told by the US Bureau of Education , who decided\n", 1846 | ", and eventually admitted to the 1920s , and \"\n" 1847 | ], 1848 | "name": "stdout" 1849 | } 1850 | ] 1851 | }, 1852 | { 1853 | "metadata": { 1854 | "id": "S4jHkK4NjaFm", 1855 | "colab_type": "code", 1856 | "colab": {} 1857 | }, 1858 | "cell_type": "code", 1859 | "source": [ 1860 | "" 1861 | ], 1862 | "execution_count": 0, 1863 | "outputs": [] 1864 | } 1865 | ] 1866 | } --------------------------------------------------------------------------------