--------------------------------------------------------------------------------
/docs/searchindex.js:
--------------------------------------------------------------------------------
1 | Search.setIndex({docnames:["index","tensorboard"],envversion:51,filenames:["index.rst","tensorboard.rst"],objects:{"":{tensorboard:[1,0,0,"-"]},"tensorboard.SummaryWriter":{__init__:[1,2,1,""],add_audio:[1,2,1,""],add_graph:[1,2,1,""],add_histogram:[1,2,1,""],add_image:[1,2,1,""],add_scalar:[1,2,1,""],add_text:[1,2,1,""]},"tensorboard.embedding":{add_embedding:[1,3,1,""]},tensorboard:{SummaryWriter:[1,1,1,""]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:function"},terms:{"break":1,"case":1,"class":1,"default":1,"final":1,"float":1,"function":1,"import":1,"int":1,"true":1,"while":1,The:1,Use:1,With:[],__init__:1,add:1,add_audio:1,add_embed:1,add_graph:1,add_histogram:1,add_imag:1,add_scalar:1,add_text:1,addit:1,after:1,allow:1,along:1,anyth:1,api:1,argument:1,arrai:1,asynchron:1,attribut:1,audio:1,auto:1,autograd:1,back:1,backward:1,base:1,between:1,bin:1,build:1,call:1,can:1,cannot:1,choic:[],code:1,com:1,complet:1,content:[0,1],conv2d:[],convert:1,correct:1,correspond:1,creat:1,current:1,data:1,dataparallel:1,determin:1,differ:1,dimens:1,directli:1,directori:1,displai:1,doc:1,doe:1,down:1,draw:1,dump:1,each:1,either:[],element:1,embed:1,embedding1:1,embedding2:1,embedding3:1,encount:1,enumer:1,equal:1,event:1,exampl:1,experiment:1,extern:[],featur:1,file:1,file_writ:1,find:1,fix:1,from:1,funtion:1,gener:1,get:1,get_logdir:1,github:1,given:1,glitch:1,global:1,global_step:1,goe:1,good:1,gradient:1,graph:1,have:1,high:1,histogram:1,how:1,html:1,http:1,hyperlink:[],idea:1,identifi:1,imag:1,img_tensor:1,index:0,input:1,instal:1,instead:1,intern:[],invok:1,issu:1,kernel:[],keyword:1,khz:1,kwlist:1,label:1,label_img:1,lanpa:1,lastvar:1,leav:1,len:1,level:1,like:[],list:1,locat:1,log_dir:1,logdir:1,loop:1,lstm:1,made:1,make_grid:1,mat:1,math:[],matrix:1,meta:1,metadata:1,method:1,model:1,modul:[0,1],name:1,need:1,next_funct:1,node:1,none:1,number:1,numpi:1,omit:1,one:1,option:1,org:1,other:1,otherwis:1,page:0,paramet:1,pass:1,path:1,pleas:1,point:1,prepar:1,prevent:1,program:1,provid:1,python:[],rand:1,randn:1,rang:1,rate:1,record:1,recurs:1,refer:1,requir:1,requires_grad:1,result:1,rnn:1,root:1,row:1,rrr:[],run:1,sai:1,sampl:1,save:1,save_path:1,scalar:1,scalar_valu:1,scipi:1,search:0,see:1,separ:1,shape:1,should:1,show:1,size:1,slow:1,snd_tensor:1,some:1,sound:1,sourc:1,squar:[],start:1,step:1,str:1,strang:1,stride:[],string:1,summari:1,summarywrit:1,summarywritt:1,tag:1,tensor:1,tensorflow:1,text:1,text_str:1,than:1,therefor:1,thi:1,torch:1,torchvis:1,trace:1,train:1,updat:1,use:1,util:1,valu:1,variabl:1,vector:1,visual:1,when:1,where:1,which:1,without:1,write:1,writer:1,www:[],you:1,your:1},titles:["Welcome to tensorboard-pytorch’s documentation!","tensorboard-pytorch"],titleterms:{document:0,indic:0,pytorch:[0,1],tabl:0,tensorboard:[0,1],welcom:0}})
--------------------------------------------------------------------------------
/fast_neural_style/README.md:
--------------------------------------------------------------------------------
1 | # fast-neural-style :city_sunrise: :rocket:
2 | This repository contains a PyTorch implementation of an algorithm for artistic style transfer. The algorithm can be used to mix the content of one image with the style of another. For example, here is a photograph of a door arch rendered in the style of a stained-glass painting.
3 |
4 | The model uses the method described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](https://arxiv.org/abs/1603.08155) along with [Instance Normalization](https://arxiv.org/pdf/1607.08022.pdf). The saved models for the examples shown in the README can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0).
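Style is matched, as in the paper, through Gram matrices of VGG feature maps: for a feature map with `C` channels and `H x W` spatial extent, flattened into a `C x (H*W)` matrix `F`, the normalized Gram matrix computed in `neural_style/utils.py` is `G = F F^T / (C*H*W)`.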
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | ## Requirements
13 | The program is written in Python and uses [pytorch](http://pytorch.org/) and [scipy](https://www.scipy.org). A GPU is not necessary, but it can provide a significant speed-up, especially when training a new model. Regular-sized images can be styled on a laptop or desktop using the saved models.
14 |
15 | ## Usage
16 | Stylize an image:
17 | ```bash
18 | python neural_style/neural_style.py eval --content-image </path/to/content/image> --model </path/to/saved/model> --output-image </path/to/output/image> --cuda 0
19 | ```
20 | * `--content-image`: path to the content image you want to stylize.
21 | * `--model`: saved model to be used for stylizing the image (e.g. `mosaic.pth`).
22 | * `--output-image`: path for saving the output image.
23 | * `--content-scale`: factor for scaling down the content image if memory is an issue (e.g. a value of 2 will halve the height and width of the content image).
24 | * `--cuda`: set it to 1 to run on GPU, 0 for CPU.
25 |
26 | Train a model:
27 | ```bash
28 | python neural_style/neural_style.py train --dataset </path/to/train/dataset> --style-image </path/to/style/image> --save-model-dir </path/to/save-model/folder> --epochs 2 --cuda 1
29 | ```
30 |
31 | There are several command-line arguments; the important ones are listed below:
32 | * `--dataset`: path to the training dataset; it should point to a folder containing another folder with all the training images. I used the COCO 2014 Training images dataset [80K/13GB] [(download)](http://mscoco.org/dataset/#download).
33 | * `--style-image`: path to the style image.
34 | * `--save-model-dir`: path to the folder where the trained model will be saved.
35 | * `--cuda`: set it to 1 to run on GPU, 0 for CPU.
36 |
37 | Refer to `neural_style/neural_style.py` for the other command-line arguments. When training new models you might have to tune the values of `--content-weight` and `--style-weight`. The mosaic style model shown above was trained with `--content-weight 1e5` and `--style-weight 1e10`. The remaining three models were trained with weights of a similar order of magnitude, with slight variation in `--style-weight` (`5e10` or `1e11`); an example invocation is sketched below.
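For reference, a training invocation matching the mosaic settings above might look like this (the dataset and output paths are placeholders):

```bash
python neural_style/neural_style.py train --dataset </path/to/train/dataset> --style-image images/style-images/mosaic.jpg --save-model-dir </path/to/save-model/folder> --content-weight 1e5 --style-weight 1e10 --epochs 2 --cuda 1
```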
38 |
39 | ## Models
40 |
41 | Models for the examples shown below can be downloaded from [here](https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=0) or by running the script ``download_saved_models.sh``.
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/fast_neural_style/download_saved_models.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | wget -O saved_models.zip "https://www.dropbox.com/s/lrvwfehqdcxoza8/saved_models.zip?dl=1"
3 | unzip saved_models.zip
3 |
--------------------------------------------------------------------------------
/fast_neural_style/images/content-images/amber.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/content-images/amber.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/output-images/amber-candy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/output-images/amber-candy.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/output-images/amber-mosaic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/output-images/amber-mosaic.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/output-images/amber-rain-princess.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/output-images/amber-rain-princess.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/output-images/amber-udnie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/output-images/amber-udnie.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/style-images/candy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/style-images/candy.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/style-images/mosaic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/style-images/mosaic.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/style-images/rain-princess-cropped.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/style-images/rain-princess-cropped.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/style-images/rain-princess.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/style-images/rain-princess.jpg
--------------------------------------------------------------------------------
/fast_neural_style/images/style-images/udnie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/images/style-images/udnie.jpg
--------------------------------------------------------------------------------
/fast_neural_style/neural_style/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lanpa/tensorboard-pytorch-examples/92c56fbcf44628930a8ae8b896133dff6a1fb857/fast_neural_style/neural_style/__init__.py
--------------------------------------------------------------------------------
/fast_neural_style/neural_style/transformer_net.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class TransformerNet(torch.nn.Module):
5 | def __init__(self):
6 | super(TransformerNet, self).__init__()
7 | # Initial convolution layers
8 | self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
9 | self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
10 | self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
11 | self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
12 | self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
13 | self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
14 | # Residual layers
15 | self.res1 = ResidualBlock(128)
16 | self.res2 = ResidualBlock(128)
17 | self.res3 = ResidualBlock(128)
18 | self.res4 = ResidualBlock(128)
19 | self.res5 = ResidualBlock(128)
20 | # Upsampling Layers
21 | self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
22 | self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
23 | self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
24 | self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
25 | self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
26 | # Non-linearities
27 | self.relu = torch.nn.ReLU()
28 |
29 | def forward(self, X):
30 | y = self.relu(self.in1(self.conv1(X)))
31 | y = self.relu(self.in2(self.conv2(y)))
32 | y = self.relu(self.in3(self.conv3(y)))
33 | y = self.res1(y)
34 | y = self.res2(y)
35 | y = self.res3(y)
36 | y = self.res4(y)
37 | y = self.res5(y)
38 | y = self.relu(self.in4(self.deconv1(y)))
39 | y = self.relu(self.in5(self.deconv2(y)))
40 | y = self.deconv3(y)
41 | return y
42 |
43 |
44 | class ConvLayer(torch.nn.Module):
45 | def __init__(self, in_channels, out_channels, kernel_size, stride):
46 | super(ConvLayer, self).__init__()
47 | reflection_padding = kernel_size // 2
48 | self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
49 | self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
50 |
51 | def forward(self, x):
52 | out = self.reflection_pad(x)
53 | out = self.conv2d(out)
54 | return out
55 |
56 |
57 | class ResidualBlock(torch.nn.Module):
58 | """ResidualBlock
59 | introduced in: https://arxiv.org/abs/1512.03385
60 | recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
61 | """
62 |
63 | def __init__(self, channels):
64 | super(ResidualBlock, self).__init__()
65 | self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
66 | self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
67 | self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
68 | self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
69 | self.relu = torch.nn.ReLU()
70 |
71 | def forward(self, x):
72 | residual = x
73 | out = self.relu(self.in1(self.conv1(x)))
74 | out = self.in2(self.conv2(out))
75 | out = out + residual
76 | return out
77 |
78 |
79 | class UpsampleConvLayer(torch.nn.Module):
80 | """UpsampleConvLayer
81 | Upsamples the input and then does a convolution. This method gives better results
82 | compared to ConvTranspose2d.
83 | ref: http://distill.pub/2016/deconv-checkerboard/
84 | """
85 |
86 | def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
87 | super(UpsampleConvLayer, self).__init__()
88 | self.upsample = upsample
89 | if upsample:
90 | self.upsample_layer = torch.nn.UpsamplingNearest2d(scale_factor=upsample)
91 | reflection_padding = kernel_size // 2
92 | self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
93 | self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
94 |
95 | def forward(self, x):
96 | x_in = x
97 | if self.upsample:
98 | x_in = self.upsample_layer(x_in)
99 | out = self.reflection_pad(x_in)
100 | out = self.conv2d(out)
101 | return out
102 |
--------------------------------------------------------------------------------
/fast_neural_style/neural_style/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from PIL import Image
3 | from torch.autograd import Variable
4 |
5 |
6 | def load_image(filename, size=None, scale=None):
7 | img = Image.open(filename)
8 | if size is not None:
9 | img = img.resize((size, size), Image.ANTIALIAS)
10 | elif scale is not None:
11 | img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
12 | return img
13 |
14 |
15 | def save_image(filename, data):
16 | img = data.clone().clamp(0, 255).numpy()
17 | img = img.transpose(1, 2, 0).astype("uint8")
18 | img = Image.fromarray(img)
19 | img.save(filename)
20 |
21 |
22 | def gram_matrix(y):
23 |     (b, ch, h, w) = y.size()  # batch, channels, height, width
24 |     features = y.view(b, ch, w * h)  # flatten each feature map to a row
25 |     features_t = features.transpose(1, 2)
26 |     gram = features.bmm(features_t) / (ch * h * w)  # normalized batched Gram matrix (style loss)
27 |     return gram
28 |
29 |
30 | def normalize_batch(batch):
31 | # normalize using imagenet mean and std
32 | mean = batch.data.new(batch.data.size())
33 | std = batch.data.new(batch.data.size())
34 | mean[:, 0, :, :] = 0.485
35 | mean[:, 1, :, :] = 0.456
36 | mean[:, 2, :, :] = 0.406
37 | std[:, 0, :, :] = 0.229
38 | std[:, 1, :, :] = 0.224
39 | std[:, 2, :, :] = 0.225
40 | batch = torch.div(batch, 255.0)
41 | batch -= Variable(mean)
42 | batch = batch / Variable(std)
43 | return batch
44 |
--------------------------------------------------------------------------------
/fast_neural_style/neural_style/vgg.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | import torch
4 | from torchvision import models
5 |
6 |
7 | class Vgg16(torch.nn.Module):
8 | def __init__(self, requires_grad=False):
9 | super(Vgg16, self).__init__()
10 | vgg_pretrained_features = models.vgg16(pretrained=True).features
11 | self.slice1 = torch.nn.Sequential()
12 | self.slice2 = torch.nn.Sequential()
13 | self.slice3 = torch.nn.Sequential()
14 | self.slice4 = torch.nn.Sequential()
15 | for x in range(4):
16 | self.slice1.add_module(str(x), vgg_pretrained_features[x])
17 | for x in range(4, 9):
18 | self.slice2.add_module(str(x), vgg_pretrained_features[x])
19 | for x in range(9, 16):
20 | self.slice3.add_module(str(x), vgg_pretrained_features[x])
21 | for x in range(16, 23):
22 | self.slice4.add_module(str(x), vgg_pretrained_features[x])
23 | if not requires_grad:
24 | for param in self.parameters():
25 | param.requires_grad = False
26 |
27 | def forward(self, X):
28 | h = self.slice1(X)
29 | h_relu1_2 = h
30 | h = self.slice2(h)
31 | h_relu2_2 = h
32 | h = self.slice3(h)
33 | h_relu3_3 = h
34 | h = self.slice4(h)
35 | h_relu4_3 = h
36 | vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'])
37 | out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3)
38 | return out
39 |
--------------------------------------------------------------------------------
/imagenet/README.md:
--------------------------------------------------------------------------------
1 | # ImageNet training in PyTorch
2 |
3 | This implements training of popular model architectures, such as ResNet, AlexNet, and VGG on the ImageNet dataset.
4 |
5 | ## Requirements
6 |
7 | - Install PyTorch ([pytorch.org](http://pytorch.org))
8 | - `pip install -r requirements.txt`
9 | - Download the ImageNet dataset and move the validation images into labeled subfolders (see the expected layout sketched below)
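The script reads the data in the torchvision `ImageFolder` convention: one subfolder per class under both `train` and `val`. A sketch of the expected layout (the WordNet-ID folder and file names are illustrative):

```
imagenet/
├── train/
│   ├── n01440764/
│   │   └── n01440764_10026.JPEG ...
│   └── ...
└── val/
    ├── n01440764/
    │   └── ILSVRC2012_val_00000293.JPEG ...
    └── ...
```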
10 |
11 | ## Training
12 |
13 | To train a model, run `main.py` with the desired model architecture and the path to the ImageNet dataset:
14 |
15 | ```bash
16 | python main.py -a resnet18 [imagenet-folder with train and val folders]
17 | ```
18 |
19 | The default learning rate schedule starts at 0.1 and decays by a factor of 10 every 30 epochs. This is appropriate for ResNet and models with batch normalization, but too high for AlexNet and VGG. Use 0.01 as the initial learning rate for AlexNet or VGG:
20 |
21 | ```bash
22 | python main.py -a alexnet --lr 0.01 [imagenet-folder with train and val folders]
23 | ```
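The schedule above is a simple step decay, multiplying the learning rate by 0.1 every 30 epochs; a minimal sketch of the rule (the function and argument names are illustrative, not necessarily those used in `main.py`):

```python
def step_lr(base_lr, epoch):
    """Learning rate decayed by a factor of 10 every 30 epochs."""
    return base_lr * (0.1 ** (epoch // 30))

# With the ResNet default of 0.1:
# epochs 0-29 -> 0.1, epochs 30-59 -> 0.01, epochs 60-89 -> 0.001
```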
24 |
25 | ## Usage
26 |
27 | ```
28 | usage: main.py [-h] [--arch ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N]
29 | [--lr LR] [--momentum M] [--weight-decay W] [--print-freq N]
30 | [--resume PATH] [-e] [--pretrained]
31 | DIR
32 |
33 | PyTorch ImageNet Training
34 |
35 | positional arguments:
36 | DIR path to dataset
37 |
38 | optional arguments:
39 | -h, --help show this help message and exit
40 | --arch ARCH, -a ARCH model architecture: alexnet | resnet | resnet101 |
41 | resnet152 | resnet18 | resnet34 | resnet50 | vgg |
42 | vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn
43 | | vgg19 | vgg19_bn (default: resnet18)
44 | -j N, --workers N number of data loading workers (default: 4)
45 | --epochs N number of total epochs to run
46 | --start-epoch N manual epoch number (useful on restarts)
47 | -b N, --batch-size N mini-batch size (default: 256)
48 | --lr LR, --learning-rate LR
49 | initial learning rate
50 | --momentum M momentum
51 | --weight-decay W, --wd W
52 | weight decay (default: 1e-4)
53 | --print-freq N, -p N print frequency (default: 10)
54 | --resume PATH path to latest checkpoint (default: none)
55 | -e, --evaluate evaluate model on validation set
56 | --pretrained use pre-trained model
57 | ```
58 |
--------------------------------------------------------------------------------
/imagenet/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/mnist/README.md:
--------------------------------------------------------------------------------
1 | # Basic MNIST Example
2 |
3 | ```bash
4 | pip install -r requirements.txt
5 | python main.py
6 | # CUDA_VISIBLE_DEVICES=2 python main.py  # to pin training to a specific GPU, e.g. id 2
7 | ```
8 |
--------------------------------------------------------------------------------
/mnist/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import argparse
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torch.optim as optim
7 | from torchvision import datasets, transforms
8 | from torch.autograd import Variable
9 | from tensorboardX import SummaryWriter
10 | writer = SummaryWriter('runs')
11 | # Training settings
12 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
13 | parser.add_argument('--batch-size', type=int, default=64, metavar='N',
14 | help='input batch size for training (default: 64)')
15 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
16 | help='input batch size for testing (default: 1000)')
17 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
18 | help='number of epochs to train (default: 10)')
19 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
20 | help='learning rate (default: 0.01)')
21 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
22 | help='SGD momentum (default: 0.5)')
23 | parser.add_argument('--no-cuda', action='store_true', default=False,
24 | help='disables CUDA training')
25 | parser.add_argument('--seed', type=int, default=1, metavar='S',
26 | help='random seed (default: 1)')
27 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
28 | help='how many batches to wait before logging training status')
29 | args = parser.parse_args()
30 | args.cuda = not args.no_cuda and torch.cuda.is_available()
31 |
32 | torch.manual_seed(args.seed)
33 | if args.cuda:
34 | torch.cuda.manual_seed(args.seed)
35 |
36 |
37 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
38 | train_loader = torch.utils.data.DataLoader(
39 | datasets.MNIST('../data', train=True, download=True,
40 | transform=transforms.Compose([
41 | transforms.ToTensor(),
42 | transforms.Normalize((0.1307,), (0.3081,))
43 | ])),
44 | batch_size=args.batch_size, shuffle=True, **kwargs)
45 | test_loader = torch.utils.data.DataLoader(
46 | datasets.MNIST('../data', train=False, transform=transforms.Compose([
47 | transforms.ToTensor(),
48 | transforms.Normalize((0.1307,), (0.3081,))
49 | ])),
50 |     batch_size=args.test_batch_size, shuffle=True, **kwargs)
51 |
52 |
53 | class Net(nn.Module):
54 | def __init__(self):
55 | super(Net, self).__init__()
56 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
57 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
58 | self.conv2_drop = nn.Dropout2d()
59 | self.fc1 = nn.Linear(320, 50)
60 | self.fc2 = nn.Linear(50, 10)
61 |
62 | def forward(self, x):
63 | x = F.relu(F.max_pool2d(self.conv1(x), 2))
64 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
65 | x = x.view(-1, 320)
66 | x = F.relu(self.fc1(x))
67 | x = F.dropout(x, training=self.training)
68 | x = self.fc2(x)
69 | return F.log_softmax(x)
70 |
71 | model = Net()
72 | if args.cuda:
73 | model.cuda()
74 |
75 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
76 |
77 | def train(epoch):
78 | model.train()
79 | for batch_idx, (data, target) in enumerate(train_loader):
80 | if args.cuda:
81 | data, target = data.cuda(), target.cuda()
82 | data, target = Variable(data), Variable(target)
83 | optimizer.zero_grad()
84 | output = model(data)
85 | loss = F.nll_loss(output, target)
86 | loss.backward()
87 | optimizer.step()
88 | if batch_idx % args.log_interval == 0:
89 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
90 | epoch, batch_idx * len(data), len(train_loader.dataset),
91 | 100. * batch_idx / len(train_loader), loss.data[0]))
92 | niter = epoch*len(train_loader)+batch_idx
93 | writer.add_scalar('Train/Loss', loss.data[0], niter)
94 |
95 | def test():
96 | model.eval()
97 | test_loss = 0
98 | correct = 0
99 | for data, target in test_loader:
100 | if args.cuda:
101 | data, target = data.cuda(), target.cuda()
102 | data, target = Variable(data, volatile=True), Variable(target)
103 | output = model(data)
104 | test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
105 | pred = output.data.max(1)[1] # get the index of the max log-probability
106 | correct += pred.eq(target.data).cpu().sum()
107 |
108 | test_loss /= len(test_loader.dataset)
109 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
110 | test_loss, correct, len(test_loader.dataset),
111 | 100. * correct / len(test_loader.dataset)))
112 |
113 |
114 | for epoch in range(1, args.epochs + 1):
115 | train(epoch)
116 | test()
117 |
--------------------------------------------------------------------------------
/mnist/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 |
--------------------------------------------------------------------------------
/mnist_hogwild/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import argparse
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torch.multiprocessing as mp
7 |
8 | from train import train
9 |
10 | # Training settings
11 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
12 | parser.add_argument('--batch-size', type=int, default=64, metavar='N',
13 | help='input batch size for training (default: 64)')
14 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
15 | help='input batch size for testing (default: 1000)')
16 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
17 |                     help='number of epochs to train (default: 10)')
18 | parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
19 | help='learning rate (default: 0.01)')
20 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
21 | help='SGD momentum (default: 0.5)')
22 | parser.add_argument('--seed', type=int, default=1, metavar='S',
23 | help='random seed (default: 1)')
24 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
25 | help='how many batches to wait before logging training status')
26 | parser.add_argument('--num-processes', type=int, default=2, metavar='N',
27 | help='how many training processes to use (default: 2)')
28 |
29 | class Net(nn.Module):
30 | def __init__(self):
31 | super(Net, self).__init__()
32 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
33 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
34 | self.conv2_drop = nn.Dropout2d()
35 | self.fc1 = nn.Linear(320, 50)
36 | self.fc2 = nn.Linear(50, 10)
37 |
38 | def forward(self, x):
39 | x = F.relu(F.max_pool2d(self.conv1(x), 2))
40 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
41 | x = x.view(-1, 320)
42 | x = F.relu(self.fc1(x))
43 | x = F.dropout(x, training=self.training)
44 | x = self.fc2(x)
45 | return F.log_softmax(x)
46 |
47 | if __name__ == '__main__':
48 | args = parser.parse_args()
49 |
50 | torch.manual_seed(args.seed)
51 |
52 | model = Net()
53 | model.share_memory() # gradients are allocated lazily, so they are not shared here
54 |
55 | processes = []
56 | for rank in range(args.num_processes):
57 | p = mp.Process(target=train, args=(rank, args, model))
58 | p.start()
59 | processes.append(p)
60 | for p in processes:
61 | p.join()
62 |
--------------------------------------------------------------------------------
/mnist_hogwild/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 |
--------------------------------------------------------------------------------
/mnist_hogwild/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.optim as optim
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | from torchvision import datasets, transforms
7 |
8 | def train(rank, args, model):
9 | torch.manual_seed(args.seed + rank)
10 |
11 | train_loader = torch.utils.data.DataLoader(
12 | datasets.MNIST('../data', train=True, download=True,
13 | transform=transforms.Compose([
14 | transforms.ToTensor(),
15 | transforms.Normalize((0.1307,), (0.3081,))
16 | ])),
17 | batch_size=args.batch_size, shuffle=True, num_workers=1)
18 | test_loader = torch.utils.data.DataLoader(
19 | datasets.MNIST('../data', train=False, transform=transforms.Compose([
20 | transforms.ToTensor(),
21 | transforms.Normalize((0.1307,), (0.3081,))
22 | ])),
23 |         batch_size=args.test_batch_size, shuffle=True, num_workers=1)
24 |
25 | optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
26 | for epoch in range(1, args.epochs + 1):
27 | train_epoch(epoch, args, model, train_loader, optimizer)
28 | test_epoch(model, test_loader)
29 |
30 |
31 | def train_epoch(epoch, args, model, data_loader, optimizer):
32 | model.train()
33 | pid = os.getpid()
34 | for batch_idx, (data, target) in enumerate(data_loader):
35 | data, target = Variable(data), Variable(target)
36 | optimizer.zero_grad()
37 | output = model(data)
38 | loss = F.nll_loss(output, target)
39 | loss.backward()
40 | optimizer.step()
41 | if batch_idx % args.log_interval == 0:
42 | print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
43 | pid, epoch, batch_idx * len(data), len(data_loader.dataset),
44 | 100. * batch_idx / len(data_loader), loss.data[0]))
45 |
46 |
47 | def test_epoch(model, data_loader):
48 | model.eval()
49 | test_loss = 0
50 | correct = 0
51 | for data, target in data_loader:
52 | data, target = Variable(data, volatile=True), Variable(target)
53 | output = model(data)
54 | test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
55 | pred = output.data.max(1)[1] # get the index of the max log-probability
56 | correct += pred.eq(target.data).cpu().sum()
57 |
58 | test_loss /= len(data_loader.dataset)
59 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
60 | test_loss, correct, len(data_loader.dataset),
61 | 100. * correct / len(data_loader.dataset)))
62 |
--------------------------------------------------------------------------------
/regression/README.md:
--------------------------------------------------------------------------------
1 | # Linear regression example
2 |
3 | Trains a single fully-connected layer to fit a 4th degree polynomial.
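Concretely, `main.py` expands each input into the features `[x, x^2, x^3, x^4]` and fits `y = w · [x, x^2, x^3, x^4] + b` with manual SGD steps on a smooth L1 loss, so the learned weights and bias can be compared directly against the randomly drawn target polynomial.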
4 |
--------------------------------------------------------------------------------
/regression/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | from itertools import count
4 |
5 | import torch
6 | import torch.autograd
7 | import torch.nn.functional as F
8 | from torch.autograd import Variable
9 |
10 | POLY_DEGREE = 4
11 | W_target = torch.randn(POLY_DEGREE, 1) * 5
12 | b_target = torch.randn(1) * 5
13 |
14 |
15 | def make_features(x):
16 | """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4]."""
17 | x = x.unsqueeze(1)
18 | return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1)
19 |
20 |
21 | def f(x):
22 | """Approximated function."""
23 | return x.mm(W_target) + b_target[0]
24 |
25 |
26 | def poly_desc(W, b):
27 | """Creates a string description of a polynomial."""
28 | result = 'y = '
29 | for i, w in enumerate(W):
30 | result += '{:+.2f} x^{} '.format(w, len(W) - i)
31 | result += '{:+.2f}'.format(b[0])
32 | return result
33 |
34 |
35 | def get_batch(batch_size=32):
36 | """Builds a batch i.e. (x, f(x)) pair."""
37 | random = torch.randn(batch_size)
38 | x = make_features(random)
39 | y = f(x)
40 | return Variable(x), Variable(y)
41 |
42 |
43 | # Define model
44 | fc = torch.nn.Linear(W_target.size(0), 1)
45 |
46 | for batch_idx in count(1):
47 | # Get data
48 | batch_x, batch_y = get_batch()
49 |
50 | # Reset gradients
51 | fc.zero_grad()
52 |
53 | # Forward pass
54 | output = F.smooth_l1_loss(fc(batch_x), batch_y)
55 | loss = output.data[0]
56 |
57 | # Backward pass
58 | output.backward()
59 |
60 | # Apply gradients
61 | for param in fc.parameters():
62 | param.data.add_(-0.1 * param.grad.data)
63 |
64 | # Stop criterion
65 | if loss < 1e-3:
66 | break
67 |
68 | print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
69 | print('==> Learned function:\t' + poly_desc(fc.weight.data.view(-1), fc.bias.data))
70 | print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target))
71 |
--------------------------------------------------------------------------------
/reinforcement_learning/README.md:
--------------------------------------------------------------------------------
1 | # Reinforcement learning training example
2 |
3 | ```bash
4 | pip install -r requirements.txt
5 | # For REINFORCE:
6 | python reinforce.py
7 | # For actor critic:
8 | python actor_critic.py
9 | ```
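Both scripts convert the per-step rewards of an episode into discounted returns before updating the policy, walking the reward list back to front with `R = r + gamma * R` (see `finish_episode` in either file). A tiny worked example of that recursion:

```python
# Discounted returns computed back to front; gamma is the discount factor.
gamma = 0.99
rewards = [1.0, 1.0, 1.0]   # rewards collected during one episode
returns, R = [], 0.0
for r in reversed(rewards):
    R = r + gamma * R
    returns.insert(0, R)
print(returns)  # [2.9701, 1.99, 1.0]
```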
10 |
--------------------------------------------------------------------------------
/reinforcement_learning/actor_critic.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import gym
3 | import numpy as np
4 | from itertools import count
5 | from collections import namedtuple
6 |
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | import torch.optim as optim
11 | import torch.autograd as autograd
12 | from torch.autograd import Variable
13 |
14 |
15 | parser = argparse.ArgumentParser(description='PyTorch actor-critic example')
16 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
17 | help='discount factor (default: 0.99)')
18 | parser.add_argument('--seed', type=int, default=543, metavar='N',
19 |                     help='random seed (default: 543)')
20 | parser.add_argument('--render', action='store_true',
21 | help='render the environment')
22 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
23 | help='interval between training status logs (default: 10)')
24 | args = parser.parse_args()
25 |
26 |
27 | env = gym.make('CartPole-v0')
28 | env.seed(args.seed)
29 | torch.manual_seed(args.seed)
30 |
31 |
32 | SavedAction = namedtuple('SavedAction', ['action', 'value'])
33 | class Policy(nn.Module):
34 | def __init__(self):
35 | super(Policy, self).__init__()
36 | self.affine1 = nn.Linear(4, 128)
37 | self.action_head = nn.Linear(128, 2)
38 | self.value_head = nn.Linear(128, 1)
39 |
40 | self.saved_actions = []
41 | self.rewards = []
42 |
43 | def forward(self, x):
44 | x = F.relu(self.affine1(x))
45 | action_scores = self.action_head(x)
46 | state_values = self.value_head(x)
47 | return F.softmax(action_scores), state_values
48 |
49 |
50 | model = Policy()
51 | optimizer = optim.Adam(model.parameters(), lr=3e-2)
52 |
53 |
54 | def select_action(state):
55 | state = torch.from_numpy(state).float().unsqueeze(0)
56 | probs, state_value = model(Variable(state))
57 | action = probs.multinomial()
58 | model.saved_actions.append(SavedAction(action, state_value))
59 | return action.data
60 |
61 |
62 | def finish_episode():
63 | R = 0
64 | saved_actions = model.saved_actions
65 | value_loss = 0
66 | rewards = []
67 |     for r in model.rewards[::-1]:  # walk rewards back to front to build discounted returns
68 | R = r + args.gamma * R
69 | rewards.insert(0, R)
70 | rewards = torch.Tensor(rewards)
71 | rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
72 | for (action, value), r in zip(saved_actions, rewards):
73 | reward = r - value.data[0,0]
74 | action.reinforce(reward)
75 | value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
76 | optimizer.zero_grad()
77 | final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
78 | gradients = [torch.ones(1)] + [None] * len(saved_actions)
79 | autograd.backward(final_nodes, gradients)
80 | optimizer.step()
81 | del model.rewards[:]
82 | del model.saved_actions[:]
83 |
84 |
85 | running_reward = 10
86 | for i_episode in count(1):
87 | state = env.reset()
88 | for t in range(10000): # Don't infinite loop while learning
89 | action = select_action(state)
90 | state, reward, done, _ = env.step(action[0,0])
91 | if args.render:
92 | env.render()
93 | model.rewards.append(reward)
94 | if done:
95 | break
96 |
97 | running_reward = running_reward * 0.99 + t * 0.01
98 | finish_episode()
99 | if i_episode % args.log_interval == 0:
100 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(
101 | i_episode, t, running_reward))
102 |     if running_reward > env.spec.reward_threshold:
103 | print("Solved! Running reward is now {} and "
104 | "the last episode runs to {} time steps!".format(running_reward, t))
105 | break
106 |
--------------------------------------------------------------------------------
/reinforcement_learning/reinforce.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import gym
3 | import numpy as np
4 | from itertools import count
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import torch.optim as optim
10 | import torch.autograd as autograd
11 | from torch.autograd import Variable
12 |
13 |
14 | parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
15 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
16 | help='discount factor (default: 0.99)')
17 | parser.add_argument('--seed', type=int, default=543, metavar='N',
18 | help='random seed (default: 543)')
19 | parser.add_argument('--render', action='store_true',
20 | help='render the environment')
21 | parser.add_argument('--log_interval', type=int, default=10, metavar='N',
22 | help='interval between training status logs (default: 10)')
23 | args = parser.parse_args()
24 |
25 |
26 | env = gym.make('CartPole-v0')
27 | env.seed(args.seed)
28 | torch.manual_seed(args.seed)
29 |
30 |
31 | class Policy(nn.Module):
32 | def __init__(self):
33 | super(Policy, self).__init__()
34 | self.affine1 = nn.Linear(4, 128)
35 | self.affine2 = nn.Linear(128, 2)
36 |
37 | self.saved_actions = []
38 | self.rewards = []
39 |
40 | def forward(self, x):
41 | x = F.relu(self.affine1(x))
42 | action_scores = self.affine2(x)
43 | return F.softmax(action_scores)
44 |
45 |
46 | policy = Policy()
47 | optimizer = optim.Adam(policy.parameters(), lr=1e-2)
48 |
49 |
50 | def select_action(state):
51 | state = torch.from_numpy(state).float().unsqueeze(0)
52 | probs = policy(Variable(state))
53 | action = probs.multinomial()
54 | policy.saved_actions.append(action)
55 | return action.data
56 |
57 |
58 | def finish_episode():
59 | R = 0
60 | rewards = []
61 |     for r in policy.rewards[::-1]:  # walk rewards back to front to build discounted returns
62 | R = r + args.gamma * R
63 | rewards.insert(0, R)
64 | rewards = torch.Tensor(rewards)
65 | rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
66 | for action, r in zip(policy.saved_actions, rewards):
67 | action.reinforce(r)
68 | optimizer.zero_grad()
69 | autograd.backward(policy.saved_actions, [None for _ in policy.saved_actions])
70 | optimizer.step()
71 | del policy.rewards[:]
72 | del policy.saved_actions[:]
73 |
74 |
75 | running_reward = 10
76 | for i_episode in count(1):
77 | state = env.reset()
78 | for t in range(10000): # Don't infinite loop while learning
79 | action = select_action(state)
80 | state, reward, done, _ = env.step(action[0,0])
81 | if args.render:
82 | env.render()
83 | policy.rewards.append(reward)
84 | if done:
85 | break
86 |
87 | running_reward = running_reward * 0.99 + t * 0.01
88 | finish_episode()
89 | if i_episode % args.log_interval == 0:
90 | print('Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(
91 | i_episode, t, running_reward))
92 |     if running_reward > env.spec.reward_threshold:
93 | print("Solved! Running reward is now {} and "
94 | "the last episode runs to {} time steps!".format(running_reward, t))
95 | break
96 |
--------------------------------------------------------------------------------
/reinforcement_learning/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | numpy
3 | gym
4 |
--------------------------------------------------------------------------------
/snli/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 |
5 |
6 | class Bottle(nn.Module):  # applies the wrapped layer to 3D input by folding the first two dims together, then restoring them
7 |
8 | def forward(self, input):
9 | if len(input.size()) <= 2:
10 | return super(Bottle, self).forward(input)
11 | size = input.size()[:2]
12 | out = super(Bottle, self).forward(input.view(size[0]*size[1], -1))
13 | return out.view(size[0], size[1], -1)
14 |
15 |
16 | class Linear(Bottle, nn.Linear):
17 | pass
18 |
19 |
20 | class Encoder(nn.Module):
21 |
22 | def __init__(self, config):
23 | super(Encoder, self).__init__()
24 | self.config = config
25 | input_size = config.d_proj if config.projection else config.d_embed
26 | self.rnn = nn.LSTM(input_size=input_size, hidden_size=config.d_hidden,
27 | num_layers=config.n_layers, dropout=config.dp_ratio,
28 | bidirectional=config.birnn)
29 |
30 | def forward(self, inputs):
31 | batch_size = inputs.size()[1]
32 | state_shape = self.config.n_cells, batch_size, self.config.d_hidden
33 | h0 = c0 = Variable(inputs.data.new(*state_shape).zero_())
34 | outputs, (ht, ct) = self.rnn(inputs, (h0, c0))
35 | return ht[-1] if not self.config.birnn else ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1)
36 |
37 |
38 | class SNLIClassifier(nn.Module):
39 |
40 | def __init__(self, config):
41 | super(SNLIClassifier, self).__init__()
42 | self.config = config
43 | self.embed = nn.Embedding(config.n_embed, config.d_embed)
44 | self.projection = Linear(config.d_embed, config.d_proj)
45 | self.encoder = Encoder(config)
46 | self.dropout = nn.Dropout(p=config.dp_ratio)
47 | self.relu = nn.ReLU()
48 | seq_in_size = 2*config.d_hidden
49 | if self.config.birnn:
50 | seq_in_size *= 2
51 | lin_config = [seq_in_size]*2
52 | self.out = nn.Sequential(
53 | Linear(*lin_config),
54 | self.relu,
55 | self.dropout,
56 | Linear(*lin_config),
57 | self.relu,
58 | self.dropout,
59 | Linear(*lin_config),
60 | self.relu,
61 | self.dropout,
62 | Linear(seq_in_size, config.d_out))
63 |
64 | def forward(self, batch):
65 | prem_embed = self.embed(batch.premise)
66 | hypo_embed = self.embed(batch.hypothesis)
67 | if self.config.fix_emb:
68 | prem_embed = Variable(prem_embed.data)
69 | hypo_embed = Variable(hypo_embed.data)
70 | if self.config.projection:
71 | prem_embed = self.relu(self.projection(prem_embed))
72 | hypo_embed = self.relu(self.projection(hypo_embed))
73 | premise = self.encoder(prem_embed)
74 | hypothesis = self.encoder(hypo_embed)
75 | scores = self.out(torch.cat([premise, hypothesis], 1))
76 | return scores
77 |
--------------------------------------------------------------------------------
/snli/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchtext
3 |
--------------------------------------------------------------------------------
/snli/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import glob
4 |
5 | import torch
6 | import torch.optim as O
7 | import torch.nn as nn
8 |
9 | from torchtext import data
10 | from torchtext import datasets
11 |
12 | from model import SNLIClassifier
13 | from util import get_args, makedirs
14 |
15 |
16 | args = get_args()
17 | torch.cuda.set_device(args.gpu)
18 |
19 | inputs = data.Field(lower=args.lower)
20 | answers = data.Field(sequential=False)
21 |
22 | train, dev, test = datasets.SNLI.splits(inputs, answers)
23 |
24 | inputs.build_vocab(train, dev, test)
25 | if args.word_vectors:
26 | if os.path.isfile(args.vector_cache):
27 | inputs.vocab.vectors = torch.load(args.vector_cache)
28 | else:
29 | inputs.vocab.load_vectors(wv_dir=args.data_cache, wv_type=args.word_vectors, wv_dim=args.d_embed)
30 | makedirs(os.path.dirname(args.vector_cache))
31 | torch.save(inputs.vocab.vectors, args.vector_cache)
32 | answers.build_vocab(train)
33 |
34 | train_iter, dev_iter, test_iter = data.BucketIterator.splits(
35 | (train, dev, test), batch_size=args.batch_size, device=args.gpu)
36 |
37 | config = args
38 | config.n_embed = len(inputs.vocab)
39 | config.d_out = len(answers.vocab)
40 | config.n_cells = config.n_layers
41 |
42 | # double the number of cells for bidirectional networks
43 | if config.birnn:
44 | config.n_cells *= 2
45 |
46 | if args.resume_snapshot:
47 |     model = torch.load(args.resume_snapshot, map_location=lambda storage, location: storage.cuda(args.gpu))
48 | else:
49 | model = SNLIClassifier(config)
50 | if args.word_vectors:
51 | model.embed.weight.data = inputs.vocab.vectors
52 | model.cuda()
53 |
54 | criterion = nn.CrossEntropyLoss()
55 | opt = O.Adam(model.parameters(), lr=args.lr)
56 |
57 | iterations = 0
58 | start = time.time()
59 | best_dev_acc = -1
60 | train_iter.repeat = False
61 | header = ' Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss Accuracy Dev/Accuracy'
62 | dev_log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'.split(','))
63 | log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'.split(','))
64 | makedirs(args.save_path)
65 | print(header)
66 |
67 | for epoch in range(args.epochs):
68 | train_iter.init_epoch()
69 | n_correct, n_total = 0, 0
70 | for batch_idx, batch in enumerate(train_iter):
71 |
72 | # switch model to training mode, clear gradient accumulators
73 | model.train(); opt.zero_grad()
74 |
75 | iterations += 1
76 |
77 | # forward pass
78 | answer = model(batch)
79 |
80 | # calculate accuracy of predictions in the current batch
81 | n_correct += (torch.max(answer, 1)[1].view(batch.label.size()).data == batch.label.data).sum()
82 | n_total += batch.batch_size
83 | train_acc = 100. * n_correct/n_total
84 |
85 | # calculate loss of the network output with respect to training labels
86 | loss = criterion(answer, batch.label)
87 |
88 | # backpropagate and update optimizer learning rate
89 | loss.backward(); opt.step()
90 |
91 | # checkpoint model periodically
92 | if iterations % args.save_every == 0:
93 | snapshot_prefix = os.path.join(args.save_path, 'snapshot')
94 | snapshot_path = snapshot_prefix + '_acc_{:.4f}_loss_{:.6f}_iter_{}_model.pt'.format(train_acc, loss.data[0], iterations)
95 | torch.save(model, snapshot_path)
96 | for f in glob.glob(snapshot_prefix + '*'):
97 | if f != snapshot_path:
98 | os.remove(f)
99 |
100 | # evaluate performance on validation set periodically
101 | if iterations % args.dev_every == 0:
102 |
103 | # switch model to evaluation mode
104 | model.eval(); dev_iter.init_epoch()
105 |
106 | # calculate accuracy on validation set
107 | n_dev_correct, dev_loss = 0, 0
108 | for dev_batch_idx, dev_batch in enumerate(dev_iter):
109 | answer = model(dev_batch)
110 | n_dev_correct += (torch.max(answer, 1)[1].view(dev_batch.label.size()).data == dev_batch.label.data).sum()
111 | dev_loss = criterion(answer, dev_batch.label)
112 | dev_acc = 100. * n_dev_correct / len(dev)
113 |
114 | print(dev_log_template.format(time.time()-start,
115 | epoch, iterations, 1+batch_idx, len(train_iter),
116 | 100. * (1+batch_idx) / len(train_iter), loss.data[0], dev_loss.data[0], train_acc, dev_acc))
117 |
118 |         # update best validation set accuracy
119 | if dev_acc > best_dev_acc:
120 |
121 | # found a model with better validation set accuracy
122 |
123 | best_dev_acc = dev_acc
124 | snapshot_prefix = os.path.join(args.save_path, 'best_snapshot')
125 | snapshot_path = snapshot_prefix + '_devacc_{}_devloss_{}__iter_{}_model.pt'.format(dev_acc, dev_loss.data[0], iterations)
126 |
127 | # save model, delete previous 'best_snapshot' files
128 | torch.save(model, snapshot_path)
129 | for f in glob.glob(snapshot_prefix + '*'):
130 | if f != snapshot_path:
131 | os.remove(f)
132 |
133 | elif iterations % args.log_every == 0:
134 |
135 | # print progress message
136 | print(log_template.format(time.time()-start,
137 | epoch, iterations, 1+batch_idx, len(train_iter),
138 | 100. * (1+batch_idx) / len(train_iter), loss.data[0], ' '*8, n_correct/n_total*100, ' '*12))
139 |
140 |
141 |
--------------------------------------------------------------------------------
/snli/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | from argparse import ArgumentParser
3 |
4 | def makedirs(name):
5 | """helper function for python 2 and 3 to call os.makedirs()
6 | avoiding an error if the directory to be created already exists"""
7 |
8 |     import errno  # os is already imported at module level
9 |
10 | try:
11 | os.makedirs(name)
12 | except OSError as ex:
13 | if ex.errno == errno.EEXIST and os.path.isdir(name):
14 | # ignore existing directory
15 | pass
16 | else:
17 | # a different error happened
18 | raise
19 |
20 |
21 | def get_args():
22 | parser = ArgumentParser(description='PyTorch/torchtext SNLI example')
23 | parser.add_argument('--epochs', type=int, default=50)
24 | parser.add_argument('--batch_size', type=int, default=128)
25 | parser.add_argument('--d_embed', type=int, default=300)
26 | parser.add_argument('--d_proj', type=int, default=300)
27 | parser.add_argument('--d_hidden', type=int, default=300)
28 | parser.add_argument('--n_layers', type=int, default=1)
29 | parser.add_argument('--log_every', type=int, default=50)
30 | parser.add_argument('--lr', type=float, default=.001)
31 | parser.add_argument('--dev_every', type=int, default=1000)
32 | parser.add_argument('--save_every', type=int, default=1000)
33 |     parser.add_argument('--dp_ratio', type=float, default=0.2)
34 | parser.add_argument('--no-bidirectional', action='store_false', dest='birnn')
35 | parser.add_argument('--preserve-case', action='store_false', dest='lower')
36 | parser.add_argument('--no-projection', action='store_false', dest='projection')
37 | parser.add_argument('--train_embed', action='store_false', dest='fix_emb')
38 | parser.add_argument('--gpu', type=int, default=0)
39 | parser.add_argument('--save_path', type=str, default='results')
40 | parser.add_argument('--data_cache', type=str, default=os.path.join(os.getcwd(), '.data_cache'))
41 | parser.add_argument('--vector_cache', type=str, default=os.path.join(os.getcwd(), '.vector_cache/input_vectors.pt'))
42 | parser.add_argument('--word_vectors', type=str, default='glove.42B')
43 | parser.add_argument('--resume_snapshot', type=str, default='')
44 | args = parser.parse_args()
45 | return args
46 |
--------------------------------------------------------------------------------
/super_resolution/README.md:
--------------------------------------------------------------------------------
1 | # Super-resolution using an efficient sub-pixel convolutional neural network
2 |
3 | This example illustrates how to use the efficient sub-pixel convolution layer described in ["Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network" - Shi et al.](https://arxiv.org/abs/1609.05158) for increasing spatial resolution within your network for tasks such as super-resolution.
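In PyTorch the sub-pixel rearrangement is available as `torch.nn.PixelShuffle`. A minimal sketch of the final upscaling step (illustrative layer sizes, not necessarily those used in `model.py` here):

```python
import torch
import torch.nn as nn

upscale_factor = 3
# A convolution produces r^2 channels per output channel; PixelShuffle then
# rearranges (C * r^2, H, W) -> (C, H * r, W * r).
upscale = nn.Sequential(
    nn.Conv2d(32, 1 * upscale_factor ** 2, kernel_size=3, padding=1),
    nn.PixelShuffle(upscale_factor),
)
out = upscale(torch.randn(1, 32, 64, 64))
print(out.shape)  # torch.Size([1, 1, 192, 192])
```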
4 |
5 | ```
6 | usage: main.py [-h] --upscale_factor UPSCALE_FACTOR [--batchSize BATCHSIZE]
7 | [--testBatchSize TESTBATCHSIZE] [--nEpochs NEPOCHS] [--lr LR]
8 | [--cuda] [--threads THREADS] [--seed SEED]
9 |
10 | PyTorch Super Res Example
11 |
12 | optional arguments:
13 | -h, --help show this help message and exit
14 | --upscale_factor super resolution upscale factor
15 | --batchSize training batch size
16 | --testBatchSize testing batch size
17 | --nEpochs number of epochs to train for
18 | --lr Learning Rate. Default=0.01
19 | --cuda use cuda
20 |   --threads             number of threads for data loader to use. Default=4
21 | --seed random seed to use. Default=123
22 | ```
23 | This example trains a super-resolution network on the [BSD300 dataset](https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/), using crops from the 200 training images and evaluating on crops of the 100 test images. A snapshot of the model is saved after every epoch with the filename `model_epoch_<epoch_number>.pth` (e.g. `model_epoch_500.pth`).
24 |
25 | ## Example Usage:
26 |
27 | ### Train
28 |
29 | `python main.py --upscale_factor 3 --batchSize 4 --testBatchSize 100 --nEpochs 30 --lr 0.001`
30 |
31 | ### Super Resolve
32 | `python super_resolve.py --input_image dataset/BSDS300/images/test/16077.jpg --model model_epoch_500.pth --output_filename out.png`
33 |
--------------------------------------------------------------------------------
/super_resolution/data.py:
--------------------------------------------------------------------------------
1 | from os.path import exists, join, basename
2 | from os import makedirs, remove
3 | from six.moves import urllib
4 | import tarfile
5 | from torchvision.transforms import Compose, CenterCrop, ToTensor, Scale
6 |
7 | from dataset import DatasetFromFolder
8 |
9 |
10 | def download_bsd300(dest="dataset"):
11 | output_image_dir = join(dest, "BSDS300/images")
12 |
13 | if not exists(output_image_dir):
14 | makedirs(dest)
15 | url = "http://www2.eecs.berkeley.edu/Research/Projects/CS/vision/bsds/BSDS300-images.tgz"
16 | print("downloading url ", url)
17 |
18 | data = urllib.request.urlopen(url)
19 |
20 | file_path = join(dest, basename(url))
21 | with open(file_path, 'wb') as f:
22 | f.write(data.read())
23 |
24 | print("Extracting data")
25 | with tarfile.open(file_path) as tar:
26 | for item in tar:
27 | tar.extract(item, dest)
28 |
29 | remove(file_path)
30 |
31 | return output_image_dir
32 |
33 |
34 | def calculate_valid_crop_size(crop_size, upscale_factor):
35 |     return crop_size - (crop_size % upscale_factor)  # largest crop evenly divisible by the upscale factor
36 |
37 |
38 | def input_transform(crop_size, upscale_factor):
39 | return Compose([
40 | CenterCrop(crop_size),
41 | Scale(crop_size // upscale_factor),
42 | ToTensor(),
43 | ])
44 |
45 |
46 | def target_transform(crop_size):
47 | return Compose([
48 | CenterCrop(crop_size),
49 | ToTensor(),
50 | ])
51 |
52 |
53 | def get_training_set(upscale_factor):
54 | root_dir = download_bsd300()
55 | train_dir = join(root_dir, "train")
56 | crop_size = calculate_valid_crop_size(256, upscale_factor)
57 |
58 | return DatasetFromFolder(train_dir,
59 | input_transform=input_transform(crop_size, upscale_factor),
60 | target_transform=target_transform(crop_size))
61 |
62 |
63 | def get_test_set(upscale_factor):
64 | root_dir = download_bsd300()
65 | test_dir = join(root_dir, "test")
66 | crop_size = calculate_valid_crop_size(256, upscale_factor)
67 |
68 | return DatasetFromFolder(test_dir,
69 | input_transform=input_transform(crop_size, upscale_factor),
70 | target_transform=target_transform(crop_size))
71 |
--------------------------------------------------------------------------------
/super_resolution/dataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 |
3 | from os import listdir
4 | from os.path import join
5 | from PIL import Image
6 |
7 |
8 | def is_image_file(filename):
9 | return any(filename.endswith(extension) for extension in [".png", ".jpg", ".jpeg"])
10 |
11 |
12 | def load_img(filepath):
13 | img = Image.open(filepath).convert('YCbCr')
14 | y, _, _ = img.split()
15 | return y
16 |
17 |
18 | class DatasetFromFolder(data.Dataset):
19 | def __init__(self, image_dir, input_transform=None, target_transform=None):
20 | super(DatasetFromFolder, self).__init__()
21 | self.image_filenames = [join(image_dir, x) for x in listdir(image_dir) if is_image_file(x)]
22 |
23 | self.input_transform = input_transform
24 | self.target_transform = target_transform
25 |
26 | def __getitem__(self, index):
27 | input = load_img(self.image_filenames[index])
28 |         target = input.copy()  # target starts as the same image; the transforms then downscale only the input
29 | if self.input_transform:
30 | input = self.input_transform(input)
31 | if self.target_transform:
32 | target = self.target_transform(target)
33 |
34 | return input, target
35 |
36 | def __len__(self):
37 | return len(self.image_filenames)
38 |
--------------------------------------------------------------------------------
/super_resolution/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import argparse
3 | from math import log10
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.optim as optim
8 | from torch.autograd import Variable
9 | from torch.utils.data import DataLoader
10 | from model import Net
11 | from data import get_training_set, get_test_set
12 | import torchvision.utils as vutils
13 | from tensorboardX import SummaryWriter
14 | writer = SummaryWriter('runs')
15 | # Training settings
16 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example')
17 | parser.add_argument('--upscale_factor', type=int, required=True, help="super resolution upscale factor")
18 | parser.add_argument('--batchSize', type=int, default=64, help='training batch size')
19 | parser.add_argument('--testBatchSize', type=int, default=10, help='testing batch size')
20 | parser.add_argument('--nEpochs', type=int, default=200, help='number of epochs to train for')
21 | parser.add_argument('--lr', type=float, default=0.01, help='Learning Rate. Default=0.01')
22 | parser.add_argument('--cuda', action='store_true', help='use cuda?')
23 | parser.add_argument('--threads', type=int, default=4, help='number of threads for data loader to use')
24 | parser.add_argument('--seed', type=int, default=123, help='random seed to use. Default=123')
25 | opt = parser.parse_args()
26 |
27 | print(opt)
28 |
29 | cuda = opt.cuda
30 | if cuda and not torch.cuda.is_available():
31 | raise Exception("No GPU found, please run without --cuda")
32 |
33 | torch.manual_seed(opt.seed)
34 | if cuda:
35 | torch.cuda.manual_seed(opt.seed)
36 |
37 | print('===> Loading datasets')
38 | train_set = get_training_set(opt.upscale_factor)
39 | test_set = get_test_set(opt.upscale_factor)
40 | training_data_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True)
41 | testing_data_loader = DataLoader(dataset=test_set, num_workers=opt.threads, batch_size=opt.testBatchSize, shuffle=False)
42 |
43 | print('===> Building model')
44 | model = Net(upscale_factor=opt.upscale_factor)
45 | criterion = nn.MSELoss()
46 |
47 | if cuda:
48 | model = model.cuda()
49 | criterion = criterion.cuda()
50 |
51 | optimizer = optim.Adam(model.parameters(), lr=opt.lr)
52 |
53 |
54 | def train(epoch):
55 | epoch_loss = 0
56 | for iteration, batch in enumerate(training_data_loader, 1):
57 | input, target = Variable(batch[0]), Variable(batch[1])
58 | if cuda:
59 | input = input.cuda()
60 | target = target.cuda()
61 |
62 | optimizer.zero_grad()
63 | output = model(input)
64 | loss = criterion(output, target)
65 | epoch_loss += loss.data[0]
66 | loss.backward()
67 | optimizer.step()
68 |
69 | print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, len(training_data_loader), loss.data[0]))
70 |             niter = (epoch - 1) * len(training_data_loader) + iteration  # global step, counting from the first batch of epoch 1
71 | writer.add_scalar('Loss', loss.data[0], niter)
72 | writer.add_image('Output', vutils.make_grid(output.data, normalize=True, scale_each=True), niter)
73 | print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader)))
74 |
75 |
76 | def test():
77 | avg_psnr = 0
78 | for batch in testing_data_loader:
79 | input, target = Variable(batch[0]), Variable(batch[1])
80 | if cuda:
81 | input = input.cuda()
82 | target = target.cuda()
83 |
84 | prediction = model(input)
85 | mse = criterion(prediction, target)
86 |         psnr = 10 * log10(1 / mse.data[0])  # PSNR = 10 * log10(MAX**2 / MSE), with MAX = 1 for tensors in [0, 1]
87 | avg_psnr += psnr
88 | print("===> Avg. PSNR: {:.4f} dB".format(avg_psnr / len(testing_data_loader)))
89 |
90 |
91 | def checkpoint(epoch):
92 | model_out_path = "model_epoch_{}.pth".format(epoch)
93 |     torch.save(model, model_out_path)
94 | print("Checkpoint saved to {}".format(model_out_path))
95 |
96 | for epoch in range(1, opt.nEpochs + 1):
97 | train(epoch)
98 | test()
99 | checkpoint(epoch)
100 |
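# The tensorboardX writer above logs scalars and image grids to ./runs;
# assuming TensorBoard is installed, they can be inspected with:
#
#   tensorboard --logdir runs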
--------------------------------------------------------------------------------
/super_resolution/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.init as init
4 |
5 |
6 | class Net(nn.Module):
7 | def __init__(self, upscale_factor):
8 | super(Net, self).__init__()
9 |
10 | self.relu = nn.ReLU()
11 | self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
12 | self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
13 | self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
14 | self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1))
15 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
16 |
17 | self._initialize_weights()
18 |
19 | def forward(self, x):
20 | x = self.relu(self.conv1(x))
21 | x = self.relu(self.conv2(x))
22 | x = self.relu(self.conv3(x))
23 | x = self.pixel_shuffle(self.conv4(x))
24 | return x
25 |
26 | def _initialize_weights(self):
27 | init.orthogonal(self.conv1.weight, init.calculate_gain('relu'))
28 | init.orthogonal(self.conv2.weight, init.calculate_gain('relu'))
29 | init.orthogonal(self.conv3.weight, init.calculate_gain('relu'))
30 | init.orthogonal(self.conv4.weight)
31 |
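# Note on the sub-pixel layer: conv4 emits upscale_factor ** 2 channels and
# nn.PixelShuffle rearranges them into a single channel enlarged by
# upscale_factor in each spatial dimension. A quick shape check (input size
# is illustrative only):
#
#   >>> from torch.autograd import Variable
#   >>> net = Net(upscale_factor=3)
#   >>> net(Variable(torch.rand(1, 1, 85, 85))).size()
#   torch.Size([1, 1, 255, 255])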
--------------------------------------------------------------------------------
/super_resolution/super_resolve.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import argparse
3 | import torch
4 | from torch.autograd import Variable
5 | from PIL import Image
6 | from torchvision.transforms import ToTensor
7 |
8 | import numpy as np
9 |
10 | # Inference settings
11 | parser = argparse.ArgumentParser(description='PyTorch Super Res Example')
12 | parser.add_argument('--input_image', type=str, required=True, help='input image to use')
13 | parser.add_argument('--model', type=str, required=True, help='model file to use')
14 | parser.add_argument('--output_filename', type=str, help='where to save the output image')
15 | parser.add_argument('--cuda', action='store_true', help='use cuda')
16 | opt = parser.parse_args()
17 |
18 | print(opt)
19 | img = Image.open(opt.input_image).convert('YCbCr')
20 | y, cb, cr = img.split()
21 |
22 | model = torch.load(opt.model)
23 | input = Variable(ToTensor()(y)).view(1, -1, y.size[1], y.size[0])  # PIL size is (width, height); the tensor layout is (N, C, H, W)
24 |
25 | if opt.cuda:
26 | model = model.cuda()
27 | input = input.cuda()
28 |
29 | out = model(input)
30 | out = out.cpu()
31 | out_img_y = out.data[0].numpy()
32 | out_img_y *= 255.0
33 | out_img_y = out_img_y.clip(0, 255)
34 | out_img_y = Image.fromarray(np.uint8(out_img_y[0]), mode='L')
35 |
36 | out_img_cb = cb.resize(out_img_y.size, Image.BICUBIC)  # chroma channels get plain bicubic upscaling; only Y went through the network
37 | out_img_cr = cr.resize(out_img_y.size, Image.BICUBIC)
38 | out_img = Image.merge('YCbCr', [out_img_y, out_img_cb, out_img_cr]).convert('RGB')
39 |
40 | out_img.save(opt.output_filename)
41 | print('output image saved to ', opt.output_filename)
42 |
--------------------------------------------------------------------------------
/time_sequence_prediction/README.md:
--------------------------------------------------------------------------------
1 | # Time Sequence Prediction
2 | This is a toy example for beginners to start with. It is helpful for learning both PyTorch and time sequence prediction. Two LSTMCell units are used in this example to learn sine wave signals starting at different phases. After learning the sine waves, the network tries to predict the signal values in the future. The result is shown in the picture below.
3 |
4 | ## Usage
5 |
6 | ```
7 | python generate_sine_wave.py
8 | python train.py
9 | ```
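
`generate_sine_wave.py` saves `traindata.pt`, a 100 x 1000 array of sine waves shifted by random phases. A minimal sketch for inspecting one of the generated waves (assuming matplotlib is installed):

```
import torch
import matplotlib.pyplot as plt

data = torch.load('traindata.pt')  # numpy array of shape (100, 1000)
plt.plot(data[0])
plt.savefig('sample_wave.png')
```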
10 |
11 | ## Result
12 | The initial signal and the predicted results are shown in the image. We first give the network some initial signal (solid lines). The network then predicts the continuation (dashed lines), showing that it has learned to generate new sine waves.
13 | 
14 |
--------------------------------------------------------------------------------
/time_sequence_prediction/generate_sine_wave.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import torch
4 | T = 20    # time scale of the waves: data = sin(x / T)
5 | L = 1000  # length of each sequence
6 | N = 100   # number of sequences
7 | np.random.seed(2)
8 | x = np.empty((N, L), 'int64')
9 | x[:] = np.array(range(L)) + np.random.randint(-4*T, 4*T, N).reshape(N, 1)
10 | data = np.sin(x / 1.0 / T).astype('float64')
11 | torch.save(data, open('traindata.pt', 'wb'))
12 |
13 |
--------------------------------------------------------------------------------
/time_sequence_prediction/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 | import torch.optim as optim
6 | import numpy as np
7 | import matplotlib
8 | matplotlib.use('Agg')
9 | import matplotlib.pyplot as plt
10 |
11 | class Sequence(nn.Module):
12 | def __init__(self):
13 | super(Sequence, self).__init__()
14 | self.lstm1 = nn.LSTMCell(1, 51)
15 | self.lstm2 = nn.LSTMCell(51, 1)
16 |
17 | def forward(self, input, future = 0):
18 | outputs = []
19 | h_t = Variable(torch.zeros(input.size(0), 51).double(), requires_grad=False)
20 | c_t = Variable(torch.zeros(input.size(0), 51).double(), requires_grad=False)
21 | h_t2 = Variable(torch.zeros(input.size(0), 1).double(), requires_grad=False)
22 | c_t2 = Variable(torch.zeros(input.size(0), 1).double(), requires_grad=False)
23 |
24 | for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
25 | h_t, c_t = self.lstm1(input_t, (h_t, c_t))
26 | h_t2, c_t2 = self.lstm2(c_t, (h_t2, c_t2))
27 | outputs += [c_t2]
28 |         for i in range(future):  # optionally keep predicting beyond the given input
29 | h_t, c_t = self.lstm1(c_t2, (h_t, c_t))
30 | h_t2, c_t2 = self.lstm2(c_t, (h_t2, c_t2))
31 | outputs += [c_t2]
32 | outputs = torch.stack(outputs, 1).squeeze(2)
33 | return outputs
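    # The first loop above is driven by the ground-truth sequence one step
    # at a time; the second runs free for `future` steps, feeding each
    # prediction back in as the next input, which is how the network
    # extrapolates beyond the observed signal.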
34 |
35 |
36 |
37 | if __name__ == '__main__':
38 |     # set random seed to 0
39 | np.random.seed(0)
40 | torch.manual_seed(0)
41 | # load data and make training set
42 | data = torch.load('traindata.pt')
43 | input = Variable(torch.from_numpy(data[3:, :-1]), requires_grad=False)
44 | target = Variable(torch.from_numpy(data[3:, 1:]), requires_grad=False)
45 | test_input = Variable(torch.from_numpy(data[:3, :-1]), requires_grad=False)
46 | test_target = Variable(torch.from_numpy(data[:3, 1:]), requires_grad=False)
47 | # build the model
48 | seq = Sequence()
49 | seq.double()
50 | criterion = nn.MSELoss()
51 | # use LBFGS as optimizer since we can load the whole data to train
52 | optimizer = optim.LBFGS(seq.parameters())
53 | #begin to train
54 | for i in range(15):
55 | print('STEP: ', i)
56 | def closure():
57 | optimizer.zero_grad()
58 | out = seq(input)
59 | loss = criterion(out, target)
60 | print('loss:', loss.data.numpy()[0])
61 | loss.backward()
62 | return loss
63 | optimizer.step(closure)
64 | # begin to predict
65 | future = 1000
66 | pred = seq(test_input, future = future)
67 | loss = criterion(pred[:, :-future], test_target)
68 | print('test loss:', loss.data.numpy()[0])
69 | y = pred.data.numpy()
70 | # draw the result
71 | plt.figure(figsize=(30,10))
72 | plt.title('Predict future values for time sequences\n(Dashlines are predicted values)', fontsize=30)
73 | plt.xlabel('x', fontsize=20)
74 | plt.ylabel('y', fontsize=20)
75 | plt.xticks(fontsize=20)
76 | plt.yticks(fontsize=20)
77 | def draw(yi, color):
78 | plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0)
79 | plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0)
80 | draw(y[0], 'r')
81 | draw(y[1], 'g')
82 | draw(y[2], 'b')
83 | plt.savefig('predict%d.pdf'%i)
84 | plt.close()
85 |
86 |
--------------------------------------------------------------------------------
/vae/README.md:
--------------------------------------------------------------------------------
1 | # Basic VAE Example
2 |
3 | This is an improved implementation of the paper [Stochastic Gradient VB and the
4 | Variational Auto-Encoder](http://arxiv.org/abs/1312.6114) by Kingma and Welling.
5 | It uses ReLUs and the Adam optimizer, instead of sigmoids and Adagrad. These changes make the network converge much faster.
6 |
7 | ```bash
8 | pip install -r requirements.txt
9 | python main.py
10 | ```
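
The heart of the model is the reparameterization trick: rather than sampling `z ~ N(mu, sigma^2)` directly, it samples `eps ~ N(0, I)` and computes `z = mu + sigma * eps`, which keeps the stochastic node differentiable with respect to `mu` and `logvar`. A minimal sketch of that step, with toy shapes rather than the actual training code:

```python
import torch

mu, logvar = torch.zeros(4, 20), torch.zeros(4, 20)  # toy encoder outputs
std = logvar.mul(0.5).exp()                    # sigma = exp(logvar / 2)
eps = torch.FloatTensor(std.size()).normal_()  # eps ~ N(0, I)
z = eps.mul(std).add(mu)                       # z = mu + sigma * eps
```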
11 |
--------------------------------------------------------------------------------
/vae/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import argparse
3 | import torch
4 | import torch.utils.data
5 | import torch.nn as nn
6 | import torch.optim as optim
7 | from torch.autograd import Variable
8 | from torchvision import datasets, transforms
9 |
10 | parser = argparse.ArgumentParser(description='PyTorch VAE MNIST Example')
11 | parser.add_argument('--batch-size', type=int, default=128, metavar='N',
12 |                     help='input batch size for training (default: 128)')
13 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
14 |                     help='number of epochs to train (default: 10)')
15 | parser.add_argument('--no-cuda', action='store_true', default=False,
16 |                     help='disables CUDA training')
17 | parser.add_argument('--seed', type=int, default=1, metavar='S',
18 | help='random seed (default: 1)')
19 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
20 | help='how many batches to wait before logging training status')
21 | args = parser.parse_args()
22 | args.cuda = not args.no_cuda and torch.cuda.is_available()
23 |
24 |
25 | torch.manual_seed(args.seed)
26 | if args.cuda:
27 | torch.cuda.manual_seed(args.seed)
28 |
29 |
30 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
31 | train_loader = torch.utils.data.DataLoader(
32 | datasets.MNIST('../data', train=True, download=True,
33 | transform=transforms.ToTensor()),
34 | batch_size=args.batch_size, shuffle=True, **kwargs)
35 | test_loader = torch.utils.data.DataLoader(
36 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
37 | batch_size=args.batch_size, shuffle=True, **kwargs)
38 |
39 |
40 | class VAE(nn.Module):
41 | def __init__(self):
42 | super(VAE, self).__init__()
43 |
44 | self.fc1 = nn.Linear(784, 400)
45 | self.fc21 = nn.Linear(400, 20)
46 | self.fc22 = nn.Linear(400, 20)
47 | self.fc3 = nn.Linear(20, 400)
48 | self.fc4 = nn.Linear(400, 784)
49 |
50 | self.relu = nn.ReLU()
51 | self.sigmoid = nn.Sigmoid()
52 |
53 | def encode(self, x):
54 | h1 = self.relu(self.fc1(x))
55 | return self.fc21(h1), self.fc22(h1)
56 |
57 | def reparametrize(self, mu, logvar):
58 | std = logvar.mul(0.5).exp_()
59 | if args.cuda:
60 | eps = torch.cuda.FloatTensor(std.size()).normal_()
61 | else:
62 | eps = torch.FloatTensor(std.size()).normal_()
63 | eps = Variable(eps)
64 | return eps.mul(std).add_(mu)
65 |
66 | def decode(self, z):
67 | h3 = self.relu(self.fc3(z))
68 | return self.sigmoid(self.fc4(h3))
69 |
70 | def forward(self, x):
71 | mu, logvar = self.encode(x.view(-1, 784))
72 | z = self.reparametrize(mu, logvar)
73 | return self.decode(z), mu, logvar
74 |
75 |
76 | model = VAE()
77 | if args.cuda:
78 | model.cuda()
79 |
80 | reconstruction_function = nn.BCELoss()
81 | reconstruction_function.size_average = False
82 |
83 |
84 | def loss_function(recon_x, x, mu, logvar):
85 |     BCE = reconstruction_function(recon_x, x.view(-1, 784))  # flatten targets to match recon_x
86 |
87 | # see Appendix B from VAE paper:
88 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
89 | # https://arxiv.org/abs/1312.6114
90 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
91 | KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
92 | KLD = torch.sum(KLD_element).mul_(-0.5)
93 |
94 | return BCE + KLD
95 |
96 |
97 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
98 |
99 |
100 | def train(epoch):
101 | model.train()
102 | train_loss = 0
103 | for batch_idx, (data, _) in enumerate(train_loader):
104 | data = Variable(data)
105 | if args.cuda:
106 | data = data.cuda()
107 | optimizer.zero_grad()
108 | recon_batch, mu, logvar = model(data)
109 | loss = loss_function(recon_batch, data, mu, logvar)
110 | loss.backward()
111 | train_loss += loss.data[0]
112 | optimizer.step()
113 | if batch_idx % args.log_interval == 0:
114 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
115 | epoch, batch_idx * len(data), len(train_loader.dataset),
116 | 100. * batch_idx / len(train_loader),
117 | loss.data[0] / len(data)))
118 |
119 | print('====> Epoch: {} Average loss: {:.4f}'.format(
120 | epoch, train_loss / len(train_loader.dataset)))
121 |
122 |
123 | def test(epoch):
124 | model.eval()
125 | test_loss = 0
126 | for data, _ in test_loader:
127 | if args.cuda:
128 | data = data.cuda()
129 | data = Variable(data, volatile=True)
130 | recon_batch, mu, logvar = model(data)
131 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0]
132 |
133 | test_loss /= len(test_loader.dataset)
134 | print('====> Test set loss: {:.4f}'.format(test_loss))
135 |
136 |
137 | for epoch in range(1, args.epochs + 1):
138 | train(epoch)
139 | test(epoch)
140 |
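# Hypothetical extra step (not in the original script): decode samples drawn
# from the prior to eyeball what the trained model generates.
from torchvision.utils import save_image
sample = Variable(torch.randn(64, 20))
if args.cuda:
    sample = sample.cuda()
sample = model.decode(sample).cpu()
save_image(sample.data.view(64, 1, 28, 28), 'sample.png')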
--------------------------------------------------------------------------------
/vae/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | tqdm
4 | six
5 |
--------------------------------------------------------------------------------
/word_language_model/README.md:
--------------------------------------------------------------------------------
1 | # Word-level language modeling RNN
2 |
3 | This example trains a multi-layer RNN (Elman, GRU, or LSTM) on a language modeling task.
4 | By default, the training script uses the PTB dataset, provided in `./data/penn`.
5 | The trained model can then be used by the generate script to generate new text.
6 |
7 | ```bash
8 | python main.py --cuda --epochs 6 # Train an LSTM on PTB with CUDA, reaching perplexity of 117.61
9 | python main.py --cuda --epochs 6 --tied # Train a tied LSTM on PTB with CUDA, reaching perplexity of 110.44
10 | python main.py --cuda --tied # Train a tied LSTM on PTB with CUDA for 40 epochs, reaching perplexity of 87.17
11 | python generate.py # Generate samples from the trained LSTM model.
12 | ```
13 |
14 | The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`)
15 | which will automatically use the cuDNN backend if run on CUDA with cuDNN installed.
16 |
17 | During training, if a keyboard interrupt (Ctrl-C) is received,
18 | training is stopped and the current model is evaluated against the test dataset.
19 |
20 | The `main.py` script accepts the following arguments:
21 |
22 | ```bash
23 | optional arguments:
24 | -h, --help show this help message and exit
25 | --data DATA location of the data corpus
26 | --model MODEL type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)
27 | --emsize EMSIZE size of word embeddings
28 |   --nhid NHID number of hidden units per layer
29 | --nlayers NLAYERS number of layers
30 | --lr LR initial learning rate
31 | --clip CLIP gradient clipping
32 | --epochs EPOCHS upper epoch limit
33 |   --batch_size N batch size
34 | --bptt BPTT sequence length
35 | --dropout DROPOUT dropout applied to layers (0 = no dropout)
37 | --tied tie the word embedding and softmax weights
38 | --seed SEED random seed
39 | --cuda use CUDA
40 | --log-interval N report interval
41 | --save SAVE path to save the final model
42 | ```
43 |
44 | With these arguments, a variety of models can be tested.
45 | As an example, the following arguments produce slower but better models:
46 |
47 | ```bash
48 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 # Test perplexity of 80.97
49 | python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 --tied # Test perplexity of 75.96
50 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 # Test perplexity of 77.42
51 | python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 --tied # Test perplexity of 72.30
52 | ```
53 |
54 | These perplexities are equal to or better than
55 | [Recurrent Neural Network Regularization (Zaremba et al. 2014)](https://arxiv.org/pdf/1409.2329.pdf)
56 | and are similar to [Using the Output Embedding to Improve Language Models (Press & Wolf 2016)](https://arxiv.org/abs/1608.05859) and [Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling (Inan et al. 2016)](https://arxiv.org/pdf/1611.01462.pdf), though both of these papers have improved perplexities by using a form of recurrent dropout [(variational dropout)](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks).
57 |
--------------------------------------------------------------------------------
/word_language_model/data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 | class Dictionary(object):
5 | def __init__(self):
6 | self.word2idx = {}
7 | self.idx2word = []
8 |
9 | def add_word(self, word):
10 | if word not in self.word2idx:
11 | self.idx2word.append(word)
12 | self.word2idx[word] = len(self.idx2word) - 1
13 | return self.word2idx[word]
14 |
15 | def __len__(self):
16 | return len(self.idx2word)
17 |
18 |
19 | class Corpus(object):
20 | def __init__(self, path):
21 | self.dictionary = Dictionary()
22 | self.train = self.tokenize(os.path.join(path, 'train.txt'))
23 | self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
24 | self.test = self.tokenize(os.path.join(path, 'test.txt'))
25 |
26 | def tokenize(self, path):
27 | """Tokenizes a text file."""
28 | assert os.path.exists(path)
29 | # Add words to the dictionary
30 | with open(path, 'r') as f:
31 | tokens = 0
32 | for line in f:
33 |                 words = line.split() + ['<eos>']  # mark the end of each line
34 | tokens += len(words)
35 | for word in words:
36 | self.dictionary.add_word(word)
37 |
38 | # Tokenize file content
39 | with open(path, 'r') as f:
40 | ids = torch.LongTensor(tokens)
41 | token = 0
42 | for line in f:
43 |                 words = line.split() + ['<eos>']
44 | for word in words:
45 | ids[token] = self.dictionary.word2idx[word]
46 | token += 1
47 |
48 | return ids
49 |
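# Example: Corpus('./data/penn') builds one shared Dictionary across the
# train/valid/test splits; each split is returned as a flat LongTensor of
# word ids, so corpus.train[:5] gives the ids of the first five tokens.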
--------------------------------------------------------------------------------
/word_language_model/generate.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Language Modeling on Penn Tree Bank
3 | #
4 | # This file generates new sentences sampled from the language model
5 | #
6 | ###############################################################################
7 |
8 | import argparse
9 |
10 | import torch
11 | from torch.autograd import Variable
12 |
13 | import data
14 |
15 | parser = argparse.ArgumentParser(description='PyTorch PTB Language Model')
16 |
17 | # Model parameters.
18 | parser.add_argument('--data', type=str, default='./data/penn',
19 | help='location of the data corpus')
20 | parser.add_argument('--checkpoint', type=str, default='./model.pt',
21 | help='model checkpoint to use')
22 | parser.add_argument('--outf', type=str, default='generated.txt',
23 | help='output file for generated text')
24 | parser.add_argument('--words', type=int, default=1000,
25 | help='number of words to generate')
26 | parser.add_argument('--seed', type=int, default=1111,
27 | help='random seed')
28 | parser.add_argument('--cuda', action='store_true',
29 | help='use CUDA')
30 | parser.add_argument('--temperature', type=float, default=1.0,
31 | help='temperature - higher will increase diversity')
32 | parser.add_argument('--log-interval', type=int, default=100,
33 | help='reporting interval')
34 | args = parser.parse_args()
35 |
36 | # Set the random seed manually for reproducibility.
37 | torch.manual_seed(args.seed)
38 | if torch.cuda.is_available():
39 | if not args.cuda:
40 | print("WARNING: You have a CUDA device, so you should probably run with --cuda")
41 | else:
42 | torch.cuda.manual_seed(args.seed)
43 |
44 | if args.temperature < 1e-3:
45 | parser.error("--temperature has to be greater or equal 1e-3")
46 |
47 | with open(args.checkpoint, 'rb') as f:
48 | model = torch.load(f)
49 | model.eval()
50 |
51 | if args.cuda:
52 | model.cuda()
53 | else:
54 | model.cpu()
55 |
56 | corpus = data.Corpus(args.data)
57 | ntokens = len(corpus.dictionary)
58 | hidden = model.init_hidden(1)
59 | input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True)  # seed generation with a random word id
60 | if args.cuda:
61 | input.data = input.data.cuda()
62 |
63 | with open(args.outf, 'w') as outf:
64 | for i in range(args.words):
65 | output, hidden = model(input, hidden)
66 | word_weights = output.squeeze().data.div(args.temperature).exp().cpu()
67 | word_idx = torch.multinomial(word_weights, 1)[0]
68 | input.data.fill_(word_idx)
69 | word = corpus.dictionary.idx2word[word_idx]
70 |
71 | outf.write(word + ('\n' if i % 20 == 19 else ' '))
72 |
73 | if i % args.log_interval == 0:
74 | print('| Generated {}/{} words'.format(i, args.words))
75 |
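# Typical invocation, using only the flags defined above:
#   python generate.py --checkpoint ./model.pt --words 200 --temperature 0.8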
--------------------------------------------------------------------------------
/word_language_model/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import math
4 | import torch
5 | import torch.nn as nn
6 | from torch.autograd import Variable
7 |
8 | import data
9 | import model
10 |
11 | parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model')
12 | parser.add_argument('--data', type=str, default='./data/penn',
13 | help='location of the data corpus')
14 | parser.add_argument('--model', type=str, default='LSTM',
15 | help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)')
16 | parser.add_argument('--emsize', type=int, default=200,
17 | help='size of word embeddings')
18 | parser.add_argument('--nhid', type=int, default=200,
19 | help='number of hidden units per layer')
20 | parser.add_argument('--nlayers', type=int, default=2,
21 | help='number of layers')
22 | parser.add_argument('--lr', type=float, default=20,
23 | help='initial learning rate')
24 | parser.add_argument('--clip', type=float, default=0.25,
25 | help='gradient clipping')
26 | parser.add_argument('--epochs', type=int, default=40,
27 | help='upper epoch limit')
28 | parser.add_argument('--batch_size', type=int, default=20, metavar='N',
29 | help='batch size')
30 | parser.add_argument('--bptt', type=int, default=35,
31 | help='sequence length')
32 | parser.add_argument('--dropout', type=float, default=0.2,
33 | help='dropout applied to layers (0 = no dropout)')
34 | parser.add_argument('--tied', action='store_true',
35 | help='tie the word embedding and softmax weights')
36 | parser.add_argument('--seed', type=int, default=1111,
37 | help='random seed')
38 | parser.add_argument('--cuda', action='store_true',
39 | help='use CUDA')
40 | parser.add_argument('--log-interval', type=int, default=200, metavar='N',
41 | help='report interval')
42 | parser.add_argument('--save', type=str, default='model.pt',
43 | help='path to save the final model')
44 | args = parser.parse_args()
45 |
46 | # Set the random seed manually for reproducibility.
47 | torch.manual_seed(args.seed)
48 | if torch.cuda.is_available():
49 | if not args.cuda:
50 | print("WARNING: You have a CUDA device, so you should probably run with --cuda")
51 | else:
52 | torch.cuda.manual_seed(args.seed)
53 |
54 | ###############################################################################
55 | # Load data
56 | ###############################################################################
57 |
58 | corpus = data.Corpus(args.data)
59 |
60 | def batchify(data, bsz):
61 | # Work out how cleanly we can divide the dataset into bsz parts.
62 | nbatch = data.size(0) // bsz
63 | # Trim off any extra elements that wouldn't cleanly fit (remainders).
64 | data = data.narrow(0, 0, nbatch * bsz)
65 | # Evenly divide the data across the bsz batches.
66 | data = data.view(bsz, -1).t().contiguous()
67 | if args.cuda:
68 | data = data.cuda()
69 | return data
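# Example: a 26-token stream a b c ... z with bsz=4 keeps 24 tokens and
# becomes a 6 x 4 matrix; column j holds tokens 6*j .. 6*j + 5, and each
# column is treated as an independent sequence by the model.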
70 |
71 | eval_batch_size = 10
72 | train_data = batchify(corpus.train, args.batch_size)
73 | val_data = batchify(corpus.valid, eval_batch_size)
74 | test_data = batchify(corpus.test, eval_batch_size)
75 |
76 | ###############################################################################
77 | # Build the model
78 | ###############################################################################
79 |
80 | ntokens = len(corpus.dictionary)
81 | model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)
82 | if args.cuda:
83 | model.cuda()
84 |
85 | criterion = nn.CrossEntropyLoss()
86 |
87 | ###############################################################################
88 | # Training code
89 | ###############################################################################
90 |
91 | def repackage_hidden(h):
92 | """Wraps hidden states in new Variables, to detach them from their history."""
93 | if type(h) == Variable:
94 | return Variable(h.data)
95 | else:
96 | return tuple(repackage_hidden(v) for v in h)
97 |
98 |
99 | def get_batch(source, i, evaluation=False):
100 | seq_len = min(args.bptt, len(source) - 1 - i)
101 | data = Variable(source[i:i+seq_len], volatile=evaluation)
102 | target = Variable(source[i+1:i+1+seq_len].view(-1))
103 | return data, target
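# Example: with the default bptt=35, the chunk starting at row i has shape
# (35, bsz) and the target is the same block shifted one step ahead,
# flattened to 1-D so it lines up with CrossEntropyLoss over the vocabulary.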
104 |
105 |
106 | def evaluate(data_source):
107 | # Turn on evaluation mode which disables dropout.
108 | model.eval()
109 | total_loss = 0
110 | ntokens = len(corpus.dictionary)
111 | hidden = model.init_hidden(eval_batch_size)
112 | for i in range(0, data_source.size(0) - 1, args.bptt):
113 | data, targets = get_batch(data_source, i, evaluation=True)
114 | output, hidden = model(data, hidden)
115 | output_flat = output.view(-1, ntokens)
116 | total_loss += len(data) * criterion(output_flat, targets).data
117 | hidden = repackage_hidden(hidden)
118 | return total_loss[0] / len(data_source)
119 |
120 |
121 | def train():
122 | # Turn on training mode which enables dropout.
123 | model.train()
124 | total_loss = 0
125 | start_time = time.time()
126 | ntokens = len(corpus.dictionary)
127 | hidden = model.init_hidden(args.batch_size)
128 | for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
129 | data, targets = get_batch(train_data, i)
130 | # Starting each batch, we detach the hidden state from how it was previously produced.
131 | # If we didn't, the model would try backpropagating all the way to start of the dataset.
132 | hidden = repackage_hidden(hidden)
133 | model.zero_grad()
134 | output, hidden = model(data, hidden)
135 | loss = criterion(output.view(-1, ntokens), targets)
136 | loss.backward()
137 |
138 | # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
139 | torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
140 | for p in model.parameters():
141 |             p.data.add_(-lr, p.grad.data)  # manual SGD step: p <- p - lr * grad
142 |
143 | total_loss += loss.data
144 |
145 | if batch % args.log_interval == 0 and batch > 0:
146 | cur_loss = total_loss[0] / args.log_interval
147 | elapsed = time.time() - start_time
148 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
149 | 'loss {:5.2f} | ppl {:8.2f}'.format(
150 | epoch, batch, len(train_data) // args.bptt, lr,
151 | elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
152 | total_loss = 0
153 | start_time = time.time()
154 |
155 | # Loop over epochs.
156 | lr = args.lr
157 | best_val_loss = None
158 |
159 | # At any point you can hit Ctrl + C to break out of training early.
160 | try:
161 | for epoch in range(1, args.epochs+1):
162 | epoch_start_time = time.time()
163 | train()
164 | val_loss = evaluate(val_data)
165 | print('-' * 89)
166 | print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
167 | 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
168 | val_loss, math.exp(val_loss)))
169 | print('-' * 89)
170 | # Save the model if the validation loss is the best we've seen so far.
171 | if not best_val_loss or val_loss < best_val_loss:
172 | with open(args.save, 'wb') as f:
173 | torch.save(model, f)
174 | best_val_loss = val_loss
175 | else:
176 | # Anneal the learning rate if no improvement has been seen in the validation dataset.
177 | lr /= 4.0
178 | except KeyboardInterrupt:
179 | print('-' * 89)
180 | print('Exiting from training early')
181 |
182 | # Load the best saved model.
183 | with open(args.save, 'rb') as f:
184 | model = torch.load(f)
185 |
186 | # Run on test data.
187 | test_loss = evaluate(test_data)
188 | print('=' * 89)
189 | print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
190 | test_loss, math.exp(test_loss)))
191 | print('=' * 89)
192 |
--------------------------------------------------------------------------------
/word_language_model/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch.autograd import Variable
3 |
4 | class RNNModel(nn.Module):
5 | """Container module with an encoder, a recurrent module, and a decoder."""
6 |
7 | def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
8 | super(RNNModel, self).__init__()
9 | self.drop = nn.Dropout(dropout)
10 | self.encoder = nn.Embedding(ntoken, ninp)
11 | if rnn_type in ['LSTM', 'GRU']:
12 | self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
13 | else:
14 | try:
15 | nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
16 | except KeyError:
17 | raise ValueError( """An invalid option for `--model` was supplied,
18 | options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
19 | self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
20 | self.decoder = nn.Linear(nhid, ntoken)
21 |
22 | # Optionally tie weights as in:
23 | # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
24 | # https://arxiv.org/abs/1608.05859
25 | # and
26 | # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
27 | # https://arxiv.org/abs/1611.01462
28 | if tie_weights:
29 | if nhid != ninp:
30 | raise ValueError('When using the tied flag, nhid must be equal to emsize')
31 | self.decoder.weight = self.encoder.weight
32 |
33 | self.init_weights()
34 |
35 | self.rnn_type = rnn_type
36 | self.nhid = nhid
37 | self.nlayers = nlayers
38 |
39 | def init_weights(self):
40 | initrange = 0.1
41 | self.encoder.weight.data.uniform_(-initrange, initrange)
42 | self.decoder.bias.data.fill_(0)
43 | self.decoder.weight.data.uniform_(-initrange, initrange)
44 |
45 | def forward(self, input, hidden):
46 | emb = self.drop(self.encoder(input))
47 | output, hidden = self.rnn(emb, hidden)
48 | output = self.drop(output)
49 | decoded = self.decoder(output.view(output.size(0)*output.size(1), output.size(2)))
50 | return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
51 |
52 | def init_hidden(self, bsz):
53 | weight = next(self.parameters()).data
54 | if self.rnn_type == 'LSTM':
55 | return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
56 | Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
57 | else:
58 | return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())
59 |
--------------------------------------------------------------------------------
/word_language_model/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 |
--------------------------------------------------------------------------------