├── floyd_requirements.txt
├── .floydignore
├── LICENSE
├── data.py
├── .gitignore
├── model.py
├── generate.py
├── app.py
├── README.md
└── main.py

--------------------------------------------------------------------------------
/floyd_requirements.txt:
--------------------------------------------------------------------------------
flask

--------------------------------------------------------------------------------
/.floydignore:
--------------------------------------------------------------------------------

# Directories and files to ignore when uploading code to floyd

FLOYD_README.md
.git
.eggs
eggs
lib
lib64
parts
sdist
core
var
*.pyc
*.swp
.DS_Store
data/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2017,
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
import os
import torch


class Dictionary(object):
    """Build word2idx and idx2word from the corpus (train/valid/test)."""
    def __init__(self):
        self.word2idx = {}  # word -> index
        self.idx2word = []  # index (position) -> word

    def add_word(self, word):
        """Create/update word2idx and idx2word."""
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)


class Corpus(object):
    """Corpus tokenizer."""
    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))

    def tokenize(self, path):
        """Tokenizes a text file."""
        assert os.path.exists(path)
        # Add words to the dictionary
        with open(path, 'r') as f:
            tokens = 0
            for line in f:
                # Split the line into tokens and append an end-of-sentence marker
                words = line.split() + ['<eos>']
                tokens += len(words)
                for word in words:
                    self.dictionary.add_word(word)

        # Tokenize file content
        with open(path, 'r') as f:
            ids = torch.LongTensor(tokens)
            token = 0
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    ids[token] = self.dictionary.word2idx[word]
                    token += 1

        return ids

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

# Created by https://www.gitignore.io/api/python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.pyc

# Floyd things
FLOYD_README.md
.floydexpt
.DS_Store
core
data/

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# End of https://www.gitignore.io/api/python

--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import torch.nn as nn
from torch.autograd import Variable


class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)  # token ids -> embeddings
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError("""An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden

    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
                    Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
        else:
            return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())

--------------------------------------------------------------------------------
/generate.py:
--------------------------------------------------------------------------------
###############################################################################
# Language Modeling on Penn Tree Bank
#
# This file generates new sentences sampled from the language model
#
###############################################################################

import argparse

import torch
from torch.autograd import Variable

import data

parser = argparse.ArgumentParser(description='PyTorch PTB Language Model')

# Model parameters.
parser.add_argument('--data', type=str, default='/input',
                    help='location of the data corpus')
parser.add_argument('--checkpoint', type=str, default='/model/model.pt',
                    help='model checkpoint to use')
parser.add_argument('--outf', type=str, default='/output/generated.txt',
                    help='output file for generated text')
parser.add_argument('--words', type=int, default=1000,
                    help='number of words to generate')
parser.add_argument('--seed', type=int, default=1111,
                    help='random seed')
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA')
parser.add_argument('--temperature', type=float, default=1.0,
                    help='temperature - higher will increase diversity')
parser.add_argument('--log-interval', type=int, default=100,
                    help='reporting interval')
args = parser.parse_args()

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)

if args.temperature < 1e-3:
    parser.error("--temperature has to be greater than or equal to 1e-3")

# Load checkpoint
if args.checkpoint != '':
    if args.cuda:
        model = torch.load(args.checkpoint)
    else:
        # Load a GPU-trained model on the CPU
        model = torch.load(args.checkpoint, map_location=lambda storage, loc: storage)
model.eval()

if args.cuda:
    model.cuda()
else:
    model.cpu()

corpus = data.Corpus(args.data)
ntokens = len(corpus.dictionary)
hidden = model.init_hidden(1)
# Start generation from a random token id in [0, ntokens)
input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True)
if args.cuda:
    input.data = input.data.cuda()

with open(args.outf, 'w') as outf:
    for i in range(args.words):
        output, hidden = model(input, hidden)
        word_weights = output.squeeze().data.div(args.temperature).exp().cpu()
        word_idx = torch.multinomial(word_weights, 1)[0]
        input.data.fill_(word_idx)
        word = corpus.dictionary.idx2word[word_idx]
        # word = '\n' if word == '<eos>' else word

        outf.write(word + ('\n' if i % 20 == 19 else ' '))

        if i % args.log_interval == 0:
            print('| Generated {}/{} words'.format(i, args.words))

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
"""
Flask Serving

This file is a sample Flask app that can be used to test your model with a REST API.
This app does the following:
    - Looks for the number of words to generate and the temperature
    - Returns the generated text

POST req:
    parameters:
        - words, required, how many words to generate
        - temperature, optional, degree of diversity
"""
import os
from flask import Flask, send_file, request
from werkzeug.exceptions import BadRequest
import torch
from torch.autograd import Variable
import data

DATA_PATH = '/input'
CHECKPOINT = '/model/model.pt'
OUTPUT_PATH = '/output/generated.txt'
LOG_INTERVAL = 50
print('Loading checkpoint: %s' % CHECKPOINT)

app = Flask('Language-Model-Text-Generator')

# Check that the checkpoint exists and is readable
if not os.path.isfile(CHECKPOINT):
    print("Unable to open checkpoint file")

cuda = torch.cuda.is_available()

# Load checkpoint
if cuda:
    model = torch.load(CHECKPOINT)
else:
    # Load a GPU-trained model on the CPU
    model = torch.load(CHECKPOINT, map_location=lambda storage, loc: storage)
model.eval()

if cuda:
    model.cuda()
else:
    model.cpu()

# Load data
corpus = data.Corpus(DATA_PATH)
ntokens = len(corpus.dictionary)


def generate(words, temperature):
    """Generate the given number of words with the given temperature."""
    hidden = model.init_hidden(1)
    input = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True)
    if cuda:
        input.data = input.data.cuda()

    # Generate
    with open(OUTPUT_PATH, 'w') as outf:
        for i in range(words):
            output, hidden = model(input, hidden)
            word_weights = output.squeeze().data.div(temperature).exp().cpu()
            word_idx = torch.multinomial(word_weights, 1)[0]
            input.data.fill_(word_idx)
            word = corpus.dictionary.idx2word[word_idx]
            # word = '\n' if word == '<eos>' else word
            outf.write(word + ('\n' if i % 20 == 19 else ' '))

            if i % LOG_INTERVAL == 0:
                print('| Generated {}/{} words'.format(i, words))


# Return the generated text
@app.route('/<path:path>', methods=['POST'])
def generator_handler(path):
    # Parse the request parameters
    words = request.form.get("words")
    if words is None:
        return BadRequest("You must provide a words parameter")
    words = int(words)
    temp = request.form.get("temperature") or 1.0
    temp = float(temp)
    if temp < 1e-3:
        return BadRequest("Temperature has to be greater than or equal to 1e-3")
    print(words, temp)
    # Generate the words
    generate(words, temp)
    # Return the generated text
    return send_file(OUTPUT_PATH, mimetype='text/plain')


if __name__ == '__main__':
    app.run(host='0.0.0.0')

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Word-level language modeling RNN

This example trains a multi-layer RNN (Elman, GRU, or LSTM) on a language modeling task.
By default, the training script uses the provided PTB dataset.
The trained model can then be used by the generate script to generate new text.
This is a port of [pytorch/examples/word_language_model](https://github.com/pytorch/examples/tree/master/word_language_model) that makes it usable on [FloydHub](https://www.floydhub.com/).

## Usage

The `main.py` script accepts the following arguments:

```bash
optional arguments:
  -h, --help               show this help message and exit
  --data DATA              location of the data corpus
  --checkpoint CHECKPOINT  model checkpoint to use
  --model MODEL            type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)
  --emsize EMSIZE          size of word embeddings
  --nhid NHID              number of hidden units per layer
  --nlayers NLAYERS        number of layers
  --lr LR                  initial learning rate
  --clip CLIP              gradient clipping
  --epochs EPOCHS          upper epoch limit
  --batch_size N           batch size
  --bptt BPTT              sequence length
  --dropout DROPOUT        dropout applied to layers (0 = no dropout)
  --tied                   tie the word embedding and softmax weights
  --seed SEED              random seed
  --cuda                   use CUDA
  --log-interval N         report interval
  --save SAVE              path to save the final model
```

With these arguments, a variety of models can be tested.
As an example, the following arguments produce slower but better models:

```bash
python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40           # Test perplexity of 80.97
python main.py --cuda --emsize 650 --nhid 650 --dropout 0.5 --epochs 40 --tied    # Test perplexity of 75.96
python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40        # Test perplexity of 77.42
python main.py --cuda --emsize 1500 --nhid 1500 --dropout 0.65 --epochs 40 --tied # Test perplexity of 72.30
```
These perplexities are equal to or better than
[Recurrent Neural Network Regularization (Zaremba et al. 2014)](https://arxiv.org/pdf/1409.2329.pdf)
and are similar to [Using the Output Embedding to Improve Language Models (Press & Wolf 2016)](https://arxiv.org/abs/1608.05859) and [Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling (Inan et al. 2016)](https://arxiv.org/pdf/1611.01462.pdf), though both of these papers have improved perplexities by using a form of recurrent dropout ([variational dropout](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks)).


## Architecture

Coming soon.

## Run on FloydHub

Here are the commands to train, evaluate, and serve your language model on FloydHub.

### Project Setup

Before you start, log in to FloydHub with the [floyd login](http://docs.floydhub.com/commands/login/) command, then fork and init the project:

```bash
$ git clone https://github.com/floydhub/word-language-model.git
$ cd word-language-model
$ floyd init word-language-model
```

### Training

Before you start, you need to upload the [Penn Treebank-3 dataset](https://catalog.ldc.upenn.edu/ldc99t42) as a FloydHub dataset by following this guide: [create and upload a dataset](https://docs.floydhub.com/guides/create_and_upload_dataset/). Then you will be ready to play with different language models.

```bash
# Train an LSTM on PTB with CUDA, reaching a perplexity of 114.22
floyd run --gpu --env pytorch-0.2 --data <username>/dataset/<dataset_name>/<version>:input "python main.py --cuda --epochs 7"

# Train a tied LSTM on PTB with CUDA, reaching a perplexity of 110.44
floyd run --gpu --env pytorch-0.2 --data <username>/dataset/<dataset_name>/<version>:input "python main.py --cuda --epochs 7 --tied"

# Train a tied LSTM on PTB with CUDA for 40 epochs, reaching a perplexity of 87.17
floyd run --gpu --env pytorch-0.2 --data <username>/dataset/<dataset_name>/<version>:input "python main.py --cuda --tied"
```

Note:

- `--gpu` runs your job on a FloydHub GPU instance.
- `--env pytorch-0.2` prepares a PyTorch 0.2 environment with Python 3.
- `--data <username>/dataset/<dataset_name>/<version>:input` mounts the previously uploaded Penn Treebank-3 dataset in the `/input` folder inside the container for our job.

The model uses the `nn.RNN` module (and its sister modules `nn.GRU` and `nn.LSTM`),
which will automatically use the cuDNN backend if run on CUDA with cuDNN installed.

During training, if a keyboard interrupt (Ctrl-C) is received,
training is stopped and the current model is evaluated against the test dataset.

You can follow the progress by using the [logs](https://docs.floydhub.com/commands/logs/) command.
The first two training examples should complete in about 5 minutes on a GPU instance and about 40 minutes on a CPU one. The last example should take about 30 minutes on a GPU instance and over 3 hours on a CPU instance.

### Evaluating

It's time to evaluate our model by generating some text:

```bash
# Generate samples from the trained LSTM model.
floyd run --gpu --env pytorch-0.2 --data <username>/dataset/<dataset_name>/<version>:input --data <job_name>:model "python generate.py --cuda"
```

### Try our pre-trained model

We provide a pre-trained model, trained for 40 epochs and reaching a test perplexity of 87.17:

```bash
# Generate samples from the trained LSTM model.
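# Note: the ":model" mount makes the training job's saved /output/model.pt
# available under /model inside this container, while the dataset mount
# provides the corpus that generate.py needs to rebuild its dictionary.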
floyd run --gpu --env pytorch-0.2 --data <username>/dataset/<dataset_name>/<version>:input --data <job_name>:model "python generate.py --cuda"
```


### Serve model through REST API

FloydHub supports serving mode for demo and testing purposes. Before serving your model through a REST API,
you need to create a `floyd_requirements.txt` and declare the flask requirement in it. If you run a job
with the `--mode serve` flag, FloydHub will run the `app.py` file in your project
and attach it to a dynamic service endpoint:

```bash
floyd run --gpu --mode serve --env pytorch-0.2 --data <username>/dataset/<dataset_name>/<version>:input --data <job_name>:model
```

The above command will print out a service endpoint for this job in your terminal console.

The service endpoint will take a couple of minutes to become ready. Once it's up, you can interact with the model by sending a POST request with the number of words and the temperature that the model will use to generate text:

```bash
# Template
# curl -X POST -o <output_file> -F "words=<number_of_words>" -F "temperature=<temperature>" <service_endpoint>

curl -X POST -o generated.txt -F "words=100" -F "temperature=3" https://www.floydlabs.com/expose/vk47ixT8NeYBTFeMavbWta
```

Any job running in serving mode will stay up until it reaches its maximum runtime. So
once you are done testing, **remember to shut down the job!**

*Note that this feature is in preview mode and is not production ready yet.*
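You can also call the endpoint from Python. Below is a minimal sketch using the `requests` library; the `ENDPOINT` URL is a placeholder for whatever `floyd run --mode serve` prints in your terminal, and the form fields match the ones `app.py` reads from `request.form`:

```python
import requests

# Placeholder: replace with the endpoint printed by `floyd run --mode serve`
ENDPOINT = "https://www.floydlabs.com/expose/<service_id>"

# Form-encoded fields, exactly what the Flask handler expects
resp = requests.post(ENDPOINT, data={"words": 100, "temperature": 1.0})
resp.raise_for_status()

# The service responds with the generated text as text/plain
with open("generated.txt", "w") as f:
    f.write(resp.text)
```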
## More resources

Some useful resources on NLP for Deep Learning and language modeling tasks:

- [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/)
- [Natural Language Processing with Deep Learning - Stanford](https://youtu.be/OQQ-W_63UgQ)
- [Oxford Deep NLP 2017 course](https://github.com/oxford-cs-deepnlp-2017/lectures)

## Contributing

For any questions, bugs (even typos), and/or feature requests, do not hesitate to contact me or open an issue!

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import argparse
import time
import math
import torch
import torch.nn as nn
from torch.autograd import Variable

import data
import model

# Checkpoint support added on top of the original example
parser = argparse.ArgumentParser(description='PyTorch PennTreeBank RNN/LSTM Language Model')
parser.add_argument('--data', type=str, default='/input',
                    help='location of the data corpus')
parser.add_argument('--checkpoint', type=str, default='',
                    help='model checkpoint to use')
parser.add_argument('--model', type=str, default='LSTM',
                    help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)')
parser.add_argument('--emsize', type=int, default=200,
                    help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=200,
                    help='number of hidden units per layer')
parser.add_argument('--nlayers', type=int, default=2,
                    help='number of layers')
parser.add_argument('--lr', type=float, default=20,
                    help='initial learning rate')
parser.add_argument('--clip', type=float, default=0.25,
                    help='gradient clipping')
parser.add_argument('--epochs', type=int, default=40,
                    help='upper epoch limit')
parser.add_argument('--batch_size', type=int, default=20, metavar='N',
                    help='batch size')
parser.add_argument('--bptt', type=int, default=35,
                    help='sequence length')
parser.add_argument('--dropout', type=float, default=0.2,
                    help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--tied', action='store_true',
                    help='tie the word embedding and softmax weights')
parser.add_argument('--seed', type=int, default=1111,
                    help='random seed')
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA')
parser.add_argument('--log-interval', type=int, default=200, metavar='N',
                    help='report interval')
parser.add_argument('--save', type=str, default='/output/model.pt',
                    help='path to save the final model')
args = parser.parse_args()

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)

###############################################################################
# Load data
###############################################################################

corpus = data.Corpus(args.data)


def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
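    # Illustrative example: with bsz=2, the token stream [a b c d e f g h i j]
    # is reshaped by view(bsz, -1).t() into
    #     [[a f]
    #      [b g]
    #      [c h]
    #      [d i]
    #      [e j]]
    # so each column is a contiguous slice of the corpus, and get_batch()
    # later reads consecutive rows as (input, target) sequences.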
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers, args.dropout, args.tied)

# Load checkpoint
if args.checkpoint != '':
    if args.cuda:
        model = torch.load(args.checkpoint)
    else:
        # Load a GPU-trained model on the CPU
        model = torch.load(args.checkpoint, map_location=lambda storage, loc: storage)

if args.cuda:
    model.cuda()
else:
    model.cpu()
print(model)

criterion = nn.CrossEntropyLoss()
if args.cuda:
    criterion.cuda()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)


def get_batch(source, i, evaluation=False):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = Variable(source[i:i+seq_len], volatile=evaluation)
    target = Variable(source[i+1:i+1+seq_len].view(-1))
    return data, target


def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)


def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to the start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
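        # Note: clipping rescales the global gradient norm down to at most
        # args.clip; the loop below then applies a plain SGD update,
        # p <- p - lr * grad, without using an optimizer object.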
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt, lr,
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()


# Loop over epochs.
lr = args.lr
best_val_loss = None

# At any point you can hit Ctrl + C to break out of training early.
try:
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train()
        val_loss = evaluate(val_data)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        print('-' * 89)
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            with open(args.save, 'wb') as f:
                torch.save(model, f)
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen on the validation dataset.
            lr /= 4.0
except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
with open(args.save, 'rb') as f:
    model = torch.load(f)

# Run on test data.
test_loss = evaluate(test_data)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
print('=' * 89)

--------------------------------------------------------------------------------