├── language-modeling ├── .gitignore ├── locked_dropout.py ├── getdata.sh ├── embed_regularize.py ├── utils.py ├── LICENSE ├── data.py ├── generate.py ├── weight_drop.py ├── test-model.py ├── model.py ├── pointer.py ├── g2_lstm.py ├── finetune.py └── main.py ├── machine-translation ├── libs │ ├── multiverso_ │ │ ├── theano_ext │ │ │ ├── __init__.py │ │ │ ├── lasagne_ext │ │ │ │ ├── __init__.py │ │ │ │ └── param_manager.py │ │ │ └── sharedvar.py │ │ ├── Multiverso.dll │ │ ├── libmultiverso.so │ │ ├── __init__.py │ │ ├── api.py │ │ ├── utils.py │ │ ├── tests │ │ │ └── test_multiverso.py │ │ └── tables.py │ ├── __init__.py │ ├── utility │ │ ├── __init__.py │ │ ├── basic.py │ │ ├── data_iterator.py │ │ ├── optimizers.py │ │ └── translate.py │ ├── layers │ │ ├── __init__.py │ │ ├── layers_.py │ │ └── basic.py │ ├── gpu_manager.py │ ├── models │ │ └── __init__.py │ ├── config.py │ └── constants.py ├── scripts │ ├── convert_bpe_dic.py │ ├── map_vocabs.py │ ├── get_small_train.py │ ├── build_dictionary.py │ ├── moses │ │ ├── detruecase.perl │ │ ├── truecase.perl │ │ ├── train-truecaser.perl │ │ └── multi-bleu.perl │ └── plot_cost.py ├── math │ └── math.py ├── .gitignore ├── translate_compressed.py ├── seq_translate.py ├── translate_single.py ├── translate.py ├── replace_unk.py └── train_nmt.py └── README.md /language-modeling/.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | __pycache__/ 3 | -------------------------------------------------------------------------------- /machine-translation/libs/multiverso_/theano_ext/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /machine-translation/libs/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | __author__ = 'fyabc' 5 | -------------------------------------------------------------------------------- /machine-translation/libs/multiverso_/theano_ext/lasagne_ext/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /machine-translation/libs/multiverso_/Multiverso.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cafe/g2-lstm/master/machine-translation/libs/multiverso_/Multiverso.dll -------------------------------------------------------------------------------- /machine-translation/libs/multiverso_/libmultiverso.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cafe/g2-lstm/master/machine-translation/libs/multiverso_/libmultiverso.so -------------------------------------------------------------------------------- /machine-translation/libs/utility/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from .basic import * 5 | 6 | __author__ = 'fyabc' 7 | -------------------------------------------------------------------------------- /machine-translation/libs/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from .basic import tanh, linear, dropout_layer, attention_layer, param_init_feed_forward, feed_forward 5 | from .gru import * 6 | from .lstm import * 7 | from .layers_ import * 8 | 9 | __author__ = 'fyabc' 10 | -------------------------------------------------------------------------------- /machine-translation/libs/utility/basic.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | import numpy as np 5 | 6 | from ..constants import fX 7 | 8 | __author__ = 'fyabc' 9 | 10 | 11 | def floatX(value): 12 | return np.asarray(value, dtype=fX) 13 | 14 | 15 | __all__ = [ 16 | 'floatX', 17 | ] 18 | -------------------------------------------------------------------------------- /machine-translation/libs/multiverso_/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | """The multiverso library. 5 | 6 | Copied from v-yixia. 7 | """ 8 | 9 | from api import init, shutdown, barrier, workers_num, worker_id, server_id, is_master_worker 10 | from tables import ArrayTableHandler, MatrixTableHandler 11 | -------------------------------------------------------------------------------- /language-modeling/locked_dropout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | class LockedDropout(nn.Module): 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def forward(self, x, dropout=0.5): 10 | if not self.training or not dropout: 11 | return x 12 | m = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout) 13 | mask = Variable(m, requires_grad=False) / (1 - dropout) 14 | mask = mask.expand_as(x) 15 | return mask * x 16 | -------------------------------------------------------------------------------- /language-modeling/getdata.sh: -------------------------------------------------------------------------------- 1 | echo "=== Acquiring datasets ===" 2 | echo "---" 3 | mkdir -p data 4 | cd data 5 | 6 | echo "- Downloading Penn Treebank (PTB)" 7 | mkdir -p penn 8 | cd penn 9 | wget --quiet --continue https://github.com/pytorch/examples/raw/master/word_language_model/data/penn/train.txt 10 | wget --quiet --continue https://github.com/pytorch/examples/raw/master/word_language_model/data/penn/valid.txt 11 | wget --quiet --continue https://github.com/pytorch/examples/raw/master/word_language_model/data/penn/test.txt 12 | cd .. 13 | 14 | echo "- Downloading WikiText-2 (WT2)" 15 | wget --quiet --continue https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip 16 | unzip -q wikitext-2-v1.zip 17 | cd wikitext-2 18 | mv wiki.train.tokens train.txt 19 | mv wiki.valid.tokens valid.txt 20 | mv wiki.test.tokens test.txt 21 | 22 | echo "---" 23 | echo "Happy language modeling :)" 24 | -------------------------------------------------------------------------------- /machine-translation/scripts/convert_bpe_dic.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python
 2 | # -*- encoding: utf-8 -*-
 3 |
 4 | from __future__ import print_function
 5 |
 6 | import sys
 7 | from collections import OrderedDict
 8 | import cPickle as pkl
 9 |
10 | __author__ = 'fyabc'
11 |
12 |
13 | def main():
14 |     input_filename = sys.argv[1]
15 |
16 |     with open(input_filename, 'r') as f_in:
17 |         d = OrderedDict()
18 |
19 |         d['eos'] = 0
20 |         d['UNK'] = 1
21 |
22 |         i = 2
23 |
24 |         for line in f_in:
25 |             word = line.strip()
26 |             if word:
27 |                 d[word] = i
28 |                 i += 1
29 |
30 |     with open('{}.pkl'.format(input_filename), 'wb') as f_out:
31 |         pkl.dump(d, f_out)
32 |
33 |     print('Convert {} -> {}.pkl'.format(input_filename, input_filename))
34 |
35 | if __name__ == '__main__':
36 |     main()
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # g2-lstm
 2 | Code for "Towards Binary-Valued Gates for Robust LSTM Training".
 3 |
 4 | Language modeling code is based on [awd-lstm-lm](https://github.com/salesforce/awd-lstm-lm) and uses PyTorch.
 5 |
 6 | Machine translation code is based on Theano.
 7 |
 8 | Implementation of the Gumbel-Gate LSTM (G2-LSTM): [PyTorch version](language-modeling/g2_lstm.py), [Theano version](machine-translation/libs/layers/stochastic_lstm.py).
 9 |
10 | We also apply *dropout* to the Gumbel noise added to the gates: given a fixed probability *p*, each gate is independently perturbed by the Gumbel noise with probability *p* and left unperturbed otherwise. We find that the trained G2-LSTM performs better than the baseline LSTM regardless of the value of *p*: a small *p* gives better generalization, while a large *p* gives a smaller performance drop under compression. We fix *p = 0.2* in all experiments in the paper.
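As a rough illustration of this noise-dropout scheme (a minimal sketch, **not** the reference implementations in `g2_lstm.py` / `stochastic_lstm.py`; the helper names, the temperature value, and the evaluation-time behaviour below are assumptions made for the example), a Gumbel-perturbed gate can be written in PyTorch roughly as:

```python
import torch


def logistic_noise_like(x, eps=1e-10):
    """Difference of two independent Gumbel(0, 1) samples, shaped like `x`.

    A Gumbel(0, 1) sample is -log(-log(U)) with U ~ Uniform(0, 1).
    """
    u1, u2 = torch.rand_like(x), torch.rand_like(x)
    g1 = -torch.log(-torch.log(u1 + eps) + eps)
    g2 = -torch.log(-torch.log(u2 + eps) + eps)
    return g1 - g2


def gumbel_sigmoid(logits, temperature=0.5, noise_p=0.2, training=True):
    """Gate activation sigmoid((logits + noise) / temperature).

    During training, each gate entry is perturbed by Gumbel noise with
    probability `noise_p` and left unperturbed otherwise (the dropout on the
    noise described above); at evaluation time no noise is added.
    """
    if training:
        perturb = torch.bernoulli(torch.full_like(logits, noise_p))  # 1 -> add noise
        logits = logits + perturb * logistic_noise_like(logits)
    return torch.sigmoid(logits / temperature)


# Hypothetical usage inside a hand-written LSTM step, e.g. for the input gate:
#   i_t = gumbel_sigmoid(x_t @ W_i + h_prev @ U_i + b_i,
#                        noise_p=0.2, training=self.training)
```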
11 |
--------------------------------------------------------------------------------
/machine-translation/math/math.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 |
 3 |
 4 | def low_rank_approx(SVD=None, A=None, r=1):
 5 |     """
 6 |     Computes an r-rank approximation of a matrix
 7 |     given the components u, s, and v of its SVD
 8 |     Requires: numpy
 9 |     """
10 |     if not SVD:
11 |         SVD = np.linalg.svd(A, full_matrices=False)
12 |     u, s, v = SVD
13 |     Ar = np.zeros((len(u), len(v)))
14 |     for i in xrange(r):
15 |         Ar += s[i] * np.outer(u.T[i], v[i])
16 |     return Ar
17 |
18 | if __name__ == "__main__":
19 |     """
20 |     Quick check: print the first row of an r-rank approximation
21 |     of a random matrix for increasing values of r
22 |     Requires: numpy
23 |     """
24 |     x = np.random.rand(10,10)
25 |     u, s, v = np.linalg.svd(x, full_matrices=False)
26 |     i = 1
27 |     print x[0]
28 |     while i < 10:
29 |         y = low_rank_approx((u, s, v), r=i)
30 |         print y[0]
31 |         i += 1
--------------------------------------------------------------------------------
/machine-translation/libs/gpu_manager.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | import subprocess
 3 | import os
 4 | import re
 5 | import numpy as np
 6 |
 7 | def get_gpu_usage(ranks):
 8 |     exec_nvidia_smi = 'nvidia-smi' if platform.system() == 'Linux' else '\"C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe\"'
 9 |     pl_output = subprocess.Popen(exec_nvidia_smi, shell=True,
10 |                                  stdout=subprocess.PIPE, stderr=open(os.devnull, 'w')).stdout.read()
11 |
12 |     pattern = re.compile(r'(?P<num>[0-9]{1,5})MiB[\s]+/')
13 |     gpu_mems_usages = []
14 |     for line in pl_output.split('\n'):
15 |         result = pattern.search(line)
16 |         if result:
17 |             gpu_mems_usages.append(int(result.group("num")))
18 |     sorted_gpu_ids = np.argsort(np.array(gpu_mems_usages, dtype=np.float32))
19 |     top = min(ranks, len(gpu_mems_usages))
20 |     return (np.array(range(len(gpu_mems_usages)), dtype=np.int)[sorted_gpu_ids[:top]]).tolist()
--------------------------------------------------------------------------------
/machine-translation/libs/models/__init__.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 |
 4 | from .model import *
 5 |
 6 | __author__ = 'fyabc'
 7 |
 8 |
 9 | def build_and_init_model(model_name, options=None, build=True, model_type='NMTModel'):
10 |     import cPickle as pkl
11 |
12 |     from ..config import DefaultOptions
13 |     from ..utility.utils import load_params
14 |
15 |     if options is None:
16 |         with open('{}.pkl'.format(model_name), 'rb') as f:
17 |             options = DefaultOptions.copy()
18 |             options.update(pkl.load(f))
19 |
20 |     model = eval(model_type)(options)
21 |
22 |     # allocate model parameters
23 |     params = model.initializer.init_params()
24 |     # load model parameters and set theano shared variables
25 |     params = load_params(model_name, params)
26 |     model.init_tparams(params)
27 |
28 |     if build:
29 |         ret = model.build_model()
30 |         return model, options, ret
31 |     return model, options
32 |
--------------------------------------------------------------------------------
/machine-translation/libs/layers/layers_.py:
--------------------------------------------------------------------------------
 1 | #! 
/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from .basic import * 5 | from .gru import * 6 | from .stochastic_lstm import * 7 | 8 | __author__ = 'fyabc' 9 | 10 | # layers: 'name': ('parameter initializer', 'builder') 11 | layers = { 12 | 'ff': (param_init_feed_forward, feed_forward), 13 | 'gru': (param_init_gru, gru_layer), 14 | 'gru_cond': (param_init_gru_cond, gru_cond_layer), 15 | 'multi_gru': (param_init_gru, gru_layer), 16 | 'multi_gru_cond': (param_init_gru_cond, gru_cond_layer), 17 | 'lstm': (param_init_lstm, lstm_layer), 18 | 'lstm_cond': (param_init_lstm_cond, lstm_cond_layer), 19 | # todo: implement it 20 | 'multi_lstm': (param_init_lstm, lstm_layer), 21 | 'multi_lstm_cond': (param_init_lstm_cond, lstm_cond_layer), 22 | } 23 | 24 | 25 | def get_layer(name): 26 | fns = layers[name] 27 | return fns[0], fns[1] 28 | 29 | 30 | def get_init(name): 31 | return layers[name][0] 32 | 33 | 34 | def get_build(name): 35 | return layers[name][1] 36 | 37 | 38 | __all__ = [ 39 | 'layers', 40 | 'get_layer', 41 | 'get_build', 42 | 'get_init', 43 | ] 44 | -------------------------------------------------------------------------------- /language-modeling/embed_regularize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | def embedded_dropout(embed, words, dropout=0.1, scale=None): 7 | if dropout: 8 | mask = embed.weight.data.new().resize_((embed.weight.size(0), 1)).bernoulli_(1 - dropout).expand_as(embed.weight) / (1 - dropout) 9 | mask = Variable(mask) 10 | masked_embed_weight = mask * embed.weight 11 | else: 12 | masked_embed_weight = embed.weight 13 | if scale: 14 | masked_embed_weight = scale.expand_as(masked_embed_weight) * masked_embed_weight 15 | 16 | padding_idx = embed.padding_idx 17 | if padding_idx is None: 18 | padding_idx = -1 19 | X = embed._backend.Embedding.apply(words, masked_embed_weight, 20 | padding_idx, embed.max_norm, embed.norm_type, 21 | embed.scale_grad_by_freq, embed.sparse 22 | ) 23 | return X 24 | 25 | if __name__ == '__main__': 26 | V = 50 27 | h = 4 28 | bptt = 10 29 | batch_size = 2 30 | 31 | embed = torch.nn.Embedding(V, h) 32 | 33 | words = np.random.random_integers(low=0, high=V-1, size=(batch_size, bptt)) 34 | words = torch.LongTensor(words) 35 | words = Variable(words) 36 | 37 | origX = embed(words) 38 | X = embedded_dropout(embed, words) 39 | 40 | print(origX) 41 | print(X) 42 | -------------------------------------------------------------------------------- /language-modeling/utils.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | 3 | def repackage_hidden(h): 4 | """Wraps hidden states in new Variables, to detach them from their history.""" 5 | if type(h) == Variable: 6 | return Variable(h.data) 7 | else: 8 | return tuple(repackage_hidden(v) for v in h) 9 | 10 | def batchify(data, bsz, args): 11 | # Work out how cleanly we can divide the dataset into bsz parts. 12 | nbatch = data.size(0) // bsz 13 | # Trim off any extra elements that wouldn't cleanly fit (remainders). 14 | data = data.narrow(0, 0, nbatch * bsz) 15 | # Evenly divide the data across the bsz batches. 
16 | data = data.view(bsz, -1).t().contiguous() 17 | if args.cuda: 18 | data = data.cuda() 19 | return data 20 | 21 | def get_batch(source, i, args, seq_len=None, evaluation=False): 22 | seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i) 23 | data = Variable(source[i:i+seq_len], volatile=evaluation) 24 | target = Variable(source[i+1:i+1+seq_len].view(-1)) 25 | return data, target 26 | 27 | import sys 28 | 29 | _log_file = None 30 | 31 | def set_log_file(name): 32 | global _log_file 33 | if name != '': 34 | _log_file = open(name, 'w') 35 | 36 | def message(msg): 37 | print(msg) 38 | sys.stdout.flush() 39 | if _log_file is not None: 40 | _log_file.write(msg + '\n') 41 | _log_file.flush() 42 | 43 | -------------------------------------------------------------------------------- /machine-translation/scripts/map_vocabs.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | #used to map new dataset vocab id to old dataset vocab id 4 | 5 | import sys 6 | import cPickle as pkl 7 | 8 | def main(): 9 | 10 | new_src_dic_file = sys.argv[1] 11 | new_tgt_dic_file = sys.argv[2] 12 | old_src_dic_file = sys.argv[3] 13 | old_tgt_dic_file = sys.argv[4] 14 | 15 | new_to_old_src_map = {} 16 | new_to_old_tgt_map = {} 17 | 18 | o_src_dic = pkl.load(open(old_src_dic_file, 'rb')) 19 | o_tgt_dic = pkl.load(open(old_tgt_dic_file, 'rb')) 20 | 21 | new_src_dic = pkl.load(open(new_src_dic_file, 'rb')) 22 | new_tgt_dic = pkl.load(open(new_tgt_dic_file, 'rb')) 23 | 24 | for (word, id) in new_src_dic.iteritems(): 25 | if word in o_src_dic: 26 | new_to_old_src_map[id] = o_src_dic[word] 27 | 28 | print 'Find %d vocabs in total %d src vocabs' % (len(new_to_old_src_map), len(new_src_dic)) 29 | 30 | for (word, id) in new_tgt_dic.iteritems(): 31 | if word in o_tgt_dic: 32 | new_to_old_tgt_map[id] = o_tgt_dic[word] 33 | 34 | print 'Find %d vocabs in total %d target vocabs' % (len(new_to_old_tgt_map), len(new_tgt_dic)) 35 | 36 | pkl.dump(new_to_old_src_map, open('../resources/enfr_large2small_src_vocab_map.pkl', 'wb')) 37 | pkl.dump(new_to_old_tgt_map, open('../resources/enfr_large2small_tgt_vocab_map.pkl', 'wb')) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() -------------------------------------------------------------------------------- /machine-translation/scripts/get_small_train.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | import sys 7 | import os 8 | import random 9 | 10 | __author__ = 'fyabc' 11 | 12 | 13 | def main(): 14 | input_filename1 = sys.argv[1] 15 | input_filename2 = sys.argv[2] 16 | 17 | if len(sys.argv) >= 4: 18 | small_size = int(sys.argv[3]) 19 | else: 20 | small_size = 10000 21 | 22 | with open(input_filename1, 'r') as f_in: 23 | lines = list(f_in) 24 | 25 | selected_indices = random.sample(range(len(lines)), small_size) 26 | 27 | head, tail = os.path.split(input_filename1) 28 | output_filename1 = '{}{}small_{}'.format(head, '/' if head else '', tail) 29 | with open(output_filename1, 'w') as f_out: 30 | for index in selected_indices: 31 | print(lines[index], end='', file=f_out) 32 | 33 | print('Extract {} -> {}'.format(input_filename1, output_filename1)) 34 | 35 | with open(input_filename2, 'r') as f_in: 36 | lines = list(f_in) 37 | 38 | head, tail = os.path.split(input_filename2) 39 | output_filename2 = '{}{}small_{}'.format(head, '/' if head else '', tail) 40 | with open(output_filename2, 'w') as f_out: 41 | for index in selected_indices: 42 | print(lines[index], end='', file=f_out) 43 | 44 | print('Extract {} -> {}'.format(input_filename2, output_filename2)) 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /language-modeling/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 |
--------------------------------------------------------------------------------
/language-modeling/data.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 |
 4 | from collections import Counter
 5 |
 6 |
 7 | class Dictionary(object):
 8 |     def __init__(self):
 9 |         self.word2idx = {}
10 |         self.idx2word = []
11 |         self.counter = Counter()
12 |         self.total = 0
13 |
14 |     def add_word(self, word):
15 |         if word not in self.word2idx:
16 |             self.idx2word.append(word)
17 |             self.word2idx[word] = len(self.idx2word) - 1
18 |         token_id = self.word2idx[word]
19 |         self.counter[token_id] += 1
20 |         self.total += 1
21 |         return self.word2idx[word]
22 |
23 |     def __len__(self):
24 |         return len(self.idx2word)
25 |
26 |
27 | class Corpus(object):
28 |     def __init__(self, path):
29 |         self.dictionary = Dictionary()
30 |         self.train = self.tokenize(os.path.join(path, 'train.txt'))
31 |         self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
32 |         self.test = self.tokenize(os.path.join(path, 'test.txt'))
33 |
34 |     def tokenize(self, path):
35 |         """Tokenizes a text file."""
36 |         assert os.path.exists(path)
37 |         # Add words to the dictionary
38 |         with open(path, 'r') as f:
39 |             tokens = 0
40 |             for line in f:
41 |                 words = line.split() + ['<eos>']
42 |                 tokens += len(words)
43 |                 for word in words:
44 |                     self.dictionary.add_word(word)
45 |
46 |         # Tokenize file content
47 |         with open(path, 'r') as f:
48 |             ids = torch.LongTensor(tokens)
49 |             token = 0
50 |             for line in f:
51 |                 words = line.split() + ['<eos>']
52 |                 for word in words:
53 |                     ids[token] = self.dictionary.word2idx[word]
54 |                     token += 1
55 |
56 |         return ids
57 |
--------------------------------------------------------------------------------
/machine-translation/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 |
 6 | # C extensions
 7 | # *.so
 8 |
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # Pycharm project settings 92 | .idea/ 93 | 94 | # data 95 | data/ 96 | 97 | # model 98 | model/ 99 | 100 | # log 101 | log/ 102 | 103 | # translated files 104 | translated/ 105 | 106 | # numpy saved models 107 | *.npz 108 | 109 | # theano config files 110 | .theanorc.* 111 | 112 | # Command line argument files 113 | /arguments/ 114 | -------------------------------------------------------------------------------- /machine-translation/libs/multiverso_/api.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import ctypes 5 | 6 | import numpy as np 7 | 8 | from utils import Loader 9 | 10 | 11 | mv_lib = Loader.get_lib() 12 | 13 | 14 | def init(sync=False): 15 | """Initialize multiverso. 16 | 17 | This should be called only once before training at the beginning of the 18 | whole project. 19 | If sync is True, a sync server will be created. Otherwise an async server 20 | will be created. 21 | """ 22 | 23 | args = [""] # the first argument will be ignored. So we put a placeholder here 24 | if sync: 25 | args.append("-sync=true") 26 | n = len(args) 27 | args_type = ctypes.c_char_p * n 28 | mv_lib.MV_Init(ctypes.pointer(ctypes.c_int(n)), args_type(*[ctypes.c_char_p(arg) for arg in args])) 29 | 30 | 31 | def shutdown(): 32 | """Shutdown multiverso. 33 | 34 | This should be called only once after finishing training at the end of the 35 | whole project. 36 | """ 37 | mv_lib.MV_ShutDown() 38 | 39 | 40 | def barrier(): 41 | """Set a barrier for all workers to wait. 42 | 43 | Workers will wait until all workers reach a specific barrier. 44 | """ 45 | mv_lib.MV_Barrier() 46 | 47 | 48 | def workers_num(): 49 | """Return the total number of workers.""" 50 | return mv_lib.MV_NumWorkers() 51 | 52 | 53 | def worker_id(): 54 | """Return the id (zero-based index) for current worker.""" 55 | return mv_lib.MV_WorkerId() 56 | 57 | 58 | def server_id(): 59 | return mv_lib.MV_ServerId() 60 | 61 | 62 | def is_master_worker(): 63 | """If the worker is master worker. 64 | 65 | Some things only need one worker process, such as validation, outputting the 66 | result, initializing the parameters and so on. So we mark the worker 0 as 67 | the master worker to finish these things. 
68 | """ 69 | return worker_id() == 0 70 | 71 | 72 | __all__ = [ 73 | 'init', 74 | 'shutdown', 75 | 'barrier', 76 | 'workers_num', 77 | 'worker_id', 78 | 'server_id', 79 | 'is_master_worker', 80 | ] 81 | -------------------------------------------------------------------------------- /machine-translation/scripts/build_dictionary.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | 6 | import numpy 7 | 8 | try: 9 | import cPickle as pkl 10 | except: 11 | import pickle as pkl 12 | 13 | import sys 14 | import os 15 | import fileinput 16 | 17 | from collections import OrderedDict 18 | import argparse 19 | 20 | __author__ = 'fyabc' 21 | 22 | 23 | def real_main(args): 24 | if args.output is None: 25 | args.output = '{}.pkl'.format(args.input[0]) 26 | 27 | tgt_filename = os.path.join('data', 'dic', args.output) 28 | 29 | word_freqs = OrderedDict() 30 | worddict = OrderedDict() 31 | worddict['eos'] = 0 32 | worddict['UNK'] = 1 33 | 34 | for filename in args.input: 35 | src_filename = os.path.join('data', 'train', filename) 36 | 37 | print('Processing', src_filename) 38 | 39 | with open(src_filename, 'r') as f: 40 | for line in f: 41 | words_in = line.strip().split(' ') 42 | for w in words_in: 43 | if w not in word_freqs: 44 | word_freqs[w] = 0 45 | word_freqs[w] += 1 46 | 47 | words = list(word_freqs.keys()) 48 | freqs = list(word_freqs.values()) 49 | 50 | sorted_idx = numpy.argsort(freqs) 51 | sorted_words = [words[ii] for ii in sorted_idx[::-1]] 52 | 53 | for ii, ww in enumerate(sorted_words): 54 | worddict[ww] = ii + 2 55 | 56 | with open(tgt_filename, 'wb') as f: 57 | print('Dump to', tgt_filename) 58 | 59 | pkl.dump(worddict, f) 60 | 61 | 62 | def main(args=None): 63 | parser = argparse.ArgumentParser(description='Build dictionary file.') 64 | 65 | parser.add_argument('input', nargs='+', 66 | help='input filenames') 67 | parser.add_argument('-o', '--output', action='store', dest='output', default=None, 68 | help='dict output file, default is first input filename + ".pkl"') 69 | 70 | args = parser.parse_args(args) 71 | 72 | real_main(args) 73 | 74 | 75 | if __name__ == '__main__': 76 | main() 77 | -------------------------------------------------------------------------------- /machine-translation/scripts/moses/detruecase.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 
5 | 6 | use warnings; 7 | use strict; 8 | use Getopt::Long "GetOptions"; 9 | 10 | binmode(STDIN, ":utf8"); 11 | binmode(STDOUT, ":utf8"); 12 | 13 | my ($SRC,$INFILE,$UNBUFFERED); 14 | die("detruecase.perl < in > out") 15 | unless &GetOptions('headline=s' => \$SRC, 16 | 'in=s' => \$INFILE, 17 | 'b|unbuffered' => \$UNBUFFERED); 18 | if (defined($UNBUFFERED) && $UNBUFFERED) { $|=1; } 19 | 20 | my %SENTENCE_END = ("."=>1,":"=>1,"?"=>1,"!"=>1); 21 | my %DELAYED_SENTENCE_START = ("("=>1,"["=>1,"\""=>1,"'"=>1,"""=>1,"'"=>1,"["=>1,"]"=>1); 22 | 23 | # lowercase even in headline 24 | my %ALWAYS_LOWER; 25 | foreach ("a","after","against","al-.+","and","any","as","at","be","because","between","by","during","el-.+","for","from","his","in","is","its","last","not","of","off","on","than","the","their","this","to","was","were","which","will","with") { $ALWAYS_LOWER{$_} = 1; } 26 | 27 | # find out about the headlines 28 | my @HEADLINE; 29 | if (defined($SRC)) { 30 | open(SRC,$SRC); 31 | my $headline_flag = 0; 32 | while() { 33 | $headline_flag = 1 if //; 34 | $headline_flag = 0 if /<.hl>/; 35 | next unless /^) { 46 | &process($_,$sentence++); 47 | } 48 | close(IN); 49 | } 50 | else { 51 | while() { 52 | &process($_,$sentence++); 53 | } 54 | } 55 | 56 | sub process { 57 | my $line = $_[0]; 58 | chomp($line); 59 | $line =~ s/^\s+//; 60 | $line =~ s/\s+$//; 61 | my @WORD = split(/\s+/,$line); 62 | 63 | # uppercase at sentence start 64 | my $sentence_start = 1; 65 | for(my $i=0;$i