├── .gitignore
├── LICENSE
├── README.md
├── chainer-1.4
    ├── lm_rnn.py
    ├── mt_s2s_attention.py
    ├── mt_s2s_encdec.py
    ├── seg_ffnn.py
    ├── seg_rnn.py
    └── util
    │   ├── __init__.py
    │   ├── chainer_cpu_wrapper.py
    │   ├── chainer_gpu_wrapper.py
    │   ├── functions.py
    │   ├── generators.py
    │   ├── model_file.py
    │   └── vocabulary.py
└── chainer-1.5
    ├── LSTMVariants.py
    ├── attention_lm.py
    ├── mt_s2s_attention.py
    ├── mt_s2s_encdec.py
    └── util
        ├── __init__.py
        ├── functions.py
        ├── generators.py
        └── vocabulary.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | data
3 | hyp
4 | model
5 | nohup.out
6 | test
7 | my_settings.py
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | chainer_examples License
 2 | ()
 3 | 
 4 | Copyright (c) 2015~ Yusuke Oda
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Chainer example code for NLP
 2 | ============================
 3 | 
 4 | **This repository is out of date and rough. I do not guarantee that these code works correctly.**
 5 | 
 6 | **I am developing a new NMT toolkit [NMTKit](https://github.com/odashi/nmtkit) and strongly recommend to use it instead of these samples to train neural translation models.**
 7 | 
 8 | This repository contains some neural network examples
 9 | for natural language processing (NLP)
10 | using **Chainer** framework.
11 | 
12 | [Chainer Official](http://chainer.org/ "Chainer official") ([GitHub](https://github.com/pfnet/chainer "Github"))
13 | 
14 | Making Local Client
15 | -------------------
16 | 
17 | Before running these scripts, making a local python client using `pyenv` is
18 | reccomended, like:
19 | 
20 |     $ pyenv install 3.5.0
21 |     $ pyenv virtualenv 3.5.0 example
22 |     $ pyenv shell example
23 |     $ pip install chainer
24 | 
25 | Contents
26 | --------
27 | 
28 | * **Machine Translation**
29 |     * `mt_s2s_encdec.py` - Using encoder-decoder style recurrent neural network
30 |     * `mt_s2s_attention.py` - Using attentional neural network
31 | 
32 | * **Word Segmentation (Tokenization)**
33 |     * `seg_ffnn.py` - Using feedforward neural network
34 |     * `seg_rnn.py` - Using recurrent neural network
35 | 
36 | * **Language Model**
37 |     * `lm_rnn.py` - Using recurrent neural network (RNNLM)
38 | 
39 | Contact
40 | -------
41 | 
42 | If you find an issue or have some questions, please contact Yusuke Oda:
43 | * @odashi_t on Twitter (faster than other methods)
44 | * yus.takara (at) gmail.com
45 | 
46 | 


--------------------------------------------------------------------------------
/chainer-1.4/lm_rnn.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import my_settings
  4 | 
  5 | import datetime
  6 | import sys
  7 | import math
  8 | import numpy as np
  9 | from argparse import ArgumentParser
 10 | from collections import defaultdict
 11 | 
 12 | from chainer import FunctionSet, Variable, cuda, functions, optimizers
 13 | 
 14 | 
 15 | def trace(text):
 16 |     print(datetime.datetime.now(), '...', text, file=sys.stderr)
 17 | 
 18 | 
 19 | def make_var(array, dtype=np.float32):
 20 |     #return Variable(np.array(array, dtype=dtype))
 21 |     return Variable(cuda.to_gpu(np.array(array, dtype=dtype)))
 22 | 
 23 | def get_data(variable):
 24 |     #return variable.data
 25 |     return cuda.to_cpu(variable.data)
 26 | 
 27 | def zeros(shape, dtype=np.float32):
 28 |     #return Variable(np.zeros(shape, dtype=dtype))
 29 |     return Variable(cuda.zeros(shape, dtype=dtype))
 30 | 
 31 | def make_model(**kwargs):
 32 |     #return FunctionSet(**kwargs)
 33 |     return FunctionSet(**kwargs).to_gpu()
 34 | 
 35 | 
 36 | def make_vocab(filename, vocab_size):
 37 |     word_freq = defaultdict(lambda: 0)
 38 |     num_lines = 0
 39 |     num_words = 0
 40 |     with open(filename) as fp:
 41 |         for line in fp:
 42 |             words = line.split()
 43 |             num_lines += 1
 44 |             num_words += len(words)
 45 |             for word in words:
 46 |                 word_freq[word] += 1
 47 | 
 48 |     # 0: unk
 49 |     # 1: <s>
 50 |     # 2: </s>
 51 |     vocab = defaultdict(lambda: 0)
 52 |     vocab['<s>'] = 1
 53 |     vocab['</s>'] = 2
 54 |     for i,(k,v) in zip(range(vocab_size - 3), sorted(word_freq.items(), key=lambda x: -x[1])):
 55 |         vocab[k] = i + 3
 56 | 
 57 |     return vocab, num_lines, num_words
 58 | 
 59 | 
 60 | def generate_batch(filename, batch_size):
 61 |     with open(filename) as fp:
 62 |         batch = []
 63 |         try:
 64 |             while True:
 65 |                 for i in range(batch_size):
 66 |                     batch.append(next(fp).split())
 67 |                 
 68 |                 max_len = max(len(x) for x in batch)
 69 |                 batch = [['<s>'] + x + ['</s>'] * (max_len - len(x) + 1) for x in batch]
 70 |                 yield batch
 71 |                 
 72 |                 batch = []
 73 |         except:
 74 |             pass
 75 | 
 76 |         if batch:
 77 |             max_len = max(len(x) for x in batch)
 78 |             batch = [['<s>'] + x + ['</s>'] * (max_len - len(x) + 1) for x in batch]
 79 |             yield batch
 80 | 
 81 | 
 82 | def make_rnnlm_model(n_vocab, n_embed, n_hidden):
 83 |     return make_model(
 84 |         w_xe = functions.EmbedID(n_vocab, n_embed),
 85 |         w_eh = functions.Linear(n_embed, n_hidden),
 86 |         w_hh = functions.Linear(n_hidden, n_hidden),
 87 |         w_hy = functions.Linear(n_hidden, n_vocab),
 88 |     )
 89 | 
 90 | 
 91 | def save_rnnlm_model(filename, n_vocab, n_embed, n_hidden, vocab, model):
 92 |     fmt = '%.8e'
 93 |     dlm = ' '
 94 | 
 95 |     model.to_cpu()
 96 | 
 97 |     with open(filename, 'w') as fp:
 98 |         print(n_vocab, file=fp)
 99 |         print(n_embed, file=fp)
100 |         print(n_hidden, file=fp)
101 | 
102 |         for k, v in vocab.items():
103 |             if v == 0:
104 |                 continue
105 |             print('%s %d' % (k, v), file=fp)
106 |         
107 |         for row in model.w_xe.W:
108 |             print(dlm.join(fmt % x for x in row), file=fp)
109 |         
110 |         for row in model.w_eh.W:
111 |             print(dlm.join(fmt % x for x in row), file=fp)
112 |         print(dlm.join(fmt % x for x in model.w_eh.b), file=fp)
113 |         
114 |         for row in model.w_hh.W:
115 |             print(dlm.join(fmt % x for x in row), file=fp)
116 |         print(dlm.join(fmt % x for x in model.w_hh.b), file=fp)
117 |         
118 |         for row in model.w_hy.W:
119 |             print(dlm.join(fmt % x for x in row), file=fp)
120 |         print(dlm.join(fmt % x for x in model.w_hy.b), file=fp)
121 |     
122 |     model.to_gpu()
123 | 
124 | 
125 | def parse_args():
126 |     def_vocab = 40000
127 |     def_embed = 200
128 |     def_hidden = 200
129 |     def_epoch = 10
130 |     def_minibatch = 256
131 | 
132 |     p = ArgumentParser(description='RNNLM trainer')
133 | 
134 |     p.add_argument('corpus', help='[in] training corpus')
135 |     p.add_argument('model', help='[out] model file')
136 |     p.add_argument('-V', '--vocab', default=def_vocab, metavar='INT', type=int,
137 |         help='vocabulary size (default: %d)' % def_vocab)
138 |     p.add_argument('-E', '--embed', default=def_embed, metavar='INT', type=int,
139 |         help='embedding layer size (default: %d)' % def_embed)
140 |     p.add_argument('-H', '--hidden', default=def_hidden, metavar='INT', type=int,
141 |         help='hidden layer size (default: %d)' % def_hidden)
142 |     p.add_argument('-I', '--epoch', default=def_epoch, metavar='INT', type=int,
143 |         help='number of training epoch (default: %d)' % def_epoch)
144 |     p.add_argument('-B', '--minibatch', default=def_minibatch, metavar='INT', type=int,
145 |         help='minibatch size (default: %d)' % def_minibatch)
146 | 
147 |     args = p.parse_args()
148 | 
149 |     # check args
150 |     try:
151 |         if (args.vocab < 1): raise ValueError('you must set --vocab >= 1')
152 |         if (args.embed < 1): raise ValueError('you must set --embed >= 1')
153 |         if (args.hidden < 1): raise ValueError('you must set --hidden >= 1')
154 |         if (args.epoch < 1): raise ValueError('you must set --epoch >= 1')
155 |         if (args.minibatch < 1): raise ValueError('you must set --minibatch >= 1')
156 |     except Exception as ex:
157 |         p.print_usage(file=sys.stderr)
158 |         print(ex, file=sys.stderr)
159 |         sys.exit()
160 | 
161 |     return args
162 | 
163 |         
164 | def main():
165 |     args = parse_args()
166 | 
167 |     trace('making vocabulary ...')
168 |     vocab, num_lines, num_words = make_vocab(args.corpus, args.vocab)
169 | 
170 |     trace('initializing CUDA ...')
171 |     cuda.init()
172 | 
173 |     trace('start training ...')
174 |     model = make_rnnlm_model(args.vocab, args.embed, args.hidden)
175 | 
176 |     for epoch in range(args.epoch):
177 |         trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
178 |         log_ppl = 0.0
179 |         trained = 0
180 |         
181 |         opt = optimizers.SGD()
182 |         opt.setup(model)
183 | 
184 |         for batch in generate_batch(args.corpus, args.minibatch):
185 |             batch = [[vocab[x] for x in words] for words in batch]
186 |             K = len(batch)
187 |             L = len(batch[0]) - 1
188 | 
189 |             opt.zero_grads()
190 |             s_h = zeros((K, args.hidden))
191 | 
192 |             for l in range(L):
193 |                 s_x = make_var([batch[k][l] for k in range(K)], dtype=np.int32)
194 |                 s_t = make_var([batch[k][l + 1] for k in range(K)], dtype=np.int32)
195 | 
196 |                 s_e = functions.tanh(model.w_xe(s_x))
197 |                 s_h = functions.tanh(model.w_eh(s_e) + model.w_hh(s_h))
198 |                 s_y = model.w_hy(s_h)
199 | 
200 |                 loss = functions.softmax_cross_entropy(s_y, s_t)
201 |                 loss.backward()
202 |             
203 |                 log_ppl += get_data(loss).reshape(()) * K
204 | 
205 |             opt.update()
206 |             trained += K
207 |             trace('  %d/%d' % (trained, num_lines))
208 |             
209 |         log_ppl /= float(num_words)
210 |         trace('  log(PPL) = %.10f' % log_ppl)
211 |         trace('  PPL      = %.10f' % math.exp(log_ppl))
212 | 
213 |         trace('  writing model ...')
214 |         save_rnnlm_model(args.model + '.%d' % (epoch + 1), args.vocab, args.embed, args.hidden, vocab, model)
215 | 
216 |     trace('training finished.')
217 | 
218 | 
219 | if __name__ == '__main__':
220 |     main()
221 | 
222 | 


--------------------------------------------------------------------------------
/chainer-1.4/mt_s2s_attention.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import my_settings
  4 | 
  5 | import sys
  6 | import math
  7 | import numpy as np
  8 | from argparse import ArgumentParser
  9 | 
 10 | from chainer import functions, optimizers
 11 | import chainer.computational_graph as cg
 12 | 
 13 | import util.generators as gens
 14 | from util.functions import trace, fill_batch2
 15 | from util.model_file import ModelFile
 16 | from util.vocabulary import Vocabulary
 17 | 
 18 | #from util.chainer_cpu_wrapper import wrapper
 19 | from util.chainer_gpu_wrapper import wrapper
 20 | 
 21 |    
 22 | class AttentionalTranslationModel:
 23 |     def __init__(self):
 24 |         pass
 25 | 
 26 |     def __make_model(self):
 27 |         self.__model = wrapper.make_model(
 28 |             # input embedding
 29 |             w_xi = functions.EmbedID(len(self.__src_vocab), self.__n_embed),
 30 |             # forward encoder
 31 |             w_ia = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
 32 |             w_aa = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 33 |             # backward encoder
 34 |             w_ib = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
 35 |             w_bb = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 36 |             # attentional weight estimator
 37 |             w_aw = functions.Linear(self.__n_hidden, self.__n_hidden),
 38 |             w_bw = functions.Linear(self.__n_hidden, self.__n_hidden),
 39 |             w_pw = functions.Linear(self.__n_hidden, self.__n_hidden),
 40 |             w_we = functions.Linear(self.__n_hidden, 1),
 41 |             # decoder
 42 |             w_ap = functions.Linear(self.__n_hidden, self.__n_hidden),
 43 |             w_bp = functions.Linear(self.__n_hidden, self.__n_hidden),
 44 |             w_yp = functions.EmbedID(len(self.__trg_vocab), 4 * self.__n_hidden),
 45 |             w_pp = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 46 |             w_cp = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 47 |             w_dp = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 48 |             w_py = functions.Linear(self.__n_hidden, len(self.__trg_vocab)),
 49 |         )
 50 | 
 51 |     @staticmethod
 52 |     def new(src_vocab, trg_vocab, n_embed, n_hidden):
 53 |         self = AttentionalTranslationModel()
 54 |         self.__src_vocab = src_vocab
 55 |         self.__trg_vocab = trg_vocab
 56 |         self.__n_embed = n_embed
 57 |         self.__n_hidden = n_hidden
 58 |         self.__make_model()
 59 |         return self
 60 | 
 61 |     def save(self, filename):
 62 |         with ModelFile(filename, 'w') as fp:
 63 |             self.__src_vocab.save(fp.get_file_pointer())
 64 |             self.__trg_vocab.save(fp.get_file_pointer())
 65 |             fp.write(self.__n_embed)
 66 |             fp.write(self.__n_hidden)
 67 |             wrapper.begin_model_access(self.__model)
 68 |             fp.write_embed(self.__model.w_xi)
 69 |             fp.write_linear(self.__model.w_ia)
 70 |             fp.write_linear(self.__model.w_aa)
 71 |             fp.write_linear(self.__model.w_ib)
 72 |             fp.write_linear(self.__model.w_bb)
 73 |             fp.write_linear(self.__model.w_aw)
 74 |             fp.write_linear(self.__model.w_bw)
 75 |             fp.write_linear(self.__model.w_pw)
 76 |             fp.write_linear(self.__model.w_we)
 77 |             fp.write_linear(self.__model.w_ap)
 78 |             fp.write_linear(self.__model.w_bp)
 79 |             fp.write_embed(self.__model.w_yp)
 80 |             fp.write_linear(self.__model.w_pp)
 81 |             fp.write_linear(self.__model.w_cp)
 82 |             fp.write_linear(self.__model.w_dp)
 83 |             fp.write_linear(self.__model.w_py)
 84 |             wrapper.end_model_access(self.__model)
 85 | 
 86 |     @staticmethod
 87 |     def load(filename):
 88 |         self = AttentionalTranslationModel()
 89 |         with ModelFile(filename) as fp:
 90 |             self.__src_vocab = Vocabulary.load(fp.get_file_pointer())
 91 |             self.__trg_vocab = Vocabulary.load(fp.get_file_pointer())
 92 |             self.__n_embed = int(fp.read())
 93 |             self.__n_hidden = int(fp.read())
 94 |             self.__make_model()
 95 |             wrapper.begin_model_access(self.__model)
 96 |             fp.read_embed(self.__model.w_xi)
 97 |             fp.read_linear(self.__model.w_ia)
 98 |             fp.read_linear(self.__model.w_aa)
 99 |             fp.read_linear(self.__model.w_ib)
100 |             fp.read_linear(self.__model.w_bb)
101 |             fp.read_linear(self.__model.w_aw)
102 |             fp.read_linear(self.__model.w_bw)
103 |             fp.read_linear(self.__model.w_pw)
104 |             fp.read_linear(self.__model.w_we)
105 |             fp.read_linear(self.__model.w_ap)
106 |             fp.read_linear(self.__model.w_bp)
107 |             fp.read_embed(self.__model.w_yp)
108 |             fp.read_linear(self.__model.w_pp)
109 |             fp.read_linear(self.__model.w_cp)
110 |             fp.read_linear(self.__model.w_dp)
111 |             fp.read_linear(self.__model.w_py)
112 |             wrapper.end_model_access(self.__model)
113 |         return self
114 | 
115 |     def init_optimizer(self):
116 |         self.__opt = optimizers.AdaGrad(lr=0.01)
117 |         self.__opt.setup(self.__model)
118 | 
119 |     def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
120 |         m = self.__model
121 |         tanh = functions.tanh
122 |         lstm = functions.lstm
123 |         batch_size = len(src_batch)
124 |         hidden_size = self.__n_hidden
125 |         src_len = len(src_batch[0])
126 |         trg_len = len(trg_batch[0]) - 1 if is_training else generation_limit
127 |         src_stoi = self.__src_vocab.stoi
128 |         trg_stoi = self.__trg_vocab.stoi
129 |         trg_itos = self.__trg_vocab.itos
130 | 
131 |         hidden_zeros = wrapper.zeros((batch_size, hidden_size))
132 |         sum_e_zeros = wrapper.zeros((batch_size, 1))
133 | 
134 |         # make embedding
135 |         list_x = []
136 |         for l in range(src_len):
137 |             s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
138 |             list_x.append(s_x)
139 | 
140 |         # forward encoding
141 |         c = hidden_zeros
142 |         s_a = hidden_zeros
143 |         list_a = []
144 |         for l in range(src_len):
145 |             s_x = list_x[l]
146 |             s_i = tanh(m.w_xi(s_x))
147 |             c, s_a = lstm(c, m.w_ia(s_i) + m.w_aa(s_a))
148 |             list_a.append(s_a)
149 |         
150 |         # backward encoding
151 |         c = hidden_zeros
152 |         s_b = hidden_zeros
153 |         list_b = []
154 |         for l in reversed(range(src_len)):
155 |             s_x = list_x[l]
156 |             s_i = tanh(m.w_xi(s_x))
157 |             c, s_b = lstm(c, m.w_ib(s_i) + m.w_bb(s_b))
158 |             list_b.insert(0, s_b)
159 | 
160 |         # decoding
161 |         c = hidden_zeros
162 |         s_p = tanh(m.w_ap(list_a[-1]) + m.w_bp(list_b[0]))
163 |         s_y = wrapper.make_var([trg_stoi('<s>') for k in range(batch_size)], dtype=np.int32)
164 | 
165 |         hyp_batch = [[] for _ in range(batch_size)]
166 |         accum_loss = wrapper.zeros(()) if is_training else None
167 |         
168 |         #for n in range(src_len):
169 |         #    print(src_batch[0][n], end=' ')
170 |         #print()
171 | 
172 |         for l in range(trg_len):
173 |             # calculate attention weights
174 |             list_e = []
175 |             sum_e = sum_e_zeros
176 |             for n in range(src_len):
177 |                 s_w = tanh(m.w_aw(list_a[n]) + m.w_bw(list_b[n]) + m.w_pw(s_p))
178 |                 r_e = functions.exp(m.w_we(s_w))
179 |                 #list_e.append(functions.concat(r_e for _ in range(self.__n_hidden)))
180 |                 list_e.append(r_e)
181 |                 sum_e += r_e
182 |             #sum_e = functions.concat(sum_e for _ in range(self.__n_hidden))
183 | 
184 |             # make attention vector
185 |             s_c = hidden_zeros
186 |             s_d = hidden_zeros
187 |             for n in range(src_len):
188 |                 s_e = list_e[n] / sum_e
189 |                 #s_c += s_e * list_a[n]
190 |                 #s_d += s_e * list_b[n]
191 |                 s_c += functions.reshape(functions.batch_matmul(list_a[n], s_e), (batch_size, hidden_size))
192 |                 s_d += functions.reshape(functions.batch_matmul(list_b[n], s_e), (batch_size, hidden_size))
193 | 
194 |                 #zxcv = wrapper.get_data(s_e)[0][0]
195 |                 #if zxcv > 0.9: asdf='#'
196 |                 #elif zxcv > 0.7: asdf='*'
197 |                 #elif zxcv > 0.3: asdf='+'
198 |                 #elif zxcv > 0.1: asdf='.'
199 |                 #else: asdf=' '
200 |                 #print(asdf * len(src_batch[0][n]), end=' ')
201 | 
202 |             # generate next word
203 |             c, s_p = lstm(c, m.w_yp(s_y) + m.w_pp(s_p) + m.w_cp(s_c) + m.w_dp(s_d))
204 |             r_y = m.w_py(s_p)
205 |             output = wrapper.get_data(r_y).argmax(1)
206 |             for k in range(batch_size):
207 |                 hyp_batch[k].append(trg_itos(output[k]))
208 | 
209 |             #print(hyp_batch[0][-1])
210 |             
211 |             if is_training:
212 |                 s_t = wrapper.make_var([trg_stoi(trg_batch[k][l + 1]) for k in range(batch_size)], dtype=np.int32)
213 |                 accum_loss += functions.softmax_cross_entropy(r_y, s_t)
214 |                 s_y = s_t
215 |             else:
216 |                 if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)): break
217 |                 s_y = wrapper.make_var(output, dtype=np.int32)
218 | 
219 |         return hyp_batch, accum_loss
220 | 
221 |     def train(self, src_batch, trg_batch):
222 |         self.__opt.zero_grads()
223 |         hyp_batch, accum_loss = self.__forward(True, src_batch, trg_batch=trg_batch)
224 |         #g = cg.build_computational_graph([accum_loss])
225 |         #with open('asdf', 'w') as fp: fp.write(g.dump())
226 |         #sys.exit()
227 |         accum_loss.backward()
228 |         self.__opt.clip_grads(10)
229 |         self.__opt.update()
230 |         return hyp_batch
231 | 
232 |     def predict(self, src_batch, generation_limit):
233 |         return self.__forward(False, src_batch, generation_limit=generation_limit)[0]
234 | 
235 | 
236 | def parse_args():
237 |     def_vocab = 32768
238 |     def_embed = 256
239 |     def_hidden = 512
240 |     def_epoch = 100
241 |     def_minibatch = 64
242 |     def_generation_limit = 256
243 | 
244 |     p = ArgumentParser(description='Attentional neural machine translation')
245 | 
246 |     p.add_argument('mode', help='\'train\' or \'test\'')
247 |     p.add_argument('source', help='[in] source corpus')
248 |     p.add_argument('target', help='[in/out] target corpus')
249 |     p.add_argument('model', help='[in/out] model file')
250 |     p.add_argument('--vocab', default=def_vocab, metavar='INT', type=int,
251 |         help='vocabulary size (default: %d)' % def_vocab)
252 |     p.add_argument('--embed', default=def_embed, metavar='INT', type=int,
253 |         help='embedding layer size (default: %d)' % def_embed)
254 |     p.add_argument('--hidden', default=def_hidden, metavar='INT', type=int,
255 |         help='hidden layer size (default: %d)' % def_hidden)
256 |     p.add_argument('--epoch', default=def_epoch, metavar='INT', type=int,
257 |         help='number of training epoch (default: %d)' % def_epoch)
258 |     p.add_argument('--minibatch', default=def_minibatch, metavar='INT', type=int,
259 |         help='minibatch size (default: %d)' % def_minibatch)
260 |     p.add_argument('--generation-limit', default=def_generation_limit, metavar='INT', type=int,
261 |         help='maximum number of words to be generated for test input')
262 | 
263 |     args = p.parse_args()
264 | 
265 |     # check args
266 |     try:
267 |         if args.mode not in ['train', 'test']: raise ValueError('you must set mode = \'train\' or \'test\'')
268 |         if args.vocab < 1: raise ValueError('you must set --vocab >= 1')
269 |         if args.embed < 1: raise ValueError('you must set --embed >= 1')
270 |         if args.hidden < 1: raise ValueError('you must set --hidden >= 1')
271 |         if args.epoch < 1: raise ValueError('you must set --epoch >= 1')
272 |         if args.minibatch < 1: raise ValueError('you must set --minibatch >= 1')
273 |         if args.generation_limit < 1: raise ValueError('you must set --generation-limit >= 1')
274 |     except Exception as ex:
275 |         p.print_usage(file=sys.stderr)
276 |         print(ex, file=sys.stderr)
277 |         sys.exit()
278 | 
279 |     return args
280 | 
281 | 
282 | def train_model(args):
283 |     trace('making vocabularies ...')
284 |     src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
285 |     trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)
286 | 
287 |     trace('making model ...')
288 |     model = AttentionalTranslationModel.new(src_vocab, trg_vocab, args.embed, args.hidden)
289 | 
290 |     for epoch in range(args.epoch):
291 |         trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
292 |         trained = 0
293 |         gen1 = gens.word_list(args.source)
294 |         gen2 = gens.word_list(args.target)
295 |         gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch, order=0), args.minibatch)
296 |         model.init_optimizer()
297 | 
298 |         for src_batch, trg_batch in gen3:
299 |             src_batch = fill_batch2(src_batch)
300 |             trg_batch = fill_batch2(trg_batch)
301 |             K = len(src_batch)
302 |             hyp_batch = model.train(src_batch, trg_batch)
303 | 
304 |             for k in range(K):
305 |                 trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
306 |                 trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
307 |                 trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
308 |                 trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
309 | 
310 |             trained += K
311 | 
312 |         trace('saving model ...')
313 |         model.save(args.model + '.%03d' % (epoch + 1))
314 | 
315 |     trace('finished.')
316 | 
317 | 
318 | def test_model(args):
319 |     trace('loading model ...')
320 |     model = AttentionalTranslationModel.load(args.model)
321 |     
322 |     trace('generating translation ...')
323 |     generated = 0
324 | 
325 |     with open(args.target, 'w') as fp:
326 |         for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
327 |             src_batch = fill_batch2(src_batch)
328 |             K = len(src_batch)
329 | 
330 |             trace('sample %8d - %8d ...' % (generated + 1, generated + K))
331 |             hyp_batch = model.predict(src_batch, args.generation_limit)
332 | 
333 |             for hyp in hyp_batch:
334 |                 hyp.append('</s>')
335 |                 hyp = hyp[:hyp.index('</s>')]
336 |                 print(' '.join(hyp), file=fp)
337 | 
338 |             generated += K
339 | 
340 |     trace('finished.')
341 | 
342 | 
343 | def main():
344 |     args = parse_args()
345 | 
346 |     trace('initializing ...')
347 |     wrapper.init()
348 | 
349 |     if args.mode == 'train': train_model(args)
350 |     elif args.mode == 'test': test_model(args)
351 | 
352 | 
353 | if __name__ == '__main__':
354 |     main()
355 | 
356 | 


--------------------------------------------------------------------------------
/chainer-1.4/mt_s2s_encdec.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import my_settings
  4 | 
  5 | import sys
  6 | import math
  7 | import numpy as np
  8 | from argparse import ArgumentParser
  9 | 
 10 | from chainer import functions, optimizers
 11 | 
 12 | import util.generators as gens
 13 | from util.functions import trace, fill_batch
 14 | from util.model_file import ModelFile
 15 | from util.vocabulary import Vocabulary
 16 | 
 17 | #from util.chainer_cpu_wrapper import wrapper
 18 | from util.chainer_gpu_wrapper import wrapper
 19 | 
 20 |    
 21 | class EncoderDecoderModel:
 22 |     def __init__(self):
 23 |         pass
 24 | 
 25 |     def __make_model(self):
 26 |         self.__model = wrapper.make_model(
 27 |             # encoder
 28 |             w_xi = functions.EmbedID(len(self.__src_vocab), self.__n_embed),
 29 |             w_ip = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
 30 |             w_pp = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 31 |             # decoder
 32 |             w_pq = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 33 |             w_qj = functions.Linear(self.__n_hidden, self.__n_embed),
 34 |             w_jy = functions.Linear(self.__n_embed, len(self.__trg_vocab)),
 35 |             w_yq = functions.EmbedID(len(self.__trg_vocab), 4 * self.__n_hidden),
 36 |             w_qq = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 37 |         )
 38 | 
 39 |     @staticmethod
 40 |     def new(src_vocab, trg_vocab, n_embed, n_hidden):
 41 |         self = EncoderDecoderModel()
 42 |         self.__src_vocab = src_vocab
 43 |         self.__trg_vocab = trg_vocab
 44 |         self.__n_embed = n_embed
 45 |         self.__n_hidden = n_hidden
 46 |         self.__make_model()
 47 |         return self
 48 | 
 49 |     def save(self, filename):
 50 |         with ModelFile(filename, 'w') as fp:
 51 |             self.__src_vocab.save(fp.get_file_pointer())
 52 |             self.__trg_vocab.save(fp.get_file_pointer())
 53 |             fp.write(self.__n_embed)
 54 |             fp.write(self.__n_hidden)
 55 |             wrapper.begin_model_access(self.__model)
 56 |             fp.write_embed(self.__model.w_xi)
 57 |             fp.write_linear(self.__model.w_ip)
 58 |             fp.write_linear(self.__model.w_pp)
 59 |             fp.write_linear(self.__model.w_pq)
 60 |             fp.write_linear(self.__model.w_qj)
 61 |             fp.write_linear(self.__model.w_jy)
 62 |             fp.write_embed(self.__model.w_yq)
 63 |             fp.write_linear(self.__model.w_qq)
 64 |             wrapper.end_model_access(self.__model)
 65 | 
 66 |     @staticmethod
 67 |     def load(filename):
 68 |         self = EncoderDecoderModel()
 69 |         with ModelFile(filename) as fp:
 70 |             self.__src_vocab = Vocabulary.load(fp.get_file_pointer())
 71 |             self.__trg_vocab = Vocabulary.load(fp.get_file_pointer())
 72 |             self.__n_embed = int(fp.read())
 73 |             self.__n_hidden = int(fp.read())
 74 |             self.__make_model()
 75 |             wrapper.begin_model_access(self.__model)
 76 |             fp.read_embed(self.__model.w_xi)
 77 |             fp.read_linear(self.__model.w_ip)
 78 |             fp.read_linear(self.__model.w_pp)
 79 |             fp.read_linear(self.__model.w_pq)
 80 |             fp.read_linear(self.__model.w_qj)
 81 |             fp.read_linear(self.__model.w_jy)
 82 |             fp.read_embed(self.__model.w_yq)
 83 |             fp.read_linear(self.__model.w_qq)
 84 |             wrapper.end_model_access(self.__model)
 85 |         return self
 86 | 
 87 |     def init_optimizer(self):
 88 |         self.__opt = optimizers.AdaGrad(lr=0.01)
 89 |         self.__opt.setup(self.__model)
 90 | 
 91 |     def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
 92 |         m = self.__model
 93 |         tanh = functions.tanh
 94 |         lstm = functions.lstm
 95 |         batch_size = len(src_batch)
 96 |         src_len = len(src_batch[0])
 97 |         src_stoi = self.__src_vocab.stoi
 98 |         trg_stoi = self.__trg_vocab.stoi
 99 |         trg_itos = self.__trg_vocab.itos
100 |         s_c = wrapper.zeros((batch_size, self.__n_hidden))
101 |         
102 |         # encoding
103 |         s_x = wrapper.make_var([src_stoi('</s>') for _ in range(batch_size)], dtype=np.int32)
104 |         s_i = tanh(m.w_xi(s_x))
105 |         s_c, s_p = lstm(s_c, m.w_ip(s_i))
106 | 
107 |         for l in reversed(range(src_len)):
108 |             s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
109 |             s_i = tanh(m.w_xi(s_x))
110 |             s_c, s_p = lstm(s_c, m.w_ip(s_i) + m.w_pp(s_p))
111 | 
112 |         s_c, s_q = lstm(s_c, m.w_pq(s_p))
113 |         hyp_batch = [[] for _ in range(batch_size)]
114 |         
115 |         # decoding
116 |         if is_training:
117 |             accum_loss = wrapper.zeros(())
118 |             trg_len = len(trg_batch[0])
119 |             
120 |             for l in range(trg_len):
121 |                 s_j = tanh(m.w_qj(s_q))
122 |                 r_y = m.w_jy(s_j)
123 |                 s_t = wrapper.make_var([trg_stoi(trg_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
124 |                 accum_loss += functions.softmax_cross_entropy(r_y, s_t)
125 |                 output = wrapper.get_data(r_y).argmax(1)
126 | 
127 |                 for k in range(batch_size):
128 |                     hyp_batch[k].append(trg_itos(output[k]))
129 | 
130 |                 s_c, s_q = lstm(s_c, m.w_yq(s_t) + m.w_qq(s_q))
131 | 
132 |             return hyp_batch, accum_loss
133 |         else:
134 |             while len(hyp_batch[0]) < generation_limit:
135 |                 s_j = tanh(m.w_qj(s_q))
136 |                 r_y = m.w_jy(s_j)
137 |                 output = wrapper.get_data(r_y).argmax(1)
138 | 
139 |                 for k in range(batch_size):
140 |                     hyp_batch[k].append(trg_itos(output[k]))
141 | 
142 |                 if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)): break
143 | 
144 |                 s_y = wrapper.make_var(output, dtype=np.int32)
145 |                 s_c, s_q = lstm(s_c, m.w_yq(s_y) + m.w_qq(s_q))
146 |             
147 |             return hyp_batch
148 | 
149 |     def train(self, src_batch, trg_batch):
150 |         self.__opt.zero_grads()
151 |         hyp_batch, accum_loss = self.__forward(True, src_batch, trg_batch=trg_batch)
152 |         accum_loss.backward()
153 |         self.__opt.clip_grads(10)
154 |         self.__opt.update()
155 |         return hyp_batch
156 | 
157 |     def predict(self, src_batch, generation_limit):
158 |         return self.__forward(False, src_batch, generation_limit=generation_limit)
159 | 
160 | 
161 | def parse_args():
162 |     def_vocab = 32768
163 |     def_embed = 256
164 |     def_hidden = 512
165 |     def_epoch = 100
166 |     def_minibatch = 64
167 |     def_generation_limit = 256
168 | 
169 |     p = ArgumentParser(description='Encoder-decoder neural machine trainslation')
170 | 
171 |     p.add_argument('mode', help='\'train\' or \'test\'')
172 |     p.add_argument('source', help='[in] source corpus')
173 |     p.add_argument('target', help='[in/out] target corpus')
174 |     p.add_argument('model', help='[in/out] model file')
175 |     p.add_argument('--vocab', default=def_vocab, metavar='INT', type=int,
176 |         help='vocabulary size (default: %d)' % def_vocab)
177 |     p.add_argument('--embed', default=def_embed, metavar='INT', type=int,
178 |         help='embedding layer size (default: %d)' % def_embed)
179 |     p.add_argument('--hidden', default=def_hidden, metavar='INT', type=int,
180 |         help='hidden layer size (default: %d)' % def_hidden)
181 |     p.add_argument('--epoch', default=def_epoch, metavar='INT', type=int,
182 |         help='number of training epoch (default: %d)' % def_epoch)
183 |     p.add_argument('--minibatch', default=def_minibatch, metavar='INT', type=int,
184 |         help='minibatch size (default: %d)' % def_minibatch)
185 |     p.add_argument('--generation-limit', default=def_generation_limit, metavar='INT', type=int,
186 |         help='maximum number of words to be generated for test input')
187 | 
188 |     args = p.parse_args()
189 | 
190 |     # check args
191 |     try:
192 |         if args.mode not in ['train', 'test']: raise ValueError('you must set mode = \'train\' or \'test\'')
193 |         if args.vocab < 1: raise ValueError('you must set --vocab >= 1')
194 |         if args.embed < 1: raise ValueError('you must set --embed >= 1')
195 |         if args.hidden < 1: raise ValueError('you must set --hidden >= 1')
196 |         if args.epoch < 1: raise ValueError('you must set --epoch >= 1')
197 |         if args.minibatch < 1: raise ValueError('you must set --minibatch >= 1')
198 |         if args.generation_limit < 1: raise ValueError('you must set --generation-limit >= 1')
199 |     except Exception as ex:
200 |         p.print_usage(file=sys.stderr)
201 |         print(ex, file=sys.stderr)
202 |         sys.exit()
203 | 
204 |     return args
205 | 
206 | 
207 | def train_model(args):
208 |     trace('making vocabularies ...')
209 |     src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
210 |     trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)
211 | 
212 |     trace('making model ...')
213 |     model = EncoderDecoderModel.new(src_vocab, trg_vocab, args.embed, args.hidden)
214 | 
215 |     for epoch in range(args.epoch):
216 |         trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
217 |         trained = 0
218 |         gen1 = gens.word_list(args.source)
219 |         gen2 = gens.word_list(args.target)
220 |         gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
221 |         model.init_optimizer()
222 | 
223 |         for src_batch, trg_batch in gen3:
224 |             src_batch = fill_batch(src_batch)
225 |             trg_batch = fill_batch(trg_batch)
226 |             K = len(src_batch)
227 |             hyp_batch = model.train(src_batch, trg_batch)
228 | 
229 |             for k in range(K):
230 |                 trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
231 |                 trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
232 |                 trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
233 |                 trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
234 | 
235 |             trained += K
236 | 
237 |         trace('saving model ...')
238 |         model.save(args.model + '.%03d' % (epoch + 1))
239 | 
240 |     trace('finished.')
241 | 
242 | 
243 | def test_model(args):
244 |     trace('loading model ...')
245 |     model = EncoderDecoderModel.load(args.model)
246 |     
247 |     trace('generating translation ...')
248 |     generated = 0
249 | 
250 |     with open(args.target, 'w') as fp:
251 |         for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
252 |             src_batch = fill_batch(src_batch)
253 |             K = len(src_batch)
254 | 
255 |             trace('sample %8d - %8d ...' % (generated + 1, generated + K))
256 |             hyp_batch = model.predict(src_batch, args.generation_limit)
257 | 
258 |             for hyp in hyp_batch:
259 |                 hyp.append('</s>')
260 |                 hyp = hyp[:hyp.index('</s>')]
261 |                 print(' '.join(hyp), file=fp)
262 | 
263 |             generated += K
264 | 
265 |     trace('finished.')
266 | 
267 | 
268 | def main():
269 |     args = parse_args()
270 | 
271 |     trace('initializing ...')
272 |     wrapper.init()
273 | 
274 |     if args.mode == 'train': train_model(args)
275 |     elif args.mode == 'test': test_model(args)
276 | 
277 | 
278 | if __name__ == '__main__':
279 |     main()
280 | 
281 | 


--------------------------------------------------------------------------------
/chainer-1.4/seg_ffnn.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import my_settings
  4 | 
  5 | import sys
  6 | import math
  7 | import numpy as np
  8 | from argparse import ArgumentParser
  9 | 
 10 | from chainer import functions, optimizers
 11 | 
 12 | import util.generators as gens
 13 | from util.functions import trace, fill_batch
 14 | from util.model_file import ModelFile
 15 | from util.vocabulary import Vocabulary
 16 | 
 17 | from util.chainer_cpu_wrapper import wrapper
 18 | #from util.chainer_gpu_wrapper import wrapper
 19 | 
 20 | 
 21 | class SegmentationModel:
 22 |     def __init__(self):
 23 |         pass
 24 | 
 25 |     def __make_model(self):
 26 |         self.__model = wrapper.make_model(
 27 |             w_xh = functions.EmbedID(2 * self.__n_context * len(self.__vocab), self.__n_hidden),
 28 |             w_hy = functions.Linear(self.__n_hidden, 1),
 29 |         )
 30 | 
 31 |     @staticmethod
 32 |     def new(vocab, n_context, n_hidden):
 33 |         self = SegmentationModel()
 34 |         self.__vocab = vocab
 35 |         self.__n_context = n_context
 36 |         self.__n_hidden = n_hidden
 37 |         self.__make_model()
 38 |         return self
 39 | 
 40 |     def save(self, filename):
 41 |         with ModelFile(filename, 'w') as fp:
 42 |             self.__vocab.save(fp.get_file_pointer())
 43 |             fp.write(self.__n_context)
 44 |             fp.write(self.__n_hidden)
 45 |             wrapper.begin_model_access(self.__model)
 46 |             fp.write_embed(self.__model.w_xh)
 47 |             fp.write_linear(self.__model.w_hy)
 48 |             wrapper.end_model_access(self.__model)
 49 | 
 50 |     @staticmethod
 51 |     def load(filename):
 52 |         self = SegmentationModel()
 53 |         with ModelFile(filename) as fp:
 54 |             self.__vocab = Vocabulary.load(fp.get_file_pointer())
 55 |             self.__n_context = int(fp.read())
 56 |             self.__n_hidden = int(fp.read())
 57 |             self.__make_model()
 58 |             wrapper.begin_model_access(self.__model)
 59 |             fp.read_embed(self.__model.w_xh)
 60 |             fp.read_linear(self.__model.w_hy)
 61 |             wrapper.end_model_access(self.__model)
 62 |         return self
 63 | 
 64 |     def init_optimizer(self):
 65 |         self.__opt = optimizers.AdaGrad(lr=0.01)
 66 |         self.__opt.setup(self.__model)
 67 | 
 68 |     def __make_input(self, is_training, text):
 69 |         c = self.__vocab.stoi
 70 |         k = self.__n_context - 1
 71 |         word_list = text.split()
 72 |         letters = [c('<s>')] * k + [c(x) for x in ''.join(word_list)] + [c('</s>')] * k
 73 |         if is_training:
 74 |             labels = []
 75 |             for x in word_list:
 76 |                 labels += [-1] * (len(x) - 1) + [1]
 77 |             return letters, labels[:-1]
 78 |         else:
 79 |             return letters, None
 80 | 
 81 |     def __forward(self, is_training, text):
 82 |         m = self.__model
 83 |         tanh = functions.tanh
 84 |         letters, labels = self.__make_input(is_training, text)
 85 |         scores = []
 86 |         accum_loss = wrapper.zeros(()) if is_training else None
 87 |             
 88 |         for n in range(len(letters) - 2 * self.__n_context + 1):
 89 |             s_hu = wrapper.zeros((1, self.__n_hidden))
 90 |             
 91 |             for k in range(2 * self.__n_context):
 92 |                 wid = k * len(self.__vocab) + letters[n + k]
 93 |                 s_x = wrapper.make_var([wid], dtype=np.int32)
 94 |                 s_hu += m.w_xh(s_x)
 95 |             
 96 |             s_hv = tanh(s_hu)
 97 |             s_y = tanh(m.w_hy(s_hv))
 98 |             scores.append(float(wrapper.get_data(s_y)))
 99 |             
100 |             if is_training:
101 |                 s_t = wrapper.make_var([[labels[n]]])
102 |                 accum_loss += functions.mean_squared_error(s_y, s_t)
103 | 
104 |         return scores, accum_loss
105 | 
106 |     def train(self, text):
107 |         self.__opt.zero_grads()
108 |         scores, accum_loss = self.__forward(True, text)
109 |         accum_loss.backward()
110 |         self.__opt.clip_grads(5)
111 |         self.__opt.update()
112 |         return scores
113 | 
114 |     def predict(self, text):
115 |         return self.__forward(False, text)[0]
116 | 
117 | 
118 | def parse_args():
119 |     def_vocab = 2500
120 |     def_hidden = 100
121 |     def_epoch = 100
122 |     def_context = 3
123 | 
124 |     p = ArgumentParser(description='Word segmentation using feedforward neural network')
125 | 
126 |     p.add_argument('mode', help='\'train\' or \'test\'')
127 |     p.add_argument('corpus', help='[in] source corpus')
128 |     p.add_argument('model', help='[in/out] model file')
129 |     p.add_argument('--vocab', default=def_vocab, metavar='INT', type=int,
130 |         help='vocabulary size (default: %d)' % def_vocab)
131 |     p.add_argument('--hidden', default=def_hidden, metavar='INT', type=int,
132 |         help='hidden layer size (default: %d)' % def_hidden)
133 |     p.add_argument('--epoch', default=def_epoch, metavar='INT', type=int,
134 |         help='number of training epoch (default: %d)' % def_epoch)
135 |     p.add_argument('--context', default=def_context, metavar='INT', type=int,
136 |         help='width of context window (default: %d)' % def_context)
137 | 
138 |     args = p.parse_args()
139 | 
140 |     # check args
141 |     try:
142 |         if args.mode not in ['train', 'test']: raise ValueError('you must set mode = \'train\' or \'test\'')
143 |         if args.vocab < 1: raise ValueError('you must set --vocab >= 1')
144 |         if args.hidden < 1: raise ValueError('you must set --hidden >= 1')
145 |         if args.epoch < 1: raise ValueError('you must set --epoch >= 1')
146 |         if args.context < 1: raise ValueError('you must set --context >= 1')
147 |     except Exception as ex:
148 |         p.print_usage(file=sys.stderr)
149 |         print(ex, file=sys.stderr)
150 |         sys.exit()
151 | 
152 |     return args
153 | 
154 | 
155 | def make_hyp(letters, scores):
156 |     hyp = letters[0]
157 |     for w, s in zip(letters[1:], scores):
158 |         if s >= 0:
159 |             hyp += ' '
160 |         hyp += w
161 |     return hyp
162 | 
163 | 
164 | def train_model(args):
165 |     trace('making vocabularies ...')
166 |     vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)
167 | 
168 |     trace('start training ...')
169 |     model = SegmentationModel.new(vocab, args.context, args.hidden)
170 | 
171 |     for epoch in range(args.epoch):
172 |         trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
173 |         trained = 0
174 | 
175 |         model.init_optimizer()
176 | 
177 |         with open(args.corpus) as fp:
178 |             for text in fp:
179 |                 word_list = text.split()
180 |                 if not word_list:
181 |                     continue
182 | 
183 |                 text = ' '.join(word_list)
184 |                 letters = ''.join(word_list)
185 |                 scores = model.train(text)
186 |                 trained += 1
187 |                 hyp = make_hyp(letters, scores)
188 |                 
189 |                 trace(trained)
190 |                 trace(text)
191 |                 trace(hyp)
192 |                 trace(' '.join('%+.1f' % x for x in scores))
193 |                 
194 |                 if trained % 100 == 0:
195 |                     trace('  %8d' % trained)
196 | 
197 |         trace('saveing model ...')
198 |         model.save(args.model + '.%03d' % (epoch + 1))
199 | 
200 |     trace('finished.')
201 | 
202 | 
203 | def test_model(args):
204 |     trace('loading model ...')
205 |     model = SegmentationModel.load(args.model)
206 |     
207 |     trace('generating output ...')
208 | 
209 |     with open(args.corpus) as fp:
210 |         for text in fp:
211 |             letters = ''.join(text.split())
212 |             if not letters:
213 |                 print()
214 |                 continue
215 |             scores = model.predict(text)
216 |             hyp = make_hyp(letters, scores)
217 |             print(hyp)
218 | 
219 |     trace('finished.')
220 | 
221 | 
222 | def main():
223 |     args = parse_args()
224 | 
225 |     trace('initializing CUDA ...')
226 |     wrapper.init()
227 | 
228 |     if args.mode == 'train': train_model(args)
229 |     elif args.mode == 'test': test_model(args)
230 | 
231 | 
232 | if __name__ == '__main__':
233 |     main()
234 | 
235 | 


--------------------------------------------------------------------------------
/chainer-1.4/seg_rnn.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | #import my_settings
  4 | 
  5 | import sys
  6 | import math
  7 | import numpy as np
  8 | from argparse import ArgumentParser
  9 | 
 10 | from chainer import functions, optimizers
 11 | 
 12 | import util.generators as gens
 13 | from util.functions import trace, fill_batch
 14 | from util.model_file import ModelFile
 15 | from util.vocabulary import Vocabulary
 16 | 
 17 | from util.chainer_cpu_wrapper import wrapper
 18 | #from util.chainer_gpu_wrapper import wrapper
 19 | 
 20 | 
 21 | class RNNSegmentationModel:
 22 |     def __init__(self):
 23 |         pass
 24 | 
 25 |     def __make_model(self):
 26 |         self.__model = wrapper.make_model(
 27 |             w_xe = functions.EmbedID(len(self.__vocab), self.__n_embed),
 28 |             w_ea = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
 29 |             w_aa = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 30 |             w_eb = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
 31 |             w_bb = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
 32 |             w_ay1 = functions.Linear(self.__n_hidden, 1),
 33 |             w_by1 = functions.Linear(self.__n_hidden, 1),
 34 |             w_ay2 = functions.Linear(self.__n_hidden, 1),
 35 |             w_by2 = functions.Linear(self.__n_hidden, 1),
 36 |         )
 37 | 
 38 |     @staticmethod
 39 |     def new(vocab, n_embed, n_hidden):
 40 |         self = RNNSegmentationModel()
 41 |         self.__vocab = vocab
 42 |         self.__n_embed = n_embed
 43 |         self.__n_hidden = n_hidden
 44 |         self.__make_model()
 45 |         return self
 46 | 
 47 |     def save(self, filename):
 48 |         with ModelFile(filename, 'w') as fp:
 49 |             self.__vocab.save(fp.get_file_pointer())
 50 |             fp.write(self.__n_embed)
 51 |             fp.write(self.__n_hidden)
 52 |             wrapper.begin_model_access(self.__model)
 53 |             fp.write_embed(self.__model.w_xe)
 54 |             fp.write_linear(self.__model.w_ea)
 55 |             fp.write_linear(self.__model.w_aa)
 56 |             fp.write_linear(self.__model.w_eb)
 57 |             fp.write_linear(self.__model.w_bb)
 58 |             fp.write_linear(self.__model.w_ay1)
 59 |             fp.write_linear(self.__model.w_by1)
 60 |             fp.write_linear(self.__model.w_ay2)
 61 |             fp.write_linear(self.__model.w_by2)
 62 |             wrapper.end_model_access(self.__model)
 63 | 
 64 |     @staticmethod
 65 |     def load(filename):
 66 |         self = RNNSegmentationModel()
 67 |         with ModelFile(filename) as fp:
 68 |             self.__vocab = Vocabulary.load(fp.get_file_pointer())
 69 |             self.__n_embed = int(fp.read())
 70 |             self.__n_hidden = int(fp.read())
 71 |             self.__make_model()
 72 |             wrapper.begin_model_access(self.__model)
 73 |             fp.read_embed(self.__model.w_xe)
 74 |             fp.read_linear(self.__model.w_ea)
 75 |             fp.read_linear(self.__model.w_aa)
 76 |             fp.read_linear(self.__model.w_eb)
 77 |             fp.read_linear(self.__model.w_bb)
 78 |             fp.read_linear(self.__model.w_ay1)
 79 |             fp.read_linear(self.__model.w_by1)
 80 |             fp.read_linear(self.__model.w_ay2)
 81 |             fp.read_linear(self.__model.w_by2)
 82 |             wrapper.end_model_access(self.__model)
 83 |         return self
 84 | 
 85 |     def init_optimizer(self):
 86 |         self.__opt = optimizers.AdaGrad(lr=0.001)
 87 |         self.__opt.setup(self.__model)
 88 | 
 89 |     def __make_input(self, is_training, text):
 90 |         word_list = text.split()
 91 |         letters = [self.__vocab.stoi(x) for x in ''.join(word_list)]
 92 |         if is_training:
 93 |             labels = []
 94 |             for x in word_list:
 95 |                 labels += [-1] * (len(x) - 1) + [1]
 96 |             return letters, labels[:-1]
 97 |         else:
 98 |             return letters, None
 99 | 
100 |     def __forward(self, is_training, text):
101 |         m = self.__model
102 |         tanh = functions.tanh
103 |         lstm = functions.lstm
104 |         letters, labels = self.__make_input(is_training, text)
105 |         n_letters = len(letters)
106 | 
107 |         accum_loss = wrapper.zeros(()) if is_training else None
108 |         hidden_zeros = wrapper.zeros((1, self.__n_hidden))
109 | 
110 |         # embedding
111 |         list_e = []
112 |         for i in range(n_letters):
113 |             s_x = wrapper.make_var([letters[i]], dtype=np.int32)
114 |             list_e.append(tanh(m.w_xe(s_x)))
115 | 
116 |         # forward encoding
117 |         s_a = hidden_zeros
118 |         c = hidden_zeros
119 |         list_a = []
120 |         for i in range(n_letters):
121 |             c, s_a = lstm(c, m.w_ea(list_e[i]) + m.w_aa(s_a))
122 |             list_a.append(s_a)
123 |         
124 |         # backward encoding
125 |         s_b = hidden_zeros
126 |         c = hidden_zeros
127 |         list_b = []
128 |         for i in reversed(range(n_letters)):
129 |             c, s_b = lstm(c, m.w_eb(list_e[i]) + m.w_bb(s_b))
130 |             list_b.append(s_b)
131 |         
132 |         # segmentation
133 |         scores = []
134 |         for i in range(n_letters - 1):
135 |             s_y = tanh(m.w_ay1(list_a[i]) + m.w_by1(list_b[i]) + m.w_ay2(list_a[i + 1]) + m.w_by2(list_b[i + 1]))
136 |             scores.append(float(wrapper.get_data(s_y)))
137 |             
138 |             if is_training:
139 |                 s_t = wrapper.make_var([[labels[i]]])
140 |                 accum_loss += functions.mean_squared_error(s_y, s_t)
141 | 
142 |         return scores, accum_loss
143 | 
144 |     def train(self, text):
145 |         self.__opt.zero_grads()
146 |         scores, accum_loss = self.__forward(True, text)
147 |         accum_loss.backward()
148 |         self.__opt.clip_grads(5)
149 |         self.__opt.update()
150 |         return scores
151 | 
152 |     def predict(self, text):
153 |         return self.__forward(False, text)[0]
154 | 
155 | 
156 | def parse_args():
157 |     def_vocab = 2500
158 |     def_embed = 100
159 |     def_hidden = 100
160 |     def_epoch = 20
161 | 
162 |     p = ArgumentParser(description='Word segmentation using LSTM-RNN')
163 | 
164 |     p.add_argument('mode', help='\'train\' or \'test\'')
165 |     p.add_argument('corpus', help='[in] source corpus')
166 |     p.add_argument('model', help='[in/out] model file')
167 |     p.add_argument('--vocab', default=def_vocab, metavar='INT', type=int,
168 |         help='vocabulary size (default: %d)' % def_vocab)
169 |     p.add_argument('--embed', default=def_embed, metavar='INT', type=int,
170 |         help='embedding layer size (default: %d)' % def_embed)
171 |     p.add_argument('--hidden', default=def_hidden, metavar='INT', type=int,
172 |         help='hidden layer size (default: %d)' % def_hidden)
173 |     p.add_argument('--epoch', default=def_epoch, metavar='INT', type=int,
174 |         help='number of training epoch (default: %d)' % def_epoch)
175 | 
176 |     args = p.parse_args()
177 | 
178 |     # check args
179 |     try:
180 |         if args.mode not in ['train', 'test']: raise ValueError('you must set mode = \'train\' or \'test\'')
181 |         if args.vocab < 1: raise ValueError('you must set --vocab >= 1')
182 |         if args.embed < 1: raise ValueError('you must set --embed >= 1')
183 |         if args.hidden < 1: raise ValueError('you must set --hidden >= 1')
184 |         if args.epoch < 1: raise ValueError('you must set --epoch >= 1')
185 |     except Exception as ex:
186 |         p.print_usage(file=sys.stderr)
187 |         print(ex, file=sys.stderr)
188 |         sys.exit()
189 | 
190 |     return args
191 | 
192 | 
193 | def make_hyp(letters, scores):
194 |     hyp = letters[0]
195 |     for w, s in zip(letters[1:], scores):
196 |         if s >= 0:
197 |             hyp += ' '
198 |         hyp += w
199 |     return hyp
200 | 
201 | 
202 | def train_model(args):
203 |     trace('making vocabularies ...')
204 |     vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)
205 | 
206 |     trace('start training ...')
207 |     model = RNNSegmentationModel.new(vocab, args.embed, args.hidden)
208 | 
209 |     for epoch in range(args.epoch):
210 |         trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
211 |         trained = 0
212 | 
213 |         model.init_optimizer()
214 | 
215 |         with open(args.corpus) as fp:
216 |             for text in fp:
217 |                 word_list = text.split()
218 |                 if not word_list:
219 |                     continue
220 | 
221 |                 text = ' '.join(word_list)
222 |                 letters = ''.join(word_list)
223 |                 scores = model.train(text)
224 |                 trained += 1
225 |                 hyp = make_hyp(letters, scores)
226 |                 
227 |                 trace(trained)
228 |                 trace(text)
229 |                 trace(hyp)
230 |                 trace(' '.join('%+.1f' % x for x in scores))
231 |                 
232 |                 if trained % 100 == 0:
233 |                     trace('  %8d' % trained)
234 | 
235 |         trace('saveing model ...')
236 |         model.save(args.model + '.%03d' % (epoch + 1))
237 | 
238 |     trace('finished.')
239 | 
240 | 
241 | def test_model(args):
242 |     trace('loading model ...')
243 |     model = RNNSegmentationModel.load(args.model)
244 |     
245 |     trace('generating output ...')
246 | 
247 |     with open(args.corpus) as fp:
248 |         for text in fp:
249 |             letters = ''.join(text.split())
250 |             if not letters:
251 |                 print()
252 |                 continue
253 |             scores = model.predict(text)
254 |             hyp = make_hyp(letters, scores)
255 |             print(hyp)
256 | 
257 |     trace('finished.')
258 | 
259 | 
260 | def main():
261 |     args = parse_args()
262 | 
263 |     trace('initializing ...')
264 |     wrapper.init()
265 | 
266 |     if args.mode == 'train': train_model(args)
267 |     elif args.mode == 'test': test_model(args)
268 | 
269 | 
270 | if __name__ == '__main__':
271 |     main()
272 | 
273 | 


--------------------------------------------------------------------------------
/chainer-1.4/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/odashi/chainer_examples/b13ec64e5035b1eb75b873431786d880577b7370/chainer-1.4/util/__init__.py


--------------------------------------------------------------------------------
/chainer-1.4/util/chainer_cpu_wrapper.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import chainer
 3 | 
 4 | class wrapper:
 5 |     @staticmethod
 6 |     def init():
 7 |         pass
 8 | 
 9 |     @staticmethod
10 |     def make_var(array, dtype=numpy.float32):
11 |         return chainer.Variable(numpy.array(array, dtype=dtype))
12 | 
13 |     @staticmethod
14 |     def get_data(variable):
15 |         return variable.data
16 | 
17 |     @staticmethod
18 |     def zeros(shape, dtype=numpy.float32):
19 |         return chainer.Variable(numpy.zeros(shape, dtype=dtype))
20 | 
21 |     @staticmethod
22 |     def ones(shape, dtype=numpy.float32):
23 |         return chainer.Variable(numpy.ones(shape, dtype=dtype))
24 | 
25 |     @staticmethod
26 |     def make_model(**kwargs):
27 |         return chainer.FunctionSet(**kwargs)
28 | 
29 |     @staticmethod
30 |     def begin_model_access(model):
31 |         pass
32 | 
33 |     @staticmethod
34 |     def end_model_access(model):
35 |         pass
36 |  
37 | 


--------------------------------------------------------------------------------
/chainer-1.4/util/chainer_gpu_wrapper.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import chainer
 3 | 
 4 | class wrapper:
 5 |     @staticmethod
 6 |     def init():
 7 |         chainer.cuda.init()
 8 | 
 9 |     @staticmethod
10 |     def make_var(array, dtype=numpy.float32):
11 |         return chainer.Variable(chainer.cuda.to_gpu(numpy.array(array, dtype=dtype)))
12 | 
13 |     @staticmethod
14 |     def get_data(variable):
15 |         return chainer.cuda.to_cpu(variable.data)
16 | 
17 |     @staticmethod
18 |     def zeros(shape, dtype=numpy.float32):
19 |         return chainer.Variable(chainer.cuda.zeros(shape, dtype=dtype))
20 | 
21 |     @staticmethod
22 |     def ones(shape, dtype=numpy.float32):
23 |         return chainer.Variable(chainer.cuda.ones(shape, dtype=dtype))
24 | 
25 |     @staticmethod
26 |     def make_model(**kwargs):
27 |         return chainer.FunctionSet(**kwargs).to_gpu()        
28 |  
29 |     @staticmethod
30 |     def begin_model_access(model):
31 |         model.to_cpu()
32 | 
33 |     @staticmethod
34 |     def end_model_access(model):
35 |         model.to_gpu()
36 | 
37 | 


--------------------------------------------------------------------------------
/chainer-1.4/util/functions.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import datetime
 3 | 
 4 | def trace(*args):
 5 |     print(datetime.datetime.now(), '...', *args, file=sys.stderr)
 6 |     sys.stderr.flush()
 7 | 
 8 | def fill_batch(batch, token='</s>'):
 9 |     max_len = max(len(x) for x in batch)
10 |     return [x + [token] * (max_len - len(x) + 1) for x in batch]
11 | 
12 | def fill_batch2(batch, start_token='<s>', end_token='</s>'):
13 |     max_len = max(len(x) for x in batch)
14 |     return [[start_token] + x + [end_token] * (max_len - len(x) + 1) for x in batch]
15 | 
16 | def vtos(v, fmt='%.8e'):
17 |     return ' '.join(fmt % x for x in v)
18 | 
19 | def stov(s, tp=float):
20 |     return [tp(x) for x in s.split()]
21 | 
22 | 


--------------------------------------------------------------------------------
/chainer-1.4/util/generators.py:
--------------------------------------------------------------------------------
 1 | def batch(generator, batch_size):
 2 |     batch = []
 3 |     is_tuple = False
 4 |     for l in generator:
 5 |         is_tuple = isinstance(l, tuple)
 6 |         batch.append(l)
 7 |         if len(batch) == batch_size:
 8 |             yield tuple(list(x) for x in zip(*batch)) if is_tuple else batch
 9 |             batch = []
10 |     if batch:
11 |         yield tuple(list(x) for x in zip(*batch)) if is_tuple else batch
12 | 
13 | def sorted_parallel(generator1, generator2, pooling, order=1):
14 |     gen1 = batch(generator1, pooling)
15 |     gen2 = batch(generator2, pooling)
16 |     for batch1, batch2 in zip(gen1, gen2):
17 |         #yield from sorted(zip(batch1, batch2), key=lambda x: len(x[1]))
18 |         for x in sorted(zip(batch1, batch2), key=lambda x: len(x[order])):
19 |             yield x
20 | 
21 | def word_list(filename):
22 |     with open(filename) as fp:
23 |         for l in fp:
24 |             yield l.split()
25 | 
26 | def letter_list(filename):
27 |     with open(filename) as fp:
28 |         for l in fp:
29 |             yield list(''.join(l.split()))
30 | 
31 | 


--------------------------------------------------------------------------------
/chainer-1.4/util/model_file.py:
--------------------------------------------------------------------------------
 1 | from .functions import vtos, stov
 2 | 
 3 | class ModelFile:
 4 |     def __init__(self, filename, mode='r'):
 5 |         self.__fp = open(filename, mode)
 6 | 
 7 |     def __enter__(self):
 8 |         return self
 9 | 
10 |     def __exit__(self, exc_type, exc_value, traceback):
11 |         self.__fp.close()
12 |         return False
13 | 
14 |     def write(self, x):
15 |         print(x, file=self.__fp)
16 | 
17 |     def __write_vector(self, x):
18 |         self.write(vtos(x))
19 | 
20 |     def __write_matrix(self, x):
21 |         for row in x:
22 |             self.__write_vector(row)
23 |     
24 |     def read(self):
25 |         return next(self.__fp).strip()
26 | 
27 |     def __read_vector(self, x, tp):
28 |         data = stov(self.read(), tp)
29 |         for i in range(len(data)):
30 |             x[i] = data[i]
31 | 
32 |     def __read_matrix(self, x, tp):
33 |         for row in x:
34 |             self.__read_vector(row, tp)
35 | 
36 |     def write_embed(self, f):
37 |         self.__write_matrix(f.W)
38 | 
39 |     def write_linear(self, f):
40 |         self.__write_matrix(f.W)
41 |         self.__write_vector(f.b)
42 | 
43 |     def read_embed(self, f):
44 |         self.__read_matrix(f.W, float)
45 | 
46 |     def read_linear(self, f):
47 |         self.__read_matrix(f.W, float)
48 |         self.__read_vector(f.b, float)
49 | 
50 |     def get_file_pointer(self):
51 |         return self.__fp
52 | 
53 | 


--------------------------------------------------------------------------------
/chainer-1.4/util/vocabulary.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | 
 3 | 
 4 | class Vocabulary:
 5 |     def __init__(self):
 6 |         pass
 7 | 
 8 |     def __len__(self):
 9 |         return self.__size
10 | 
11 |     def stoi(self, s):
12 |         return self.__stoi[s]
13 | 
14 |     def itos(self, i):
15 |         return self.__itos[i]
16 | 
17 |     @staticmethod
18 |     def new(list_generator, size):
19 |         self = Vocabulary()
20 |         self.__size = size
21 | 
22 |         word_freq = defaultdict(lambda: 0)
23 |         for words in list_generator:
24 |             for word in words:
25 |                 word_freq[word] += 1
26 | 
27 |         self.__stoi = defaultdict(lambda: 0)
28 |         self.__stoi['<unk>'] = 0
29 |         self.__stoi['<s>'] = 1
30 |         self.__stoi['</s>'] = 2
31 |         self.__itos = [''] * self.__size
32 |         self.__itos[0] = '<unk>'
33 |         self.__itos[1] = '<s>'
34 |         self.__itos[2] = '</s>'
35 |         
36 |         for i, (k, v) in zip(range(self.__size - 3), sorted(word_freq.items(), key=lambda x: -x[1])):
37 |             self.__stoi[k] = i + 3
38 |             self.__itos[i + 3] = k
39 | 
40 |         return self
41 | 
42 |     def save(self, fp):
43 |         print(self.__size, file=fp)
44 |         for i in range(self.__size):
45 |             print(self.__itos[i], file=fp)
46 | 
47 |     @staticmethod
48 |     def load(line_gen):
49 |         self = Vocabulary()
50 |         
51 |         self.__size = int(next(line_gen))
52 | 
53 |         self.__stoi = defaultdict(lambda: 0)
54 |         self.__itos = [''] * self.__size
55 |         for i in range(self.__size):
56 |             s = next(line_gen).strip()
57 |             if s:
58 |                 self.__stoi[s] = i
59 |                 self.__itos[i] = s
60 |         
61 |         return self
62 | 
63 | 


--------------------------------------------------------------------------------
/chainer-1.5/LSTMVariants.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | 
  3 | import chainer
  4 | from chainer.functions.activation import sigmoid
  5 | from chainer.functions.activation import tanh
  6 | from chainer import link
  7 | from chainer.links.connection import linear
  8 | 
  9 | 
 10 | class LSTMBase(link.Chain):
 11 | 
 12 |     def __init__(self, n_units, n_inputs=None):
 13 |         if n_inputs is None:
 14 |             n_inputs = n_units
 15 |         super(LSTMBase, self).__init__(
 16 |             W_fh=linear.Linear(n_inputs, n_units),
 17 |             W_ih=linear.Linear(n_inputs, n_units),
 18 |             W_oh=linear.Linear(n_inputs, n_units),
 19 |             W_ch=linear.Linear(n_inputs, n_units),
 20 |             W_fx=linear.Linear(n_inputs, n_units),
 21 |             W_ix=linear.Linear(n_inputs, n_units),
 22 |             W_ox=linear.Linear(n_inputs, n_units),
 23 |             W_cx=linear.Linear(n_inputs, n_units),
 24 |         )
 25 | 
 26 | class CoupledForgetLSTMBase(link.Chain):
 27 | 
 28 |     def __init__(self, n_units, n_inputs=None):
 29 |         if n_inputs is None:
 30 |             n_inputs = n_units
 31 |         super(LSTMBase, self).__init__(
 32 |             W_fh=linear.Linear(n_inputs, n_units),
 33 |             W_oh=linear.Linear(n_inputs, n_units),
 34 |             W_ch=linear.Linear(n_inputs, n_units),
 35 |             W_fx=linear.Linear(n_inputs, n_units),
 36 |             W_ox=linear.Linear(n_inputs, n_units),
 37 |             W_cx=linear.Linear(n_inputs, n_units),
 38 |         )
 39 | 
 40 | class PeepHoleLSTMBase(link.Chain):
 41 | 
 42 |     def __init__(self, n_units, n_inputs=None):
 43 |         if n_inputs is None:
 44 |             n_inputs = n_units
 45 |         super(PeepHoleLSTMBase, self).__init__(
 46 |             W_fh=linear.Linear(n_inputs, n_units),
 47 |             W_fc=linear.Linear(n_inputs, n_units),
 48 |             W_ih=linear.Linear(n_inputs, n_units),
 49 |             W_ic=linear.Linear(n_inputs, n_units),
 50 |             W_oh=linear.Linear(n_inputs, n_units),
 51 |             W_oc=linear.Linear(n_inputs, n_units),
 52 |             W_ch=linear.Linear(n_inputs, n_units),
 53 |             W_fx=linear.Linear(n_inputs, n_units),
 54 |             W_ix=linear.Linear(n_inputs, n_units),
 55 |             W_ox=linear.Linear(n_inputs, n_units),
 56 |             W_cx=linear.Linear(n_inputs, n_units),
 57 |         )
 58 | 
 59 | class CoupledForgetPeepHoleLSTMBase(link.Chain):
 60 | 
 61 |     def __init__(self, n_units, n_inputs=None):
 62 |         if n_inputs is None:
 63 |             n_inputs = n_units
 64 |         super(PeepHoleLSTMBase, self).__init__(
 65 |             W_fh=linear.Linear(n_inputs, n_units),
 66 |             W_fc=linear.Linear(n_inputs, n_units),
 67 |             W_oh=linear.Linear(n_inputs, n_units),
 68 |             W_oc=linear.Linear(n_inputs, n_units),
 69 |             W_ch=linear.Linear(n_inputs, n_units),
 70 |             W_fx=linear.Linear(n_inputs, n_units),
 71 |             W_ox=linear.Linear(n_inputs, n_units),
 72 |             W_cx=linear.Linear(n_inputs, n_units),
 73 |         )
 74 | 
 75 | class StatefulLSTM(LSTMBase):
 76 | 
 77 | 
 78 |     def __init__(self, in_size, out_size):
 79 |         super(StatefulLSTM, self).__init__(out_size, in_size)
 80 |         self.state_size = out_size
 81 |         self.reset_state()
 82 | 
 83 |     def to_cpu(self):
 84 |         super(StatefulLSTM, self).to_cpu()
 85 |         if self.h is not None:
 86 |             self.h.to_cpu()
 87 |         if self.c is not None:
 88 |             self.c.to_cpu()
 89 | 
 90 |     def to_gpu(self, device=None):
 91 |         super(StatefulLSTM, self).to_gpu(device)
 92 |         if self.c is not None:
 93 |             self.c.to_gpu(device)
 94 |         if self.h is not None:
 95 |             self.h.to_gpu(device)
 96 | 
 97 |     def set_state(self, h, c):
 98 |         assert isinstance(h, chainer.Variable)
 99 |         assert isinstance(c, chainer.Variable)
100 |         h_ = h
101 |         c_ = c
102 |         if self.xp == numpy:
103 |             h_.to_cpu()
104 |             c_.to_cpu()
105 |         else:
106 |             h_.to_gpu()
107 |             c_.to_gpu()
108 |         self.h = h_
109 |         self.c = c_
110 | 
111 |     def reset_state(self):
112 |         self.h = None
113 |         self.c = None
114 | 
115 |     def __call__(self, x):
116 |         ft = self.W_fx(x)
117 |         it = self.W_ix(x)
118 |         ct = self.W_cx(x)
119 |         ot = self.W_ox(x)
120 | 
121 |         if self.h is not None:
122 |             ft += self.W_fh(h)
123 |             it += self.W_ih(h)
124 |             ct += self.W_ch(h)
125 |             ot += self.W_oh(h)
126 |         ft = sigmoid.sigmoid(ft)
127 |         it = sigmoid.sigmoid(it)
128 |         ct = tanh.tanh(ct)
129 |         ot = sigmoid.sigmoid(ot)
130 | 
131 |         c = it * ct
132 |         if self.c is not none:
133 |             c += ft * self.c
134 |         self.c = c
135 |         self.h = ot * tanh.tanh(self.c)
136 |         return self.h
137 | 
138 |     def get_state():
139 |         return self.c
140 | 
141 | 
142 | class StatelessLSTM(LSTMBase):
143 |     def __init__(self, in_size, out_size):
144 |         super(StatelessLSTM, self).__init__(out_size, in_size)
145 |         self.state_size = out_size
146 | 
147 |     def __call__(self, x, h, c):
148 |         ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h))
149 |         it = sigmoid.sigmoid(self.W_ix(x) + self.W_ih(h))
150 |         ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
151 |         ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h))
152 |         c = ft * c + it * ct
153 |         h = ot * tanh.tanh(c)
154 |         return h, c
155 | 
156 | class StatefulPeepHoleLSTM(PeepHoleLSTMBase):
157 | 
158 | 
159 |     def __init__(self, in_size, out_size):
160 |         super(StatefulPeepHoleLSTM, self).__init__(out_size, in_size)
161 |         self.state_size = out_size
162 |         self.reset_state()
163 | 
164 |     def to_cpu(self):
165 |         super(StatefulPeepHoleLSTM, self).to_cpu()
166 |         if self.h is not None:
167 |             self.h.to_cpu()
168 |         if self.c is not None:
169 |             self.c.to_cpu()
170 | 
171 |     def to_gpu(self, device=None):
172 |         super(StatefulPeepHoleLSTM, self).to_gpu(device)
173 |         if self.c is not None:
174 |             self.c.to_gpu(device)
175 |         if self.h is not None:
176 |             self.h.to_gpu(device)
177 | 
178 |     def set_state(self, h, c):
179 |         assert isinstance(h, chainer.Variable)
180 |         assert isinstance(c, chainer.Variable)
181 |         h_ = h
182 |         c_ = c
183 |         if self.xp == numpy:
184 |             h_.to_cpu()
185 |             c_.to_cpu()
186 |         else:
187 |             h_.to_gpu()
188 |             c_.to_gpu()
189 |         self.h = h_
190 |         self.c = c_
191 | 
192 |     def reset_state(self):
193 |         self.h = None
194 |         self.c = None
195 | 
196 |     def __call__(self, x):
197 |         ft = self.W_fx(x)
198 |         it = self.W_ix(x)
199 |         ct = self.W_cx(x)
200 |         ot = self.W_ox(x)
201 | 
202 |         if self.h is not None and self.c is not None:
203 |             ft += self.W_fh(h) + self.W_fc(self.c)
204 |             it += self.W_ih(h) + self.W_ic(self.c)
205 |             ct += self.W_ch(h)
206 |             ot += self.W_oh(h)
207 |         ft = sigmoid.sigmoid(ft)
208 |         it = sigmoid.sigmoid(it)
209 |         ct = tanh.tanh(ct)
210 |         ot = sigmoid.sigmoid(ot + self.W_oc(ct))
211 | 
212 |         c = it * ct
213 |         if self.c is not none:
214 |             self.c += ft * c
215 | 
216 |         self.h = ot * tanh.tanh(self.c)
217 |         return self.h
218 | 
219 |     def get_state():
220 |         return self.c
221 | 
222 | 
223 | class StatelessPeepHoleLSTM(PeepHoleLSTMBase):
224 | 
225 | 
226 |     def __init__(self, in_size, out_size):
227 |         super(StatelessPeepHoleLSTM, self).__init__(out_size, in_size)
228 |         self.state_size = out_size
229 |         
230 | 
231 |     def __call__(self, x, h, c):
232 |         ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h) + self.W_fc(c))
233 |         it = sigmoid.sigmoid(self.W_ix(x) + self.W_ih(h) + self.W_ic(c))
234 |         ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
235 |         c = ft * c + it * ct
236 |         ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h) + self.W_oc(c))
237 |         h = ot * tanh.tanh(c)
238 |         return h, c
239 | 
240 | class CoupledForgetStatefulLSTM(CoupledForgetLSTMBase):
241 | 
242 | 
243 |     def __init__(self, in_size, out_size):
244 |         super(CoupledForgetStatefulLSTM, self).__init__(out_size, in_size)
245 |         self.state_size = out_size
246 |         self.reset_state()
247 | 
248 |     def to_cpu(self):
249 |         super(CoupledForgetStatefulLSTM, self).to_cpu()
250 |         if self.h is not None:
251 |             self.h.to_cpu()
252 |         if self.c is not None:
253 |             self.c.to_cpu()
254 | 
255 |     def to_gpu(self, device=None):
256 |         super(CoupledForgetStatefulLSTM, self).to_gpu(device)
257 |         if self.c is not None:
258 |             self.c.to_gpu(device)
259 |         if self.h is not None:
260 |             self.h.to_gpu(device)
261 | 
262 |     def set_state(self, h, c):
263 |         assert isinstance(h, chainer.Variable)
264 |         assert isinstance(c, chainer.Variable)
265 |         h_ = h
266 |         c_ = c
267 |         if self.xp == numpy:
268 |             h_.to_cpu()
269 |             c_.to_cpu()
270 |         else:
271 |             h_.to_gpu()
272 |             c_.to_gpu()
273 |         self.h = h_
274 |         self.c = c_
275 | 
276 |     def reset_state(self):
277 |         self.h = None
278 |         self.c = None
279 | 
280 |     def __call__(self, x):
281 |         ft = self.W_fx(x)
282 |         ct = self.W_cx(x)
283 |         ot = self.W_ox(x)
284 | 
285 |         if self.h is not None:
286 |             ft += self.W_fh(h)
287 |             ct += self.W_ch(h)
288 |             ot += self.W_oh(h)
289 |         ft = sigmoid.sigmoid(ft)
290 |         ct = tanh.tanh(ct)
291 |         ot = sigmoid.sigmoid(ot)
292 | 
293 |         c = (1 - ft) * ct
294 |         if self.c is not none:
295 |             c += ft * self.c
296 |         self.c = c
297 |         self.h = ot * tanh.tanh(self.c)
298 |         return self.h
299 | 
300 |     def get_state():
301 |         return self.c
302 | 
303 | 
304 | class CoupledForgetStatelessLSTM(CoupledForgetLSTMBase):
305 |     def __init__(self, in_size, out_size):
306 |         super(CoupledForgetStatelessLSTM, self).__init__(out_size, in_size)
307 |         self.state_size = out_size
308 | 
309 |     def __call__(self, x, h, c):
310 |         ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h))
311 |         ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
312 |         ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h))
313 |         c = ft * c + (1 - ft)) * ct
314 |         h = ot * tanh.tanh(c)
315 |         return h, c
316 | 
317 | class CoupledForgetStatefulPeepHoleLSTM(CoupledForgetPeepHoleLSTMBase):
318 | 
319 | 
320 |     def __init__(self, in_size, out_size):
321 |         super(CoupledForgetStatefulPeepHoleLSTM, self).__init__(out_size, in_size)
322 |         self.state_size = out_size
323 |         self.reset_state()
324 | 
325 |     def to_cpu(self):
326 |         super(CoupledForgetStatefulPeepHoleLSTM, self).to_cpu()
327 |         if self.h is not None:
328 |             self.h.to_cpu()
329 |         if self.c is not None:
330 |             self.c.to_cpu()
331 | 
332 |     def to_gpu(self, device=None):
333 |         super(CoupledForgetStatefulPeepHoleLSTM, self).to_gpu(device)
334 |         if self.c is not None:
335 |             self.c.to_gpu(device)
336 |         if self.h is not None:
337 |             self.h.to_gpu(device)
338 | 
339 |     def set_state(self, h, c):
340 |         assert isinstance(h, chainer.Variable)
341 |         assert isinstance(c, chainer.Variable)
342 |         h_ = h
343 |         c_ = c
344 |         if self.xp == numpy:
345 |             h_.to_cpu()
346 |             c_.to_cpu()
347 |         else:
348 |             h_.to_gpu()
349 |             c_.to_gpu()
350 |         self.h = h_
351 |         self.c = c_
352 | 
353 |     def reset_state(self):
354 |         self.h = None
355 |         self.c = None
356 | 
357 |     def __call__(self, x):
358 |         ft = self.W_fx(x)
359 |         ct = self.W_cx(x)
360 |         ot = self.W_ox(x)
361 | 
362 |         if self.h is not None and self.c is not None:
363 |             ft += self.W_fh(h) + self.W_fc(self.c)
364 |             ct += self.W_ch(h)
365 |             ot += self.W_oh(h)
366 |         ft = sigmoid.sigmoid(ft)
367 |         ct = tanh.tanh(ct)
368 |         ot = sigmoid.sigmoid(ot + self.W_oc(ct))
369 | 
370 |         c = (1 - ft) * ct
371 |         if self.c is not none:
372 |             self.c += ft * c
373 | 
374 |         self.h = ot * tanh.tanh(self.c)
375 |         return self.h
376 | 
377 |     def get_state():
378 |         return self.c
379 | 
380 | 
381 | class CoupledForgetStatelessPeepHoleLSTM(CoupledForgetPeepHoleLSTMBase):
382 | 
383 | 
384 |     def __init__(self, in_size, out_size):
385 |         super(CoupledForgetStatelessPeepHoleLSTM, self).__init__(out_size, in_size)
386 |         self.state_size = out_size
387 |         
388 | 
389 |     def __call__(self, x, h, c):
390 |         ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h) + self.W_fc(c))
391 |         ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
392 |         c = ft * c + (1 - ft) * ct
393 |         ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h) + self.W_oc(c))
394 |         h = ot * tanh.tanh(c)
395 |         return h, c
396 | 
397 | 


--------------------------------------------------------------------------------
/chainer-1.5/attention_lm.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import numpy
  3 | from argparse import ArgumentParser
  4 | from chainer import Chain, Variable, cuda, functions, links, optimizer, optimizers, serializers
  5 | import util.generators as gens
  6 | from util.functions import trace, fill_batch
  7 | from util.vocabulary import Vocabulary
  8 | 
  9 | 
 10 | #Added comment
 11 | 
 12 | def make_vocab(filename, vocab_size):
 13 |     word_freq = defaultdict(lambda: 0)
 14 |     num_lines = 0
 15 |     num_words = 0
 16 |     with open(filename) as fp:
 17 |         for line in fp:
 18 |             words = line.split()
 19 |             num_lines += 1
 20 |             num_words += len(words)
 21 |             for word in words:
 22 |                 word_freq[word] += 1
 23 | 
 24 |     # 0: unk
 25 |     # 1: <s>
 26 |     # 2: </s>
 27 |     vocab = defaultdict(lambda: 0)
 28 |     vocab['<s>'] = 1
 29 |     vocab['</s>'] = 2
 30 |     for i,(k,v) in zip(range(vocab_size - 3), sorted(word_freq.items(), key=lambda x: -x[1])):
 31 |         vocab[k] = i + 3
 32 | 
 33 |     return vocab, num_lines, num_words
 34 | 
 35 | 
 36 | def generate_batch(filename, batch_size):
 37 |     with open(filename) as fp:
 38 |         batch = []
 39 |         try:
 40 |             while True:
 41 |                 for i in range(batch_size):
 42 |                     batch.append(next(fp).split())
 43 |                 
 44 |                 max_len = max(len(x) for x in batch)
 45 |                 batch = [['<s>'] + x + ['</s>'] * (max_len - len(x) + 1) for x in batch]
 46 |                 yield batch
 47 |                 
 48 |                 batch = []
 49 |         except:
 50 |             pass
 51 | 
 52 |         if batch:
 53 |             max_len = max(len(x) for x in batch)
 54 |             batch = [['<s>'] + x + ['</s>'] * (max_len - len(x) + 1) for x in batch]
 55 |             yield batch
 56 | 
 57 | 
 58 | 
 59 | def get_data(variable):
 60 |     #return variable.data
 61 |     return cuda.to_cpu(variable.data)
 62 | 
 63 | def parse_args():
 64 |     def_vocab = 40000
 65 |     def_embed = 200
 66 |     def_hidden = 200
 67 |     def_epoch = 10
 68 |     def_minibatch = 256
 69 |     def_model = 0
 70 |     p = ArgumentParser(description='RNNLM trainer')
 71 | 
 72 |     p.add_argument('corpus', help='[in] training corpus')
 73 |     p.add_argument('valid', help='[in] validation corpus')
 74 |     p.add_argument('model', help='[out] model file')
 75 |     p.add_argument('-V', '--vocab', default=def_vocab, metavar='INT', type=int,
 76 |         help='vocabulary size (default: %d)' % def_vocab)
 77 |     p.add_argument('-E', '--embed', default=def_embed, metavar='INT', type=int,
 78 |         help='embedding layer size (default: %d)' % def_embed)
 79 |     p.add_argument('-H', '--hidden', default=def_hidden, metavar='INT', type=int,
 80 |         help='hidden layer size (default: %d)' % def_hidden)
 81 |     p.add_argument('-I', '--epoch', default=def_epoch, metavar='INT', type=int,
 82 |         help='number of training epoch (default: %d)' % def_epoch)
 83 |     p.add_argument('-B', '--minibatch', default=def_minibatch, metavar='INT', type=int,
 84 |         help='minibatch size (default: %d)' % def_minibatch)
 85 |     p.add_argument('-M', '--model', default=def_model, metavar='INT', type=int,
 86 |         help='RNN used for LM (default: %d) where 0: Default RNNLM, 1: LSTM RNNLM, 2: Attention RNNLM' % def_model)
 87 | 
 88 |     args = p.parse_args()
 89 | 
 90 |     # check args
 91 |     try:
 92 |         if (args.vocab < 1): raise ValueError('you must set --vocab >= 1')
 93 |         if (args.embed < 1): raise ValueError('you must set --embed >= 1')
 94 |         if (args.hidden < 1): raise ValueError('you must set --hidden >= 1')
 95 |         if (args.epoch < 1): raise ValueError('you must set --epoch >= 1')
 96 |         if (args.minibatch < 1): raise ValueError('you must set --minibatch >= 1')
 97 |     except Exception as ex:
 98 |         p.print_usage(file=sys.stderr)
 99 |         print(ex)
100 |         sys.exit()
101 | 
102 |     return args
103 | 
104 | 
105 | class XP:
106 |   __lib = None
107 | 
108 |   @staticmethod
109 |   def set_library(args):
110 |     if args.use_gpu:
111 |       XP.__lib = cuda.cupy
112 |       cuda.get_device(args.gpu_device).use()
113 |     else:
114 |       XP.__lib = numpy
115 | 
116 |   @staticmethod
117 |   def __zeros(shape, dtype):
118 |     return Variable(XP.__lib.zeros(shape, dtype=dtype))
119 | 
120 |   @staticmethod
121 |   def fzeros(shape):
122 |     return XP.__zeros(shape, XP.__lib.float32)
123 | 
124 |   @staticmethod
125 |   def __nonzeros(shape, dtype, val):
126 |     return Variable(val * XP.__lib.ones(shape, dtype=dtype))
127 | 
128 |   @staticmethod
129 |   def fnonzeros(shape, val=1):
130 |     return XP.__nonzeros(shape, XP.__lib.float32, val)
131 | 
132 |   @staticmethod
133 |   def __array(array, dtype):
134 |     return Variable(XP.__lib.array(array, dtype=dtype))
135 | 
136 |   @staticmethod
137 |   def iarray(array):
138 |     return XP.__array(array, XP.__lib.int32)
139 | 
140 |   @staticmethod
141 |   def farray(array):
142 |     return XP.__array(array, XP.__lib.float32)
143 | 
144 | class SrcEmbed(Chain):
145 |   def __init__(self, vocab_size, embed_size):
146 |     super(SrcEmbed, self).__init__(
147 |         xe = links.EmbedID(vocab_size, embed_size),
148 |     )
149 | 
150 |   def __call__(self, x):
151 |     return functions.tanh(self.xe(x))
152 | 
153 | class BasicRnnLM(Chain):
154 |     def __init__(self, embed_size, hidden_size, vocab_size):
155 |         super(BasicRnn, self).__init__(
156 |             xe = SrcEmbed(vocab_size, embed_size),
157 |             eh = links.Linear(embed_size, hidden_size),
158 |             hh = links.Linear(hidden_size, hidden_size),
159 |             hy = links.Linear(hidden_size, vocab_size),
160 |         )
161 |     self.reset_state()
162 | 
163 |     def reset_state():
164 |         self.h = None
165 | 
166 |     def __call__(self, x):
167 | 
168 |         e = self.xe(x)
169 |         h = self.eh(e)
170 |         if self.h is not None:
171 |             h += self.hh(self.h)
172 |         self.h = functions.tanh(h)
173 |         y = self.hy(self.h)
174 |         return y
175 | 
176 | class LSTMLM(Chain):
177 |     def __init__(self, embed_size, hidden_size, vocab_size):
178 |         super(LSTMRnn, self).__init__(
179 |             xe = SrcEmbed(vocab_size, embed_size),
180 |             lstm = links.LSTM(embed_size, hidden_size),
181 |             hy = links.Linear(hidden_size, vocab_size),
182 |         )
183 | 
184 |     def reset(self):
185 |         self.zerograds()
186 | 
187 |     def __call__(self, x):
188 |         e = self.xe(x)
189 |         h = self.lstm(e)
190 |         y = self.hy(h)
191 |         return y
192 | 
193 | class LSTMEncoder(Chain):
194 |     def __init__(self, embed_size, hidden_size):
195 |         super(LSTMEncoder, self).__init__(
196 |             lstm = links.LSTM(embed_size, hidden_size),
197 |         )
198 |     def reset(self):
199 |         self.zerograds()
200 |     def __call__(self, x):
201 |         h = self.lstm(x)
202 |         return h
203 | 
204 | class Attention(Chain):
205 |   def __init__(self, hidden_size, embed_size):
206 |     super(Attention, self).__init__(
207 |         aw = links.Linear(embed_size, hidden_size),
208 |         pw = links.Linear(hidden_size, hidden_size),
209 |         we = links.Linear(hidden_size, 1),
210 |     )
211 |     self.hidden_size = hidden_size
212 |   
213 |   
214 |     
215 |   def __call__(self, a_list, p):
216 |     batch_size = p.data.shape[0]
217 |     e_list = []
218 |     sum_e = XP.fzeros((batch_size, 1))
219 |     for a in a_list:
220 |       w = functions.tanh(self.aw(a) + self.pw(p))
221 |       e = functions.exp(self.we(w))
222 |       e_list.append(e)
223 |       sum_e += e
224 |     ZEROS = XP.fzeros((batch_size, self.hidden_size))
225 |     aa = ZEROS
226 |     for a, e in zip(a_list, e_list):
227 |       e /= sum_e
228 |       aa += a * e
229 |       #aa += functions.reshape(functions.batch_matmul(a, e), (batch_size, self.hidden_size))
230 |     return aa
231 | 
232 | class AttentionLM(Chain):
233 |   def __init__(self, embed_size, hidden_size, vocab_size):
234 |     super(AttentionMT, self).__init__(
235 |         emb = SrcEmbed(vocab_size, embed_size),
236 |         enc = LSTMEncoder(embed_size, hidden_size),
237 |         att = Attention(hidden_size, embed_size),
238 |         outhe = links.Linear(hidden_size, hidden_size),
239 |         outae = links.Linear(hidden_size, hidden_size),
240 |         outey = links.Linear(hidden_size, vocab_size),
241 |     )
242 |     self.vocab_size = vocab_size
243 |     self.embed_size = embed_size
244 |     self.hidden_size = hidden_size
245 | 
246 |   def reset(self):
247 |     self.zerograds()
248 |     self.x_list = []
249 | 
250 |   def embed(self, x):
251 |     self.x_list.append(self.emb(x))
252 | 
253 |   def encode(self, x):
254 |     self.h = self.enc(x)
255 | 
256 |   def decode(self, atts_list):
257 |     aa = self.att(self.atts_list, self.h)
258 |     y = tanh(self.outhe(self.h) + self.outae(aa))
259 |     return self.outey(y)
260 | 
261 |   def save_spec(self, filename):
262 |     with open(filename, 'w') as fp:
263 |       print(self.vocab_size, file=fp)
264 |       print(self.embed_size, file=fp)
265 |       print(self.hidden_size, file=fp)
266 | 
267 |   @staticmethod
268 |   def load_spec(filename):
269 |     with open(filename) as fp:
270 |       vocab_size = int(next(fp))
271 |       embed_size = int(next(fp))
272 |       hidden_size = int(next(fp))
273 |       return AttentionLM(embed_size, hidden_size, vocab_size)
274 | 
275 | def forward(batch, model):
276 |     batch = [[vocab[x] for x in words] for words in batch]
277 |     K = len(batch)
278 |     L = len(batch[0]) - 1
279 | 
280 |     opt.zero_grads()
281 |     accum_loss = XP.fzeros(())
282 |     accum_log_ppl = XP.fzeros(())
283 | 
284 |     if args.model is 0 or args.model is 1:
285 |         
286 |         for l in range(L):
287 |             s_x = make_var([batch[k][l] for k in range(K)], dtype=np.int32)
288 |             s_t = make_var([batch[k][l + 1] for k in range(K)], dtype=np.int32)
289 | 
290 |             s_y = model(s_x)
291 | 
292 |             loss_i = functions.softmax_cross_entropy(s_y, s_t)
293 |             accum_loss += loss_i
294 |         
295 |             accum_log_ppl += get_data(loss_i)
296 | 
297 |         
298 | 
299 |     else:
300 |         for l in range(L):
301 |             s_x = make_var([batch[k][l] for k in range(K)], dtype=np.int32)
302 |             model.embed(s_x)
303 |         for l in range(L):
304 |             s_t = make_var([batch[k][l + 1] for k in range(K)], dtype=np.int32)
305 |             model.encode(self.x_list[l])
306 |             s_y = model.decode(self.x_list[0:l]+self.x_list[l+1:L])
307 |             
308 |             loss_i = functions.softmax_cross_entropy(s_y, s_t)
309 |             accum_loss += loss_i
310 |         
311 |             accum_log_ppl += get_data(loss_i)
312 |     
313 |     return accum_loss, accum_log_ppl
314 |             
315 | 
316 | def main():
317 |     args = parse_args()
318 | 
319 |     trace('making vocabulary ...')
320 |     vocab, num_lines, num_words = make_vocab(args.corpus, args.vocab)
321 | 
322 |     trace('initializing CUDA ...')
323 |     cuda.init()
324 | 
325 |     trace('start training ...')
326 |     if args.model is 0:
327 |         model = BasicRnnLM(args.embed, args.hidden, args.vocab)
328 |         model.reset()
329 |     elif args.model is 1:
330 |         model = LSTMRnn(args.embed, args.hidden, args.vocab)
331 |         model.reset()
332 |     elif args.model is 2:
333 |         model = AttentionLM(args.embed, args.hidden, args.vocab)
334 |         model.reset()
335 |     model.to_gpu()
336 | 
337 |     for epoch in range(args.epoch):
338 |         trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
339 |         log_ppl = 0.0
340 |         trained = 0
341 |         
342 |         opt = optimizers.AdaGrad(lr = 0.01)
343 |         opt.setup(model)
344 |         opt.add_hook(optimizer.GradientClipping(5))
345 | 
346 |         for batch in generate_batch(args.corpus, args.minibatch):
347 |             K = len(batch)
348 |             loss, perplexity= forward(batch, model)
349 |             loss.backward()
350 |             log_ppl += perplexity 
351 |             opt.update()
352 |             trained += K
353 |             model.reset()
354 | 
355 |         trace('  %d/%d' % (trained, num_lines))      
356 |         log_ppl /= float(num_words)
357 |         trace('Train  log(PPL) = %.10f' % log_ppl)
358 |         trace('Train  PPL      = %.10f' % math.exp(log_ppl))
359 | 
360 |         log_ppl = 0.0
361 | 
362 |         for batch in generate_batch(args.valid, args.minibatch):
363 |             K = len(batch)
364 |             loss, perplexity= forward(batch, model)
365 |             log_ppl += perplexity 
366 |             model.reset()
367 | 
368 |         trace('Valid  log(PPL) = %.10f' % log_ppl)
369 |         trace('Valid  PPL      = %.10f' % math.exp(log_ppl))
370 | 
371 |         trace('  writing model ...')
372 |         trace('saving model ...')
373 |         prefix = 'RNNLM-'+str(args.model) + '.%03.d' % (epoch + 1)
374 |         save_vocab(prefix + '.srcvocab',vocab) #Fix this # Fixed
375 |         model.save_spec(prefix + '.spec')
376 |         serializers.save_hdf5(prefix + '.weights', model)
377 | 
378 |     trace('training finished.')
379 | 
380 | 
381 | if __name__ == '__main__':
382 |     main()
383 | 
384 | 
385 | def save_vocab(filename, vocab):
386 |     with open(filename, 'w') as fp:
387 |         for k, v in vocab.items():
388 |             if v == 0:
389 |                 continue
390 |             print('%s %d' % (k, v), file=fp)
391 | 


--------------------------------------------------------------------------------
/chainer-1.5/mt_s2s_attention.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import numpy
  3 | from argparse import ArgumentParser
  4 | from chainer import Chain, ChainList, Variable, cuda, functions, links, optimizer, optimizers, serializers
  5 | import util.generators as gens
  6 | from util.functions import trace, fill_batch
  7 | from util.vocabulary import Vocabulary
  8 | 
  9 | def parse_args():
 10 |   def_gpu_device = 0
 11 |   def_vocab = 1000
 12 |   def_embed = 100
 13 |   def_hidden = 200
 14 |   def_epoch = 10
 15 |   def_minibatch = 64
 16 |   def_generation_limit = 128
 17 | 
 18 |   p = ArgumentParser(
 19 |     description='Attentional neural machine trainslation',
 20 |     usage=
 21 |       '\n  %(prog)s train [options] source target model'
 22 |       '\n  %(prog)s test source target model'
 23 |       '\n  %(prog)s -h',
 24 |   )
 25 | 
 26 | 
 27 |   p.add_argument('mode', help='\'train\' or \'test\'')
 28 |   p.add_argument('source', help='[in] source corpus')
 29 |   p.add_argument('target', help='[in/out] target corpus')
 30 |   p.add_argument('model', help='[in/out] model file')
 31 |   p.add_argument('--use-gpu', action='store_true', default=False,
 32 |     help='use GPU calculation')
 33 |   p.add_argument('--gpu-device', default=def_gpu_device, metavar='INT', type=int,
 34 |     help='GPU device ID to be used (default: %(default)d)')
 35 |   p.add_argument('--vocab', default=def_vocab, metavar='INT', type=int,
 36 |     help='vocabulary size (default: %(default)d)')
 37 |   p.add_argument('--embed', default=def_embed, metavar='INT', type=int,
 38 |     help='embedding layer size (default: %(default)d)')
 39 |   p.add_argument('--hidden', default=def_hidden, metavar='INT', type=int,
 40 |     help='hidden layer size (default: %(default)d)')
 41 |   p.add_argument('--epoch', default=def_epoch, metavar='INT', type=int,
 42 |     help='number of training epoch (default: %(default)d)')
 43 |   p.add_argument('--minibatch', default=def_minibatch, metavar='INT', type=int,
 44 |     help='minibatch size (default: %(default)d)')
 45 |   p.add_argument('--generation-limit', default=def_generation_limit, metavar='INT', type=int,
 46 |     help='maximum number of words to be generated for test input (default: %(default)d)')
 47 | 
 48 |   args = p.parse_args()
 49 | 
 50 |   # check args
 51 |   try:
 52 |     if args.mode not in ['train', 'test']: raise ValueError('you must set mode = \'train\' or \'test\'')
 53 |     if args.vocab < 1: raise ValueError('you must set --vocab >= 1')
 54 |     if args.embed < 1: raise ValueError('you must set --embed >= 1')
 55 |     if args.hidden < 1: raise ValueError('you must set --hidden >= 1')
 56 |     if args.epoch < 1: raise ValueError('you must set --epoch >= 1')
 57 |     if args.minibatch < 1: raise ValueError('you must set --minibatch >= 1')
 58 |     if args.generation_limit < 1: raise ValueError('you must set --generation-limit >= 1')
 59 |   except Exception as ex:
 60 |     p.print_usage(file=sys.stderr)
 61 |     print(ex, file=sys.stderr)
 62 |     sys.exit()
 63 | 
 64 |   return args
 65 | 
 66 | class XP:
 67 |   __lib = None
 68 | 
 69 |   @staticmethod
 70 |   def set_library(args):
 71 |     if args.use_gpu:
 72 |       XP.__lib = cuda.cupy
 73 |       cuda.get_device(args.gpu_device).use()
 74 |     else:
 75 |       XP.__lib = numpy
 76 | 
 77 |   @staticmethod
 78 |   def __zeros(shape, dtype):
 79 |     return Variable(XP.__lib.zeros(shape, dtype=dtype))
 80 | 
 81 |   @staticmethod
 82 |   def fzeros(shape):
 83 |     return XP.__zeros(shape, XP.__lib.float32)
 84 | 
 85 |   @staticmethod
 86 |   def __nonzeros(shape, dtype, val):
 87 |     return Variable(val * XP.__lib.ones(shape, dtype=dtype))
 88 | 
 89 |   @staticmethod
 90 |   def fnonzeros(shape, val=1):
 91 |     return XP.__nonzeros(shape, XP.__lib.float32, val)
 92 | 
 93 |   @staticmethod
 94 |   def __array(array, dtype):
 95 |     return Variable(XP.__lib.array(array, dtype=dtype))
 96 | 
 97 |   @staticmethod
 98 |   def iarray(array):
 99 |     return XP.__array(array, XP.__lib.int32)
100 | 
101 |   @staticmethod
102 |   def farray(array):
103 |     return XP.__array(array, XP.__lib.float32)
104 | 
105 | class SrcEmbed(Chain):
106 |   def __init__(self, vocab_size, embed_size):
107 |     super(SrcEmbed, self).__init__(
108 |         xe = links.EmbedID(vocab_size, embed_size),
109 |     )
110 | 
111 |   def __call__(self, x):
112 |     return functions.tanh(self.xe(x))
113 | 
114 | 
115 | 
116 | 
117 | class MultiLayerStatefulLSTMEncoder(ChainList):
118 |   """
119 |   This is an implementation of a Multilayered Stateful LSTM.
120 |   The underlying idea is to simply stack multiple LSTMs where the LSTM at the bottom takes the regular input,
121 |   and the LSTMs after that simply take the outputs (represented by h) of the previous LSMTs as inputs.
122 |   This is simply an analogous version of the Multilayered Stateless LSTM Encoder where the LSTM states are kept hidden.
123 |   This LSTM is to be called only by passing the input (x).
124 |   To access the cell states you must call the "get_states" function with parameter "num_layers" indicating the number of layers.
125 |   Although the cell outputs for each layer are returned, typically only the one of the topmost layer is used for various purposes like attention.
126 |   Note that in Tensorflow the concept of "number of attention heads" is used which probably points to attention using the output of each layer.
127 | 
128 |   Args: 
129 |         embed_size - The size of embeddings of the inputs
130 |         hidden_size - The size of the hidden layer representation of the RNN
131 |         num_layers - The number of layers of the RNN (Indicates the number of RNNS stacked on top of each other)
132 | 
133 |   Attributes: 
134 |         num_layers: Indicates the number of layers in the RNN
135 |   User Defined Methods:
136 |         get_states: This simply returns the latest cell states (c) as an array for all layers.
137 | 
138 |   """
139 | 
140 |   def __init__(self, embed_size, hidden_size, num_layers):
141 |     super(MultiLayerStatefulLSTMEncoder, self).__init__()
142 |     self.add_link(links.LSTM(embed_size,hidden_size))
143 |     for i in range(1, num_layers):
144 |       self.add_link(links.LSTM(hidden_size, hidden_size))
145 |     self.num_layers = num_layers
146 |       
147 |   def __call__(self, x):
148 |     """
149 |     Updates the internal state and returns the RNN outputs for each layer as a list.
150 | 
151 |     Args:
152 |         x : A new batch from the input sequence.
153 | 
154 |     Returns:
155 |         A list of the outputs (h) of updated RNN units over all the layers.
156 | 
157 |     """
158 |     h_list = []
159 |     h_curr = self[0](x)
160 |     h_list.append(h_curr)
161 |     for i in range(1,self.num_layers):
162 |       h_curr = self[1](h_curr)
163 |       h_list.append(h_curr)
164 |     return h_list
165 | 
166 |   def get_states():
167 |     c_list = []
168 |     for i in range(self.num_layers):
169 |       c_list.append(self[i].c)
170 |     return c_list
171 | 
172 | class MultiLayerStatelessLSTMEncoder(ChainList):
173 |   """
174 |   This is an implementation of a Multilayered Stateless LSTM.
175 |   The underlying idea is to simply stack multiple LSTMs where the LSTM at the bottom takes the regular input,
176 |   and the LSTMs after that simply take the outputs (represented by h) of the previous LSMTs as inputs.
177 |   This is simply an analogous version of the Multilayered Stateful LSTM Encoder where the LSTM states are not hidden.
178 |   You have to pass the previous cell states (c) and outputs (h) along with the input (x) when calling the LSTM.
179 |   Although the cell outputs for each layer are returned, typically only the one of the topmost layer is used for various purposes like attention.
180 |   Note that in Tensorflow the concept of "number of attention heads" is used which probably points to attention using the output of each layer.
181 | 
182 |   Args: 
183 |         embed_size - The size of embeddings of the inputs
184 |         hidden_size - The size of the hidden layer representation of the RNN
185 |         num_layers - The number of layers of the RNN (Indicates the number of RNNS stacked on top of each other)
186 | 
187 |   Attributes: 
188 |         num_layers: Indicates the number of layers in the RNN
189 |   User Defined Methods:
190 |         
191 |   """
192 |   def __init__(self, embed_size, hidden_size, num_layers):
193 |     super(MultiLayerStatelessLSTMEncoder, self).__init__()
194 | 
195 |     self.add_link(links.Linear(embed_size, 4 * hidden_size))
196 |     self.add_link(links.Linear(hidden_size, 4 * hidden_size))
197 |     for i in range(1,num_layers):
198 |       self.add_link(links.Linear(hidden_size, 4 * hidden_size))
199 |       self.add_link(links.Linear(hidden_size, 4 * hidden_size))
200 |     self.num_layers = num_layers
201 |   def __call__(self, x, c, h):
202 |     """
203 |     Updates the internal state and returns the RNN outputs for each layer as a list.
204 | 
205 |     Args:
206 |         x : A new batch from the input sequence.
207 |         c : The list of the previous cell states.
208 |         h : The list of the previous cell outputs.
209 |     Returns:
210 |         A list of the outputs (h) and another of the states (c) of the updated RNN units over all the layers.
211 | 
212 |     """
213 |     c_list = []
214 |     h_list = []
215 |     c_curr, h_curr = functions.lstm(c[0], self[0](x) + self[1](h[0]))
216 |     c_list.append(c_curr)
217 |     h_list.append(h_curr)
218 |     for i in range(1,self.num_layers):
219 |       c_curr, h_curr = functions.lstm(c[i], self[(i*num_layers)+0](h_curr) + self[(i*num_layers)+1](h[i]))
220 |       c_list.append(c_curr)
221 |       h_list.append(h_curr)
222 |     return c_list, h_list
223 | 
224 | class MultiLayerGRUEncoder(ChainList):
225 |   """
226 |   This is an implementation of a Multilayered Stateless GRU.
227 |   The underlying idea is to simply stack multiple GRUs where the GRU at the bottom takes the regular input,
228 |   and the GRUs after that simply take the outputs (represented by h) of the previous GRUs as inputs.
229 |   You have to pass the previous cell outputs (h) along with the input (x) when calling the LSTM.
230 |   The implementation for the Stateful GRU just saves the cell state and thus its multilayered version wont be implemented unless demanded.
231 | 
232 |   Args: 
233 |         embed_size - The size of embeddings of the inputs
234 |         hidden_size - The size of the hidden layer representation of the RNN
235 |         num_layers - The number of layers of the RNN (Indicates the number of RNNS stacked on top of each other)
236 | 
237 |   Attributes: 
238 |         num_layers: Indicates the number of layers in the RNN
239 |   User Defined Methods:
240 |         
241 |   """
242 | 
243 |   def __init__(self, embed_size, hidden_size, num_layers):
244 |     super(MultiLayerGRUEncoder, self).__init__()
245 |     self.add_link(links.GRU(hidden_size,embed_size))
246 |     for i in num_layers:
247 |       self.add_link(links.GRU(hidden_size,hidden_size))
248 |     self.num_layers = num_layers
249 | 
250 |   def __call__(self, x, h):
251 |     """
252 |     Updates the internal state and returns the RNN outputs for each layer as a list.
253 | 
254 |     Args:
255 |         x : A new batch from the input sequence.
256 |         h : The list of the previous cell outputs.
257 |     Returns:
258 |         A list of the outputs (h) of the updated RNN units over all the layers.
259 | 
260 |     """
261 |     h_list = []
262 |     h_curr = self[0](h[0], x)
263 |     h_list.append(h_curr)
264 |     for i in range(1,self.num_layers):
265 |       h_curr = self[i](h[i], h_curr)
266 |       h_list.append(h_curr)
267 |     return h_list
268 | 
269 | 
270 | class GRUEncoder(Chain):
271 |   
272 |   """
273 |   This is just the same Encoder as below.
274 |   The only difference is that the RNN cell is a GRU.
275 |   
276 | 
277 |   Args: 
278 |         embed_size - The size of embeddings of the inputs
279 |         hidden_size - The size of the hidden layer representation of the RNN
280 |         
281 | 
282 |   Attributes: 
283 |         
284 |   User Defined Methods:
285 |   
286 |   """
287 | 
288 |   def __init__(self, embed_size, hidden_size):
289 |     super(Encoder, self).__init__(
290 |         GRU = links.GRU(embed_size, hidden_size),
291 |     )
292 | 
293 |   def __call__(self, x):
294 |     """
295 |     Updates the internal state and returns the RNN output (h).
296 |     Note that for a GRU the internal state is the same as the output. (c and h are the same)
297 | 
298 |     Args:
299 |         x : A new batch from the input sequence.
300 | 
301 |     Returns:
302 |         The output (h) of updated RNN unit.
303 | 
304 |     """
305 |     return self.GRU(x)
306 | 
307 | class StatefulEncoder(Chain):
308 |   
309 |   """
310 |   This is just the same Encoder as below.
311 |   The only difference is that the LSTM class implementation is used instead of the LSTM function.
312 |   Instead of explicitly defining the LSTM components, the LSTM class encapsulates these components making the Encoder look simpler.
313 | 
314 |   Args: 
315 |         embed_size - The size of embeddings of the inputs
316 |         hidden_size - The size of the hidden layer representation of the RNN
317 |         
318 | 
319 |   Attributes: 
320 |         
321 |   User Defined Methods:
322 |         get_state: This simply returns the latest cell state (c).
323 |   """
324 | 
325 |   def __init__(self, embed_size, hidden_size):
326 |     super(Encoder, self).__init__(
327 |         LSTM = links.LSTM(embed_size, hidden_size),
328 |     )
329 | 
330 |   def __call__(self, x):
331 |     """
332 |     Updates the internal state and returns the RNN output (h).
333 | 
334 |     Args:
335 |         x : A new batch from the input sequence.
336 | 
337 |     Returns:
338 |         The output (h) of updated RNN unit.
339 | 
340 |     """
341 |     return self.LSTM(x)
342 | 
343 |   def get_state():
344 |     return self.LSTM.c
345 | 
346 | class StateLessEncoder(Chain):
347 |   """
348 |   This is just the same Encoder as below. The name is changed for the sake of disambiguation.
349 |   The LSTM components are explicitly defined and the LSTM function is used in place of the LSTM class.
350 | 
351 |   Args: 
352 |         embed_size - The size of embeddings of the inputs
353 |         hidden_size - The size of the hidden layer representation of the RNN
354 |         
355 | 
356 |   Attributes: 
357 |         
358 |   User Defined Methods:
359 |   """
360 |   def __init__(self, embed_size, hidden_size):
361 |     super(Encoder, self).__init__(
362 |         xh = links.Linear(embed_size, 4 * hidden_size),
363 |         hh = links.Linear(hidden_size, 4 * hidden_size),
364 |     )
365 | 
366 |   def __call__(self, x, c, h):
367 |     """
368 |     Updates the internal state and returns the RNN outputs for each layer as a list.
369 | 
370 |     Args:
371 |         x : A new batch from the input sequence.
372 |         c : The previous cell state.
373 |         h : The previous cell output.
374 |     Returns:
375 |         The output (h) and the state (c) of the updated RNN unit.
376 | 
377 |     """
378 |     return functions.lstm(c, self.xh(x) + self.hh(h))
379 |     
380 | class Encoder(Chain):
381 |   def __init__(self, embed_size, hidden_size):
382 |     super(Encoder, self).__init__(
383 |         xh = links.Linear(embed_size, 4 * hidden_size),
384 |         hh = links.Linear(hidden_size, 4 * hidden_size),
385 |     )
386 | 
387 |   def __call__(self, x, c, h):
388 |     return functions.lstm(c, self.xh(x) + self.hh(h))
389 | 
390 | class Attention(Chain):
391 |   def __init__(self, hidden_size):
392 |     super(Attention, self).__init__(
393 |         aw = links.Linear(hidden_size, hidden_size),
394 |         bw = links.Linear(hidden_size, hidden_size),
395 |         pw = links.Linear(hidden_size, hidden_size),
396 |         we = links.Linear(hidden_size, 1),
397 |     )
398 |     self.hidden_size = hidden_size
399 | 
400 |   def __call__(self, a_list, b_list, p):
401 |     batch_size = p.data.shape[0]
402 |     e_list = []
403 |     sum_e = XP.fzeros((batch_size, 1))
404 |     for a, b in zip(a_list, b_list):
405 |       w = functions.tanh(self.aw(a) + self.bw(b) + self.pw(p))
406 |       e = functions.exp(self.we(w))
407 |       e_list.append(e)
408 |       sum_e += e
409 |     ZEROS = XP.fzeros((batch_size, self.hidden_size))
410 |     aa = ZEROS
411 |     bb = ZEROS
412 |     for a, b, e in zip(a_list, b_list, e_list):
413 |       e /= sum_e
414 |       aa += functions.reshape(functions.batch_matmul(a, e), (batch_size, self.hidden_size))
415 |       bb += functions.reshape(functions.batch_matmul(b, e), (batch_size, self.hidden_size))
416 |     return aa, bb
417 | 
418 | class LocalAttention(Chain):
419 |   def __init__(self, hidden_size):
420 |     super(Attention, self).__init__(
421 |         aw = links.Linear(hidden_size, hidden_size),
422 |         bw = links.Linear(hidden_size, hidden_size),
423 |         pw = links.Linear(hidden_size, hidden_size),
424 |         we = links.Linear(hidden_size, 1),
425 |         ts = links.Linear(hidden_size, hidden_size),
426 |         sp = links.Linear(hidden_size, 1),
427 |     )
428 |     self.hidden_size = hidden_size
429 | 
430 |   def __call__(self, a_list, b_list, p, sentence_length, window_size):
431 |     batch_size = p.data.shape[0]
432 |     SENTENCE_LENGTH = XP.fnonzeros((batch_size, 1),sentence_length)
433 |     e_list = []
434 |     sum_e = XP.fzeros((batch_size, 1))
435 |     s = functions.tanh(self.ts(p))
436 |     pos =  SENTENCE_LENGTH * functions.sigmoid(self.sp(s))
437 | 
438 |     # Develop batch logic to set to zero the components of a and b which are out of the window
439 |     # Big question: Do I have to iterate over each element in the batch? That would suck.
440 |     # One logic: Get global alignment matrix of (batch x) hidden size x sentence length and then another matrix of (batch x) sentence length which
441 |     # will essentially be a matrix containing the gaussian distrubution weight and there will be zeros where the sentence position falls out of the window
442 |     # Another logic: Create a matrix of (batch x) sentence length where there will be 1 for each position in the window
443 | 
444 |     # Separate the attention weights for a and b cause forward is different from backward.
445 | 
446 |     for a, b in zip(a_list, b_list):
447 |       w = functions.tanh(self.aw(a) + self.bw(b) + self.pw(p))
448 |       e = functions.exp(self.we(w))
449 |       e_list.append(e)
450 |       sum_e += e
451 |     ZEROS = XP.fzeros((batch_size, self.hidden_size))
452 |     aa = ZEROS
453 |     bb = ZEROS
454 |     for a, b, e in zip(a_list, b_list, e_list):
455 |       e /= sum_e
456 |       aa += a * e
457 |       bb += b * e
458 |     return aa, bb
459 | 
460 | 
461 | class Decoder(Chain):
462 |   def __init__(self, vocab_size, embed_size, hidden_size):
463 |     super(Decoder, self).__init__(
464 |         ye = links.EmbedID(vocab_size, embed_size),
465 |         eh = links.Linear(embed_size, 4 * hidden_size),
466 |         hh = links.Linear(hidden_size, 4 * hidden_size),
467 |         ah = links.Linear(hidden_size, 4 * hidden_size),
468 |         bh = links.Linear(hidden_size, 4 * hidden_size),
469 |         hf = links.Linear(hidden_size, embed_size),
470 |         fy = links.Linear(embed_size, vocab_size),
471 |     )
472 | 
473 |   def __call__(self, y, c, h, a, b):
474 |     e = functions.tanh(self.ye(y))
475 |     c, h = functions.lstm(c, self.eh(e) + self.hh(h) + self.ah(a) + self.bh(b))
476 |     f = functions.tanh(self.hf(h))
477 |     return self.fy(f), c, h
478 | 
479 | class AttentionMT(Chain):
480 |   def __init__(self, vocab_size, embed_size, hidden_size):
481 |     super(AttentionMT, self).__init__(
482 |         emb = SrcEmbed(vocab_size, embed_size),
483 |         fenc = Encoder(embed_size, hidden_size),
484 |         benc = Encoder(embed_size, hidden_size),
485 |         att = Attention(hidden_size),
486 |         dec = Decoder(vocab_size, embed_size, hidden_size),
487 |     )
488 |     self.vocab_size = vocab_size
489 |     self.embed_size = embed_size
490 |     self.hidden_size = hidden_size
491 | 
492 |   def reset(self, batch_size):
493 |     self.zerograds()
494 |     self.x_list = []
495 | 
496 |   def embed(self, x):
497 |     self.x_list.append(self.emb(x))
498 | 
499 |   def encode(self):
500 |     src_len = len(self.x_list)
501 |     batch_size = self.x_list[0].data.shape[0]
502 |     ZEROS = XP.fzeros((batch_size, self.hidden_size))
503 |     c = ZEROS
504 |     a = ZEROS
505 |     a_list = []
506 |     for x in self.x_list:
507 |       c, a = self.fenc(x, c, a)
508 |       a_list.append(a)
509 |     c = ZEROS
510 |     b = ZEROS
511 |     b_list = []
512 |     for x in reversed(self.x_list):
513 |       c, b = self.benc(x, c, b)
514 |       b_list.insert(0, b)
515 |     self.a_list = a_list
516 |     self.b_list = b_list
517 |     self.c = ZEROS
518 |     self.h = ZEROS
519 | 
520 |   def decode(self, y):
521 |     aa, bb = self.att(self.a_list, self.b_list, self.h)
522 |     y, self.c, self.h = self.dec(y, self.c, self.h, aa, bb)
523 |     return y
524 | 
525 |   def save_spec(self, filename):
526 |     with open(filename, 'w') as fp:
527 |       print(self.vocab_size, file=fp)
528 |       print(self.embed_size, file=fp)
529 |       print(self.hidden_size, file=fp)
530 | 
531 |   @staticmethod
532 |   def load_spec(filename):
533 |     with open(filename) as fp:
534 |       vocab_size = int(next(fp))
535 |       embed_size = int(next(fp))
536 |       hidden_size = int(next(fp))
537 |       return AttentionMT(vocab_size, embed_size, hidden_size)
538 | 
539 | def forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, is_training, generation_limit):
540 |   batch_size = len(src_batch)
541 |   src_len = len(src_batch[0])
542 |   trg_len = len(trg_batch[0]) if trg_batch else 0
543 |   src_stoi = src_vocab.stoi
544 |   trg_stoi = trg_vocab.stoi
545 |   trg_itos = trg_vocab.itos
546 |   attmt.reset(batch_size)
547 | 
548 |   x = XP.iarray([src_stoi('<s>') for _ in range(batch_size)])
549 |   attmt.embed(x)
550 |   for l in range(src_len):
551 |     x = XP.iarray([src_stoi(src_batch[k][l]) for k in range(batch_size)])
552 |     attmt.embed(x)
553 |   x = XP.iarray([src_stoi('</s>') for _ in range(batch_size)])
554 |   attmt.embed(x)
555 | 
556 |   attmt.encode()
557 |   
558 |   t = XP.iarray([trg_stoi('<s>') for _ in range(batch_size)])
559 |   hyp_batch = [[] for _ in range(batch_size)]
560 | 
561 |   if is_training:
562 |     loss = XP.fzeros(())
563 |     for l in range(trg_len):
564 |       y = attmt.decode(t)
565 |       t = XP.iarray([trg_stoi(trg_batch[k][l]) for k in range(batch_size)])
566 |       loss += functions.softmax_cross_entropy(y, t)
567 |       output = cuda.to_cpu(y.data.argmax(1))
568 |       for k in range(batch_size):
569 |         hyp_batch[k].append(trg_itos(output[k]))
570 |     return hyp_batch, loss
571 |   
572 |   else:
573 |     while len(hyp_batch[0]) < generation_limit:
574 |       y = attmt.decode(t)
575 |       output = cuda.to_cpu(y.data.argmax(1))
576 |       t = XP.iarray(output)
577 |       for k in range(batch_size):
578 |         hyp_batch[k].append(trg_itos(output[k]))
579 |       if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)):
580 |         break
581 | 
582 |     return hyp_batch
583 | 
584 | def train(args):
585 |   trace('making vocabularies ...')
586 |   src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
587 |   trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)
588 | 
589 |   trace('making model ...')
590 |   attmt = AttentionMT(args.vocab, args.embed, args.hidden)
591 |   if args.use_gpu:
592 |     attmt.to_gpu()
593 | 
594 |   for epoch in range(args.epoch):
595 |     trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
596 |     trained = 0
597 |     gen1 = gens.word_list(args.source)
598 |     gen2 = gens.word_list(args.target)
599 |     gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
600 |     opt = optimizers.AdaGrad(lr = 0.01)
601 |     opt.setup(attmt)
602 |     opt.add_hook(optimizer.GradientClipping(5))
603 | 
604 |     for src_batch, trg_batch in gen3:
605 |       src_batch = fill_batch(src_batch)
606 |       trg_batch = fill_batch(trg_batch)
607 |       K = len(src_batch)
608 |       hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
609 |       loss.backward()
610 |       opt.update()
611 | 
612 |       for k in range(K):
613 |         trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
614 |         trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
615 |         trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
616 |         trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
617 | 
618 |       trained += K
619 | 
620 |     trace('saving model ...')
621 |     prefix = args.model + '.%03.d' % (epoch + 1)
622 |     src_vocab.save(prefix + '.srcvocab')
623 |     trg_vocab.save(prefix + '.trgvocab')
624 |     attmt.save_spec(prefix + '.spec')
625 |     serializers.save_hdf5(prefix + '.weights', attmt)
626 | 
627 |   trace('finished.')
628 | 
629 | def test(args):
630 |   trace('loading model ...')
631 |   src_vocab = Vocabulary.load(args.model + '.srcvocab')
632 |   trg_vocab = Vocabulary.load(args.model + '.trgvocab')
633 |   attmt = AttentionMT.load_spec(args.model + '.spec')
634 |   if args.use_gpu:
635 |     attmt.to_gpu()
636 |   serializers.load_hdf5(args.model + '.weights', attmt)
637 |   
638 |   trace('generating translation ...')
639 |   generated = 0
640 | 
641 |   with open(args.target, 'w') as fp:
642 |     for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
643 |       src_batch = fill_batch(src_batch)
644 |       K = len(src_batch)
645 | 
646 |       trace('sample %8d - %8d ...' % (generated + 1, generated + K))
647 |       hyp_batch = forward(src_batch, None, src_vocab, trg_vocab, attmt, False, args.generation_limit)
648 | 
649 |       for hyp in hyp_batch:
650 |         hyp.append('</s>')
651 |         hyp = hyp[:hyp.index('</s>')]
652 |         print(' '.join(hyp), file=fp)
653 | 
654 |       generated += K
655 | 
656 |   trace('finished.')
657 | 
658 | def main():
659 |   args = parse_args()
660 |   XP.set_library(args)
661 |   if args.mode == 'train': train(args)
662 |   elif args.mode == 'test': test(args)
663 | 
664 | if __name__ == '__main__':
665 |   main()
666 | 
667 | 


--------------------------------------------------------------------------------
/chainer-1.5/mt_s2s_encdec.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import numpy
  3 | from argparse import ArgumentParser
  4 | from chainer import Chain, Variable, cuda, functions, links, optimizer, optimizers, serializers
  5 | import util.generators as gens
  6 | from util.functions import trace, fill_batch
  7 | from util.vocabulary import Vocabulary
  8 | 
  9 | def parse_args():
 10 |   def_gpu_device = 0
 11 |   def_vocab = 1000
 12 |   def_embed = 100
 13 |   def_hidden = 200
 14 |   def_epoch = 10
 15 |   def_minibatch = 64
 16 |   def_generation_limit = 128
 17 | 
 18 |   p = ArgumentParser(
 19 |     description='Encoder-decoder neural machine trainslation',
 20 |     usage=
 21 |       '\n  %(prog)s train [options] source target model'
 22 |       '\n  %(prog)s test source target model'
 23 |       '\n  %(prog)s -h',
 24 |   )
 25 | 
 26 |   p.add_argument('mode', help='\'train\' or \'test\'')
 27 |   p.add_argument('source', help='[in] source corpus')
 28 |   p.add_argument('target', help='[in/out] target corpus')
 29 |   p.add_argument('model', help='[in/out] model file')
 30 |   p.add_argument('--use-gpu', action='store_true', default=False,
 31 |     help='use GPU calculation')
 32 |   p.add_argument('--gpu-device', default=def_gpu_device, metavar='INT', type=int,
 33 |     help='GPU device ID to be used (default: %(default)d)')
 34 |   p.add_argument('--vocab', default=def_vocab, metavar='INT', type=int,
 35 |     help='vocabulary size (default: %(default)d)')
 36 |   p.add_argument('--embed', default=def_embed, metavar='INT', type=int,
 37 |     help='embedding layer size (default: %(default)d)')
 38 |   p.add_argument('--hidden', default=def_hidden, metavar='INT', type=int,
 39 |     help='hidden layer size (default: %(default)d)')
 40 |   p.add_argument('--epoch', default=def_epoch, metavar='INT', type=int,
 41 |     help='number of training epoch (default: %(default)d)')
 42 |   p.add_argument('--minibatch', default=def_minibatch, metavar='INT', type=int,
 43 |     help='minibatch size (default: %(default)d)')
 44 |   p.add_argument('--generation-limit', default=def_generation_limit, metavar='INT', type=int,
 45 |     help='maximum number of words to be generated for test input (default: %(default)d)')
 46 | 
 47 |   args = p.parse_args()
 48 | 
 49 |   # check args
 50 |   try:
 51 |     if args.mode not in ['train', 'test']: raise ValueError('you must set mode = \'train\' or \'test\'')
 52 |     if args.vocab < 1: raise ValueError('you must set --vocab >= 1')
 53 |     if args.embed < 1: raise ValueError('you must set --embed >= 1')
 54 |     if args.hidden < 1: raise ValueError('you must set --hidden >= 1')
 55 |     if args.epoch < 1: raise ValueError('you must set --epoch >= 1')
 56 |     if args.minibatch < 1: raise ValueError('you must set --minibatch >= 1')
 57 |     if args.generation_limit < 1: raise ValueError('you must set --generation-limit >= 1')
 58 |   except Exception as ex:
 59 |     p.print_usage(file=sys.stderr)
 60 |     print(ex, file=sys.stderr)
 61 |     sys.exit()
 62 | 
 63 |   return args
 64 | 
 65 | class XP:
 66 |   __lib = None
 67 | 
 68 |   @staticmethod
 69 |   def set_library(args):
 70 |     if args.use_gpu:
 71 |       XP.__lib = cuda.cupy
 72 |       cuda.get_device(args.gpu_device).use()
 73 |     else:
 74 |       XP.__lib = numpy
 75 | 
 76 |   @staticmethod
 77 |   def __zeros(shape, dtype):
 78 |     return Variable(XP.__lib.zeros(shape, dtype=dtype))
 79 | 
 80 |   @staticmethod
 81 |   def fzeros(shape):
 82 |     return XP.__zeros(shape, XP.__lib.float32)
 83 | 
 84 |   @staticmethod
 85 |   def __array(array, dtype):
 86 |     return Variable(XP.__lib.array(array, dtype=dtype))
 87 | 
 88 |   @staticmethod
 89 |   def iarray(array):
 90 |     return XP.__array(array, XP.__lib.int32)
 91 | 
 92 |   @staticmethod
 93 |   def farray(array):
 94 |     return XP.__array(array, XP.__lib.float32)
 95 | 
 96 | class Encoder(Chain):
 97 |   def __init__(self, vocab_size, embed_size, hidden_size):
 98 |     super(Encoder, self).__init__(
 99 |         xe = links.EmbedID(vocab_size, embed_size),
100 |         eh = links.Linear(embed_size, 4 * hidden_size),
101 |         hh = links.Linear(hidden_size, 4 * hidden_size),
102 |     )
103 | 
104 |   def __call__(self, x, c, h):
105 |     e = functions.tanh(self.xe(x))
106 |     return functions.lstm(c, self.eh(e) + self.hh(h)) 
107 | 
108 | class Decoder(Chain):
109 |   def __init__(self, vocab_size, embed_size, hidden_size):
110 |     super(Decoder, self).__init__(
111 |         ye = links.EmbedID(vocab_size, embed_size),
112 |         eh = links.Linear(embed_size, 4 * hidden_size),
113 |         hh = links.Linear(hidden_size, 4 * hidden_size),
114 |         hf = links.Linear(hidden_size, embed_size),
115 |         fy = links.Linear(embed_size, vocab_size),
116 |     )
117 | 
118 |   def __call__(self, y, c, h):
119 |     e = functions.tanh(self.ye(y))
120 |     c, h = functions.lstm(c, self.eh(e) + self.hh(h))
121 |     f = functions.tanh(self.hf(h))
122 |     return self.fy(f), c, h
123 | 
124 | class EncoderDecoder(Chain):
125 |   def __init__(self, vocab_size, embed_size, hidden_size):
126 |     super(EncoderDecoder, self).__init__(
127 |         enc = Encoder(vocab_size, embed_size, hidden_size),
128 |         dec = Decoder(vocab_size, embed_size, hidden_size),
129 |     )
130 |     self.vocab_size = vocab_size
131 |     self.embed_size = embed_size
132 |     self.hidden_size = hidden_size
133 | 
134 |   def reset(self, batch_size):
135 |     self.zerograds()
136 |     self.c = XP.fzeros((batch_size, self.hidden_size))
137 |     self.h = XP.fzeros((batch_size, self.hidden_size))
138 | 
139 |   def encode(self, x):
140 |     self.c, self.h = self.enc(x, self.c, self.h)
141 | 
142 |   def decode(self, y):
143 |     y, self.c, self.h = self.dec(y, self.c, self.h)
144 |     return y
145 | 
146 |   def save_spec(self, filename):
147 |     with open(filename, 'w') as fp:
148 |       print(self.vocab_size, file=fp)
149 |       print(self.embed_size, file=fp)
150 |       print(self.hidden_size, file=fp)
151 | 
152 |   @staticmethod
153 |   def load_spec(filename):
154 |     with open(filename) as fp:
155 |       vocab_size = int(next(fp))
156 |       embed_size = int(next(fp))
157 |       hidden_size = int(next(fp))
158 |       return EncoderDecoder(vocab_size, embed_size, hidden_size)
159 | 
160 | def forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, is_training, generation_limit):
161 |   batch_size = len(src_batch)
162 |   src_len = len(src_batch[0])
163 |   trg_len = len(trg_batch[0]) if trg_batch else 0
164 |   src_stoi = src_vocab.stoi
165 |   trg_stoi = trg_vocab.stoi
166 |   trg_itos = trg_vocab.itos
167 |   encdec.reset(batch_size)
168 | 
169 |   x = XP.iarray([src_stoi('</s>') for _ in range(batch_size)])
170 |   encdec.encode(x)
171 |   for l in reversed(range(src_len)):
172 |     x = XP.iarray([src_stoi(src_batch[k][l]) for k in range(batch_size)])
173 |     encdec.encode(x)
174 |   
175 |   t = XP.iarray([trg_stoi('<s>') for _ in range(batch_size)])
176 |   hyp_batch = [[] for _ in range(batch_size)]
177 | 
178 |   if is_training:
179 |     loss = XP.fzeros(())
180 |     for l in range(trg_len):
181 |       y = encdec.decode(t)
182 |       t = XP.iarray([trg_stoi(trg_batch[k][l]) for k in range(batch_size)])
183 |       loss += functions.softmax_cross_entropy(y, t)
184 |       output = cuda.to_cpu(y.data.argmax(1))
185 |       for k in range(batch_size):
186 |         hyp_batch[k].append(trg_itos(output[k]))
187 |     return hyp_batch, loss
188 |   
189 |   else:
190 |     while len(hyp_batch[0]) < generation_limit:
191 |       y = encdec.decode(t)
192 |       output = cuda.to_cpu(y.data.argmax(1))
193 |       t = XP.iarray(output)
194 |       for k in range(batch_size):
195 |         hyp_batch[k].append(trg_itos(output[k]))
196 |       if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)):
197 |         break
198 | 
199 |     return hyp_batch
200 | 
201 | def train(args):
202 |   trace('making vocabularies ...')
203 |   src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
204 |   trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)
205 | 
206 |   trace('making model ...')
207 |   encdec = EncoderDecoder(args.vocab, args.embed, args.hidden)
208 |   if args.use_gpu:
209 |     encdec.to_gpu()
210 | 
211 |   for epoch in range(args.epoch):
212 |     trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
213 |     trained = 0
214 |     gen1 = gens.word_list(args.source)
215 |     gen2 = gens.word_list(args.target)
216 |     gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
217 |     opt = optimizers.AdaGrad(lr = 0.01)
218 |     opt.setup(encdec)
219 |     opt.add_hook(optimizer.GradientClipping(5))
220 | 
221 |     for src_batch, trg_batch in gen3:
222 |       src_batch = fill_batch(src_batch)
223 |       trg_batch = fill_batch(trg_batch)
224 |       K = len(src_batch)
225 |       hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
226 |       loss.backward()
227 |       opt.update()
228 | 
229 |       for k in range(K):
230 |         trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
231 |         trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
232 |         trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
233 |         trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))
234 | 
235 |       trained += K
236 | 
237 |     trace('saving model ...')
238 |     prefix = args.model + '.%03.d' % (epoch + 1)
239 |     src_vocab.save(prefix + '.srcvocab')
240 |     trg_vocab.save(prefix + '.trgvocab')
241 |     encdec.save_spec(prefix + '.spec')
242 |     serializers.save_hdf5(prefix + '.weights', encdec)
243 | 
244 |   trace('finished.')
245 | 
246 | def test(args):
247 |   trace('loading model ...')
248 |   src_vocab = Vocabulary.load(args.model + '.srcvocab')
249 |   trg_vocab = Vocabulary.load(args.model + '.trgvocab')
250 |   encdec = EncoderDecoder.load_spec(args.model + '.spec')
251 |   if args.use_gpu:
252 |     encdec.to_gpu()
253 |   serializers.load_hdf5(args.model + '.weights', encdec)
254 |   
255 |   trace('generating translation ...')
256 |   generated = 0
257 | 
258 |   with open(args.target, 'w') as fp:
259 |     for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
260 |       src_batch = fill_batch(src_batch)
261 |       K = len(src_batch)
262 | 
263 |       trace('sample %8d - %8d ...' % (generated + 1, generated + K))
264 |       hyp_batch = forward(src_batch, None, src_vocab, trg_vocab, encdec, False, args.generation_limit)
265 | 
266 |       for hyp in hyp_batch:
267 |         hyp.append('</s>')
268 |         hyp = hyp[:hyp.index('</s>')]
269 |         print(' '.join(hyp), file=fp)
270 | 
271 |       generated += K
272 | 
273 |   trace('finished.')
274 | 
275 | def main():
276 |   args = parse_args()
277 |   XP.set_library(args)
278 |   if args.mode == 'train': train(args)
279 |   elif args.mode == 'test': test(args)
280 | 
281 | if __name__ == '__main__':
282 |   main()
283 | 
284 | 


--------------------------------------------------------------------------------
/chainer-1.5/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/odashi/chainer_examples/b13ec64e5035b1eb75b873431786d880577b7370/chainer-1.5/util/__init__.py


--------------------------------------------------------------------------------
/chainer-1.5/util/functions.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import datetime
 3 | 
 4 | def trace(*args):
 5 |     print(datetime.datetime.now(), '...', *args, file=sys.stderr)
 6 |     sys.stderr.flush()
 7 | 
 8 | def fill_batch(batch, token='</s>'):
 9 |     max_len = max(len(x) for x in batch)
10 |     return [x + [token] * (max_len - len(x) + 1) for x in batch]
11 | 
12 | def fill_batch2(batch, start_token='<s>', end_token='</s>'):
13 |     max_len = max(len(x) for x in batch)
14 |     return [[start_token] + x + [end_token] * (max_len - len(x) + 1) for x in batch]
15 | 
16 | 


--------------------------------------------------------------------------------
/chainer-1.5/util/generators.py:
--------------------------------------------------------------------------------
 1 | def batch(generator, batch_size):
 2 |     batch = []
 3 |     is_tuple = False
 4 |     for l in generator:
 5 |         is_tuple = isinstance(l, tuple)
 6 |         batch.append(l)
 7 |         if len(batch) == batch_size:
 8 |             yield tuple(list(x) for x in zip(*batch)) if is_tuple else batch
 9 |             batch = []
10 |     if batch:
11 |         yield tuple(list(x) for x in zip(*batch)) if is_tuple else batch
12 | 
13 | def sorted_parallel(generator1, generator2, pooling, order=1):
14 |     gen1 = batch(generator1, pooling)
15 |     gen2 = batch(generator2, pooling)
16 |     for batch1, batch2 in zip(gen1, gen2):
17 |         #yield from sorted(zip(batch1, batch2), key=lambda x: len(x[1]))
18 |         for x in sorted(zip(batch1, batch2), key=lambda x: len(x[order])):
19 |             yield x
20 | 
21 | def word_list(filename):
22 |     with open(filename) as fp:
23 |         for l in fp:
24 |             yield l.split()
25 | 
26 | def letter_list(filename):
27 |     with open(filename) as fp:
28 |         for l in fp:
29 |             yield list(''.join(l.split()))
30 | 
31 | 


--------------------------------------------------------------------------------
/chainer-1.5/util/vocabulary.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | 
 3 | 
 4 | class Vocabulary:
 5 |   def __init__(self):
 6 |     pass
 7 | 
 8 |   def __len__(self):
 9 |     return self.__size
10 | 
11 |   def stoi(self, s):
12 |     return self.__stoi[s]
13 | 
14 |   def itos(self, i):
15 |     return self.__itos[i]
16 | 
17 |   @staticmethod
18 |   def new(list_generator, size):
19 |     self = Vocabulary()
20 |     self.__size = size
21 | 
22 |     word_freq = defaultdict(lambda: 0)
23 |     for words in list_generator:
24 |       for word in words:
25 |         word_freq[word] += 1
26 | 
27 |     self.__stoi = defaultdict(lambda: 0)
28 |     self.__stoi['<unk>'] = 0
29 |     self.__stoi['<s>'] = 1
30 |     self.__stoi['</s>'] = 2
31 |     self.__itos = [''] * self.__size
32 |     self.__itos[0] = '<unk>'
33 |     self.__itos[1] = '<s>'
34 |     self.__itos[2] = '</s>'
35 |     
36 |     for i, (k, v) in zip(range(self.__size - 3), sorted(word_freq.items(), key=lambda x: -x[1])):
37 |       self.__stoi[k] = i + 3
38 |       self.__itos[i + 3] = k
39 | 
40 |     return self
41 | 
42 |   def save(self, filename):
43 |     with open(filename, 'w') as fp:
44 |       print(self.__size, file=fp)
45 |       for i in range(self.__size):
46 |         print(self.__itos[i], file=fp)
47 | 
48 |   @staticmethod
49 |   def load(filename):
50 |     with open(filename) as fp:
51 |       self = Vocabulary()
52 |       self.__size = int(next(fp))
53 |       self.__stoi = defaultdict(lambda: 0)
54 |       self.__itos = [''] * self.__size
55 |       for i in range(self.__size):
56 |         s = next(fp).strip()
57 |         if s:
58 |           self.__stoi[s] = i
59 |           self.__itos[i] = s
60 |     
61 |     return self
62 | 
63 | 


--------------------------------------------------------------------------------