├── FSRNN.py
├── LICENSE
├── LNLSTM.py
├── README.md
├── config.py
├── data
│   └── ptb
│       ├── test
│       ├── train
│       └── valid
├── helper.py
├── main.py
└── reader.py

/FSRNN.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable


class FSRNNCell(object):

    def __init__(self, fast_cells, slow_cell, keep_prob=1.0, training=True):
        super(FSRNNCell, self).__init__()
        self.fast_layers = len(fast_cells)
        assert self.fast_layers >= 2, 'At least 2 fast layers are needed.'

        self.fast_cells = fast_cells
        self.slow_cell = slow_cell
        self.keep_prob = keep_prob
        if not training:
            self.keep_prob = 1.0

        self.dropout = nn.Dropout(p=1 - self.keep_prob)

    def __call__(self, inputs, state):
        F_state = state[0]
        S_state = state[1]

        inputs = self.dropout(inputs)

        # The first fast cell consumes the external input.
        F_output, F_state = self.fast_cells[0](inputs, F_state)
        F_output_drop = self.dropout(F_output)

        # The slow cell is updated once per time step from the first fast cell.
        S_output, S_state = self.slow_cell(F_output_drop, S_state)
        S_output_drop = self.dropout(S_output)

        # The second fast cell receives the slow cell's output.
        F_output, F_state = self.fast_cells[1](S_output_drop, F_state)

        # Any remaining fast cells get a zero input and only propagate the fast state.
        for i in range(2, self.fast_layers):
            F_output, F_state = self.fast_cells[i](F_output[:, 0:1] * 0.0, F_state)

        F_output_drop = self.dropout(F_output)

        return F_output_drop, (F_state, S_state)

    def zero_state(self, batch_size, dtype=torch.FloatTensor):
        F_state = self.fast_cells[0].zero_state(batch_size, dtype)
        S_state = self.slow_cell.zero_state(batch_size, dtype)

        return (F_state, S_state)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Ujjawal Prasad

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/LNLSTM.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

import helper
import config


class LN_LSTMCell(object):
    """Layer-normalized LSTM cell with optional zoneout on the hidden and cell states."""

    def __init__(self, num_units, use_zoneout, is_training,
                 zoneout_keep_h, zoneout_keep_c, f_bias=0.5):
        super(LN_LSTMCell, self).__init__()

        self.num_units = num_units
        self.f_bias = f_bias

        self.use_zoneout = use_zoneout
        self.zoneout_keep_h = zoneout_keep_h
        self.zoneout_keep_c = zoneout_keep_c

        self.is_training = is_training

    def __call__(self, x, state):
        h, c = state
        h_size = self.num_units
        x_size = int(x.size()[1])

        # NOTE: the projection weights are re-drawn on every call, so they are not
        # trainable parameters of the surrounding model.
        W_xh = helper.orthogonal_initializer([x_size, 4 * h_size], scale=1.0)
        W_hh = helper.orthogonal_initializer([h_size, 4 * h_size], scale=1.0)

        W_full = Variable(torch.cat((W_xh, W_hh), dim=0).type_as(x.data))
        bias = Variable(torch.zeros(4 * h_size).type_as(x.data))

        concat = torch.cat((x, h), dim=1)
        concat = torch.mm(concat, W_full) + bias
        concat = helper.layer_norm_all(concat, 4, h_size)

        i, j, f, o = torch.split(tensor=concat, split_size=int(concat.size()[1]) // 4, dim=1)

        new_c = c * F.sigmoid(f + self.f_bias) + F.sigmoid(i) * F.tanh(j)
        new_h = F.tanh(helper.layer_norm(new_c)) * F.sigmoid(o)

        if self.use_zoneout:
            new_h, new_c = helper.zoneout(new_h, new_c, h, c, self.zoneout_keep_h,
                                          self.zoneout_keep_c, self.is_training)

        return new_h, (new_h, new_c)

    def zero_state(self, batch_size, dtype):
        h = Variable(torch.zeros([batch_size, self.num_units]).type(dtype))
        c = Variable(torch.zeros([batch_size, self.num_units]).type(dtype))
        return (h, c)


def repackage_hidden(h):
    """Detach hidden states from the history of how they were produced."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# fast-slow-lstm
Fast-Slow Recurrent Neural Networks

This repo contains a PyTorch implementation of Fast-Slow Recurrent Neural Networks:

https://papers.nips.cc/paper/7173-fast-slow-recurrent-neural-networks.pdf

![fsrnn](https://user-images.githubusercontent.com/16559097/34203667-024bd9e4-e5a2-11e7-9869-1f7dc234c038.png)
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
#-*- coding: utf-8 -*-
import argparse
import pickle

def str2bool(v):
    return v.lower() in ('true', '1')


arg_lists = []
parser = argparse.ArgumentParser()

def add_argument_group(name):
    arg = parser.add_argument_group(name)
    arg_lists.append(arg)
    return arg


# Network
net_arg = add_argument_group('Network')
net_arg.add_argument('--num_steps', type=int, default=150, help='')
net_arg.add_argument('--cell_size', type=int, default=700, help='')
net_arg.add_argument('--hyper_size', type=int, default=400, help='')
net_arg.add_argument('--embed_size', type=int, default=128, help='')
net_arg.add_argument('--hidden_size', type=int, default=256, help='')
net_arg.add_argument('--num_layers', type=int, default=2, help='')
net_arg.add_argument('--fast_layers', type=int, default=2, help='')
net_arg.add_argument('--zoneout_c', type=float, default=0.5, help='')
net_arg.add_argument('--zoneout_h', type=float, default=0.9, help='')
net_arg.add_argument('--keep_prob', type=float, default=0.65, help='')
net_arg.add_argument('--input_dim', type=int, default=300, help='')
net_arg.add_argument('--num_glimpse', type=int, default=1, help='')
net_arg.add_argument('--use_terminal_symbol', type=str2bool, default=True, help='Not implemented yet')

# Data
data_arg = add_argument_group('Data')
data_arg.add_argument('--task', type=str, default='ptb')
data_arg.add_argument('--batch_size', type=int, default=128)
data_arg.add_argument('--vocab_size', type=int, default=50)
data_arg.add_argument('--input_size', type=int, default=300)
data_arg.add_argument('--min_data_length', type=int, default=5)
data_arg.add_argument('--max_data_length', type=int, default=80)
data_arg.add_argument('--train_num', type=int, default=1000000)
data_arg.add_argument('--valid_num', type=int, default=1000)
data_arg.add_argument('--test_num', type=int, default=1000)

# Training / test parameters
train_arg = add_argument_group('Training')
train_arg.add_argument('--is_train', type=str2bool, default=True, help='')
train_arg.add_argument('--optimizer', type=str, default='rmsprop', help='')

train_arg.add_argument('--max_epoch', type=int, default=200, help='')
train_arg.add_argument('--max_max_epoch', type=int, default=200, help='')

train_arg.add_argument('--max_step', type=int, default=1000000, help='')
train_arg.add_argument('--init_scale', type=float, default=0.002, help='')
train_arg.add_argument('--lr_start', type=float, default=0.01, help='')
train_arg.add_argument('--lr_decay_step', type=int, default=5000, help='')
train_arg.add_argument('--lr_decay_rate', type=float, default=0.1, help='')
train_arg.add_argument('--max_grad_norm', type=float, default=1.0, help='')
train_arg.add_argument('--checkpoint_secs', type=int, default=300, help='')

# Misc
misc_arg = add_argument_group('Misc')
misc_arg.add_argument('--log_step', type=int, default=2, help='')
misc_arg.add_argument('--num_log_samples', type=int, default=3, help='')
misc_arg.add_argument('--log_level', type=str, default='INFO', choices=['INFO', 'DEBUG', 'WARN'], help='')
misc_arg.add_argument('--log_dir', type=str, default='logs')
misc_arg.add_argument('--data_dir', type=str, default='data')
misc_arg.add_argument('--output_dir', type=str, default='outputs')
misc_arg.add_argument('--data_path', type=str, default='/Ujjawal/fast-slow-lstm/data')
misc_arg.add_argument('--save', type=str, default='model.pt', help='path main.py writes the trained model to')
misc_arg.add_argument('--debug', type=str2bool, default=False)
misc_arg.add_argument('--gpu_memory_fraction', type=float, default=1.0)
misc_arg.add_argument('--random_seed', type=int, default=123, help='')

def get_config():
    config, unparsed = parser.parse_known_args()
    return config
--------------------------------------------------------------------------------
/helper.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable


def orthogonal(shape):
    """Return a tensor of the given shape filled with a (semi-)orthogonal matrix."""
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    q = u if u.shape == flat_shape else v
    return torch.Tensor(q.reshape(shape))


def orthogonal_initializer(shape, scale=1.0, dtype=torch.FloatTensor):
    return (orthogonal(shape) * scale).type(dtype)


def layer_norm_all(h, base, num_units):
    """Layer-normalize each of the `base` gate blocks of width `num_units` separately."""
    h_reshape = h.view([-1, base, num_units])
    mean = h_reshape.mean(dim=2, keepdim=True)
    var = ((h_reshape - mean) ** 2).mean(dim=2, keepdim=True)

    epsilon = 1e-3
    h_reshape = (h_reshape - mean) / torch.sqrt(var + epsilon)

    h = h_reshape.view([-1, base * num_units])

    alpha = Variable(torch.ones(base * num_units).type_as(h.data), requires_grad=True)
    bias = Variable(torch.zeros(base * num_units).type_as(h.data), requires_grad=True)

    return (h * alpha) + bias


def moments_for_layer_norm(x, axes=1, name=None):
    epsilon = 1e-3  # added to the variance to avoid division by zero

    if not isinstance(axes, int):
        axes = axes[0]

    mean = x.mean(dim=axes, keepdim=True)
    variance = (((x - mean) ** 2).mean(dim=axes, keepdim=True) + epsilon) ** 0.5

    return mean, variance


def layer_norm(x, alpha_start=1.0, bias_start=0.0):
    num_units = int(x.size()[1])

    alpha = Variable(torch.ones(num_units).type_as(x.data) * alpha_start, requires_grad=True)
    bias = Variable(torch.ones(num_units).type_as(x.data) * bias_start, requires_grad=True)

    mean, variance = moments_for_layer_norm(x)
    y = (alpha * (x - mean)) / variance + bias
    return y


def zoneout(new_h, new_c, h, c, h_keep, c_keep, is_training):
    mask_c = torch.ones_like(c)
    mask_h = torch.ones_like(h)

    c_dropout = nn.Dropout(p=1 - c_keep)
    h_dropout = nn.Dropout(p=1 - h_keep)

    if is_training:
        # Dropout rescales kept units by 1/keep_prob; multiplying by keep_prob below
        # turns the masks back into plain 0/1 Bernoulli masks.
        mask_c = c_dropout(mask_c)
        mask_h = h_dropout(mask_h)

    # At evaluation time the masks stay at keep_prob, the expected value of the Bernoulli mask.
    mask_c *= c_keep
    mask_h *= h_keep

    h = new_h * mask_h + (-mask_h + 1.) * h
    c = new_c * mask_c + (-mask_c + 1.) * c

    return h, c
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

import LNLSTM
import FSRNN
import helper

import reader
import config

import time
import numpy as np

criterion = nn.CrossEntropyLoss()
args = config.get_config()


class PTB_Model(nn.Module):
    def __init__(self, embedding_dim=args.hidden_size, num_steps=args.num_steps, batch_size=args.batch_size,
                 vocab_size=args.vocab_size, num_layers=args.num_layers, dp_keep_prob=args.keep_prob, name=None):
        super(PTB_Model, self).__init__()
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.vocab_size = vocab_size

        self.F_size = args.cell_size    # fast cell size
        self.S_size = args.hyper_size   # slow cell size

        self.emb_size = embedding_dim
        self.is_train = False

        self.embedding = nn.Embedding(self.vocab_size, self.emb_size)

        self.F_cells = [LNLSTM.LN_LSTMCell(self.F_size, use_zoneout=True, is_training=self.is_train,
                                           zoneout_keep_h=args.zoneout_h, zoneout_keep_c=args.zoneout_c)
                        for _ in range(args.fast_layers)]

        self.S_cell = LNLSTM.LN_LSTMCell(self.S_size, use_zoneout=True, is_training=self.is_train,
                                         zoneout_keep_h=args.zoneout_h, zoneout_keep_c=args.zoneout_c)

        self.FS_cell = FSRNN.FSRNNCell(self.F_cells, self.S_cell, args.keep_prob, self.is_train)

        # The model is moved to the GPU in __main__, so the recurrent state lives there too.
        self._initial_state = self.FS_cell.zero_state(batch_size, torch.cuda.FloatTensor)

    def forward(self, inputs):
        state = self._initial_state
        outputs = []
        inputs = self.embedding(inputs)

        for time_step in range(self.num_steps):
            out, state = self.FS_cell(inputs[:, time_step, :], state)
            outputs.append(out)

        # Concatenate along dim 0 so the rows are ordered time-major, matching the
        # time-major targets built in run_epoch.
        output = torch.cat(outputs, dim=0)

        # NOTE: the output projection is re-drawn on every forward pass and is not trained.
        softmax_w = Variable(helper.orthogonal_initializer([self.F_size, self.vocab_size]).type_as(output.data))
        softmax_b = Variable(torch.zeros(self.vocab_size).type_as(output.data))

        logits = torch.mm(output, softmax_w) + softmax_b

        return logits.view([self.num_steps, self.batch_size, self.vocab_size]), state


def run_epoch(model, data, is_train=False, lr=1.0):
    """Runs the model on the given data."""
    if is_train:
        model.is_train = True
    else:
        model.eval()

    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0.0

    for step, (x, y) in enumerate(reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        # x and y are [batch_size, num_steps]; inputs stay batch-major, while targets
        # are flattened time-major to line up with the model's time-major logits.
        inputs = Variable(torch.from_numpy(x.astype(np.int64))).cuda()

        model.zero_grad()
        outputs, hidden = model(inputs)
        targets = Variable(torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()).cuda()
        tt = torch.squeeze(targets.view(-1, model.batch_size * model.num_steps))

        loss = criterion(outputs.view(-1, model.vocab_size), tt)
        costs += loss.data[0] * model.num_steps
        iters += model.num_steps

        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)
            if step % (epoch_size // 10) == 10:
                print("{} perplexity: {:8.2f} speed: {} wps".format(step * 1.0 / epoch_size, np.exp(costs / iters),
                                                                    iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)


if __name__ == "__main__":
    raw_data = reader.ptb_raw_data(data_path=args.data_path)
    train_data, valid_data, test_data, word_to_id, id_to_word = raw_data
    vocab_size = len(word_to_id)
    print('Vocabulary size: {}'.format(vocab_size))
    model = PTB_Model(embedding_dim=args.hidden_size, num_steps=args.num_steps, batch_size=args.batch_size,
                      vocab_size=vocab_size, num_layers=args.num_layers, dp_keep_prob=args.keep_prob)
    model.cuda()
    lr = args.lr_start
    # decay factor for learning rate
    lr_decay_base = args.lr_decay_rate
    # we will not touch lr for the first m_flat_lr epochs
    m_flat_lr = 14.0

    print("########## Training ##########################")

    for epoch in range(args.max_max_epoch):
        lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
        lr = lr * lr_decay  # decay lr if it is time
        train_p = run_epoch(model, train_data, True, lr)
        print('Train perplexity at epoch {}: {:8.2f}'.format(epoch, train_p))
        print('Validation perplexity at epoch {}: {:8.2f}'.format(epoch, run_epoch(model, valid_data)))

    print("########## Testing ##########################")
    model.batch_size = 1  # to make sure we process all the data
    print('Test Perplexity: {:8.2f}'.format(run_epoch(model, test_data)))
    with open(args.save, 'wb') as f:
        torch.save(model, f)
--------------------------------------------------------------------------------
/reader.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os

import numpy as np

"""
Word-level alternative to _read_words (unused; the reader below is byte-level):

def _read_words(filename):
    with open(filename, "r") as f:
        return f.read().replace("\n", "").split()
"""


def _read_words(filename):
    # Read the file as raw bytes; the vocabulary is built over individual bytes.
    with open(filename, "rb") as f:
        return list(f.read())


def _build_vocab(filename):
    data = _read_words(filename)

    counter = collections.Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))

    words, _ = list(zip(*count_pairs))
    word_to_id = dict(zip(words, range(len(words))))
    id_to_word = dict((v, k) for k, v in word_to_id.items())

    return word_to_id, id_to_word


def _file_to_word_ids(filename, word_to_id):
    data = _read_words(filename)
    return [word_to_id[word] for word in data if word in word_to_id]
def ptb_raw_data(data_path=None, prefix="ptb"):
    """Load raw PTB data from the directory `data_path`.

    Reads the PTB text files and converts them to sequences of integer ids.
    The PTB dataset comes from Tomas Mikolov's webpage:
    http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

    Args:
        data_path: string path to the directory that contains the train, valid
            and test files.

    Returns:
        tuple (train_data, valid_data, test_data, word_to_id, id_to_word)
        where the data objects can be passed to ptb_iterator.
    """

    train_path = os.path.join(data_path, "train")
    valid_path = os.path.join(data_path, "valid")
    test_path = os.path.join(data_path, "test")

    word_to_id, id_2_word = _build_vocab(train_path)
    train_data = _file_to_word_ids(train_path, word_to_id)
    valid_data = _file_to_word_ids(valid_path, word_to_id)
    test_data = _file_to_word_ids(test_path, word_to_id)
    return train_data, valid_data, test_data, word_to_id, id_2_word


def ptb_iterator(raw_data, batch_size, num_steps):
    """Iterate on the raw PTB data.

    This generates batch_size pointers into the raw PTB data, and allows
    minibatch iteration along these pointers.

    Args:
        raw_data: one of the raw data outputs from ptb_raw_data.
        batch_size: int, the batch size.
        num_steps: int, the number of unrolls.

    Yields:
        Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
        The second element of the tuple is the same data time-shifted to the
        right by one.

    Raises:
        ValueError: if batch_size or num_steps are too high.
    """
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

    epoch_size = (batch_len - 1) // num_steps

    if epoch_size == 0:
        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")

    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield (x, y)
--------------------------------------------------------------------------------
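
Usage sketch (not a file from the repository): a minimal CPU example of how the pieces above fit together — two LN-LSTM fast cells and one slow cell wrapped in an FSRNNCell and unrolled for a few steps. The sizes are illustrative, and the snippet assumes the same Variable-era PyTorch API the rest of this code targets; `main.py`, driven by the options in `config.py`, remains the actual training entry point (e.g. `python main.py --data_path <path to data/ptb>`).
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable

import FSRNN
import LNLSTM

zoneout = dict(zoneout_keep_h=0.9, zoneout_keep_c=0.5)

# Two fast cells and one slow cell, mirroring the default sizes in config.py.
fast_cells = [LNLSTM.LN_LSTMCell(700, use_zoneout=True, is_training=False, **zoneout)
              for _ in range(2)]
slow_cell = LNLSTM.LN_LSTMCell(400, use_zoneout=True, is_training=False, **zoneout)
fs_cell = FSRNN.FSRNNCell(fast_cells, slow_cell, keep_prob=1.0, training=False)

batch_size, embed_size = 4, 128
state = fs_cell.zero_state(batch_size, torch.FloatTensor)
x = Variable(torch.randn(batch_size, embed_size))  # stands in for one embedded time step

for _ in range(3):  # unroll a few time steps
    out, state = fs_cell(x, state)

print(out.size())  # torch.Size([4, 700]) -- one row per batch element from the last fast cell
--------------------------------------------------------------------------------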