├── FSRNN.py
├── LICENSE
├── LNLSTM.py
├── README.md
├── config.py
├── data
│   └── ptb
│       ├── test
│       ├── train
│       └── valid
├── helper.py
├── main.py
└── reader.py

/FSRNN.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable


class FSRNNCell(object):

    def __init__(self, fast_cells, slow_cell, keep_prob=1.0, training=True):
        super(FSRNNCell, self).__init__()
        self.fast_layers = len(fast_cells)
        assert self.fast_layers >= 2, 'At least 2 fast layers are needed.'

        self.fast_cells = fast_cells
        self.slow_cell = slow_cell
        self.keep_prob = keep_prob
        if not training:
            self.keep_prob = 1.0

        self.dropout = nn.Dropout(p=1 - self.keep_prob)

    def __call__(self, inputs, state):
        F_state = state[0]
        S_state = state[1]

        inputs = self.dropout(inputs)

        # The first fast cell consumes the external input.
        F_output, F_state = self.fast_cells[0](inputs, F_state)
        F_output_drop = self.dropout(F_output)

        # The slow cell is updated once per time step from the first fast cell.
        S_output, S_state = self.slow_cell(F_output_drop, S_state)
        S_output_drop = self.dropout(S_output)

        # The second fast cell receives the slow cell's output.
        F_output, F_state = self.fast_cells[1](S_output_drop, F_state)

        # Any remaining fast cells get a zero input and only propagate the fast state.
        for i in range(2, self.fast_layers):
            F_output, F_state = self.fast_cells[i](F_output[:, 0:1] * 0.0, F_state)

        F_output_drop = self.dropout(F_output)

        return F_output_drop, (F_state, S_state)

    def zero_state(self, batch_size, dtype=torch.FloatTensor):
        F_state = self.fast_cells[0].zero_state(batch_size, dtype)
        S_state = self.slow_cell.zero_state(batch_size, dtype)

        return (F_state, S_state)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Ujjawal Prasad

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/LNLSTM.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

import helper
import config


class LN_LSTMCell(object):
    """Layer-normalized LSTM cell with optional zoneout on the hidden and cell states."""

    def __init__(self, num_units, use_zoneout, is_training,
                 zoneout_keep_h, zoneout_keep_c, f_bias=0.5):
        super(LN_LSTMCell, self).__init__()

        self.num_units = num_units
        self.f_bias = f_bias

        self.use_zoneout = use_zoneout
        self.zoneout_keep_h = zoneout_keep_h
        self.zoneout_keep_c = zoneout_keep_c

        self.is_training = is_training

    def __call__(self, x, state):
        h, c = state
        h_size = self.num_units
        x_size = int(x.size()[1])

        # NOTE: the projection weights are re-drawn on every call, so they are not
        # trainable parameters of the surrounding model.
        W_xh = helper.orthogonal_initializer([x_size, 4 * h_size], scale=1.0)
        W_hh = helper.orthogonal_initializer([h_size, 4 * h_size], scale=1.0)

        W_full = Variable(torch.cat((W_xh, W_hh), dim=0).type_as(x.data))
        bias = Variable(torch.zeros(4 * h_size).type_as(x.data))

        concat = torch.cat((x, h), dim=1)
        concat = torch.mm(concat, W_full) + bias
        concat = helper.layer_norm_all(concat, 4, h_size)

        i, j, f, o = torch.split(tensor=concat, split_size=int(concat.size()[1]) // 4, dim=1)

        new_c = c * F.sigmoid(f + self.f_bias) + F.sigmoid(i) * F.tanh(j)
        new_h = F.tanh(helper.layer_norm(new_c)) * F.sigmoid(o)

        if self.use_zoneout:
            new_h, new_c = helper.zoneout(new_h, new_c, h, c, self.zoneout_keep_h,
                                          self.zoneout_keep_c, self.is_training)

        return new_h, (new_h, new_c)

    def zero_state(self, batch_size, dtype):
        h = Variable(torch.zeros([batch_size, self.num_units]).type(dtype))
        c = Variable(torch.zeros([batch_size, self.num_units]).type(dtype))
        return (h, c)


def repackage_hidden(h):
    """Detach hidden states from the history of how they were produced."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# fast-slow-lstm
Fast-Slow Recurrent Neural Networks

This repo contains a PyTorch implementation of Fast-Slow Recurrent Neural Networks:

https://papers.nips.cc/paper/7173-fast-slow-recurrent-neural-networks.pdf

![fsrnn](https://user-images.githubusercontent.com/16559097/34203667-024bd9e4-e5a2-11e7-9869-1f7dc234c038.png)
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
#-*- coding: utf-8 -*-
import argparse
import pickle

def str2bool(v):
    return v.lower() in ('true', '1')


arg_lists = []
parser = argparse.ArgumentParser()

def add_argument_group(name):
    arg = parser.add_argument_group(name)
    arg_lists.append(arg)
    return arg


# Network
net_arg = add_argument_group('Network')
net_arg.add_argument('--num_steps', type=int, default=150, help='')
net_arg.add_argument('--cell_size', type=int, default=700, help='')
net_arg.add_argument('--hyper_size', type=int, default=400, help='')
net_arg.add_argument('--embed_size', type=int, default=128, help='')
net_arg.add_argument('--hidden_size', type=int, default=256, help='')
net_arg.add_argument('--num_layers', type=int, default=2, help='')
net_arg.add_argument('--fast_layers', type=int, default=2, help='')
net_arg.add_argument('--zoneout_c', type=float, default=0.5, help='')
net_arg.add_argument('--zoneout_h', type=float, default=0.9, help='')
net_arg.add_argument('--keep_prob', type=float, default=0.65, help='')
net_arg.add_argument('--input_dim', type=int, default=300, help='')
net_arg.add_argument('--num_glimpse', type=int, default=1, help='')
net_arg.add_argument('--use_terminal_symbol', type=str2bool, default=True, help='Not implemented yet')

# Data
data_arg = add_argument_group('Data')
data_arg.add_argument('--task', type=str, default='ptb')
data_arg.add_argument('--batch_size', type=int, default=128)
data_arg.add_argument('--vocab_size', type=int, default=50)
data_arg.add_argument('--input_size', type=int, default=300)
data_arg.add_argument('--min_data_length', type=int, default=5)
data_arg.add_argument('--max_data_length', type=int, default=80)
data_arg.add_argument('--train_num', type=int, default=1000000)
data_arg.add_argument('--valid_num', type=int, default=1000)
data_arg.add_argument('--test_num', type=int, default=1000)

# Training / test parameters
train_arg = add_argument_group('Training')
train_arg.add_argument('--is_train', type=str2bool, default=True, help='')
train_arg.add_argument('--optimizer', type=str, default='rmsprop', help='')

train_arg.add_argument('--max_epoch', type=int, default=200, help='')
train_arg.add_argument('--max_max_epoch', type=int, default=200, help='')

train_arg.add_argument('--max_step', type=int, default=1000000, help='')
train_arg.add_argument('--init_scale', type=float, default=0.002, help='')
train_arg.add_argument('--lr_start', type=float, default=0.01, help='')
train_arg.add_argument('--lr_decay_step', type=int, default=5000, help='')
train_arg.add_argument('--lr_decay_rate', type=float, default=0.1, help='')
train_arg.add_argument('--max_grad_norm', type=float, default=1.0, help='')
train_arg.add_argument('--checkpoint_secs', type=int, default=300, help='')

# Misc
misc_arg = add_argument_group('Misc')
misc_arg.add_argument('--log_step', type=int, default=2, help='')
misc_arg.add_argument('--num_log_samples', type=int, default=3, help='')
misc_arg.add_argument('--log_level', type=str, default='INFO', choices=['INFO', 'DEBUG', 'WARN'], help='')
misc_arg.add_argument('--log_dir', type=str, default='logs')
misc_arg.add_argument('--data_dir', type=str, default='data')
misc_arg.add_argument('--output_dir', type=str, default='outputs')
misc_arg.add_argument('--data_path', type=str, default='/Ujjawal/fast-slow-lstm/data')
misc_arg.add_argument('--save', type=str, default='model.pt', help='path main.py writes the trained model to')
misc_arg.add_argument('--debug', type=str2bool, default=False)
misc_arg.add_argument('--gpu_memory_fraction', type=float, default=1.0)
misc_arg.add_argument('--random_seed', type=int, default=123, help='')

def get_config():
    config, unparsed = parser.parse_known_args()
    return config
--------------------------------------------------------------------------------
/helper.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable


def orthogonal(shape):
    """Return a tensor of the given shape filled with a (semi-)orthogonal matrix."""
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    q = u if u.shape == flat_shape else v
    return torch.Tensor(q.reshape(shape))


def orthogonal_initializer(shape, scale=1.0, dtype=torch.FloatTensor):
    return (orthogonal(shape) * scale).type(dtype)


def layer_norm_all(h, base, num_units):
    """Layer-normalize each of the `base` gate blocks of width `num_units` separately."""
    h_reshape = h.view([-1, base, num_units])
    mean = h_reshape.mean(dim=2, keepdim=True)
    var = ((h_reshape - mean) ** 2).mean(dim=2, keepdim=True)

    epsilon = 1e-3
    h_reshape = (h_reshape - mean) / torch.sqrt(var + epsilon)

    h = h_reshape.view([-1, base * num_units])

    alpha = Variable(torch.ones(base * num_units).type_as(h.data), requires_grad=True)
    bias = Variable(torch.zeros(base * num_units).type_as(h.data), requires_grad=True)

    return (h * alpha) + bias


def moments_for_layer_norm(x, axes=1, name=None):
    epsilon = 1e-3  # added to the variance to avoid division by zero

    if not isinstance(axes, int):
        axes = axes[0]

    mean = x.mean(dim=axes, keepdim=True)
    variance = (((x - mean) ** 2).mean(dim=axes, keepdim=True) + epsilon) ** 0.5

    return mean, variance


def layer_norm(x, alpha_start=1.0, bias_start=0.0):
    num_units = int(x.size()[1])

    alpha = Variable(torch.ones(num_units).type_as(x.data) * alpha_start, requires_grad=True)
    bias = Variable(torch.ones(num_units).type_as(x.data) * bias_start, requires_grad=True)

    mean, variance = moments_for_layer_norm(x)
    y = (alpha * (x - mean)) / variance + bias
    return y


def zoneout(new_h, new_c, h, c, h_keep, c_keep, is_training):
    mask_c = torch.ones_like(c)
    mask_h = torch.ones_like(h)

    c_dropout = nn.Dropout(p=1 - c_keep)
    h_dropout = nn.Dropout(p=1 - h_keep)

    if is_training:
        # Dropout rescales kept units by 1/keep_prob; multiplying by keep_prob below
        # turns the masks back into plain 0/1 Bernoulli masks.
        mask_c = c_dropout(mask_c)
        mask_h = h_dropout(mask_h)

    # At evaluation time the masks stay at keep_prob, the expected value of the Bernoulli mask.
    mask_c *= c_keep
    mask_h *= h_keep

    h = new_h * mask_h + (-mask_h + 1.) * h
    c = new_c * mask_c + (-mask_c + 1.) * c

    return h, c
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

import LNLSTM
import FSRNN
import helper

import reader
import config

import time
import numpy as np

criterion = nn.CrossEntropyLoss()
args = config.get_config()


class PTB_Model(nn.Module):
    def __init__(self, embedding_dim=args.hidden_size, num_steps=args.num_steps, batch_size=args.batch_size,
                 vocab_size=args.vocab_size, num_layers=args.num_layers, dp_keep_prob=args.keep_prob, name=None):
        super(PTB_Model, self).__init__()
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.vocab_size = vocab_size

        self.F_size = args.cell_size    # fast cell size
        self.S_size = args.hyper_size   # slow cell size

        self.emb_size = embedding_dim
        self.is_train = False

        self.embedding = nn.Embedding(self.vocab_size, self.emb_size)

        self.F_cells = [LNLSTM.LN_LSTMCell(self.F_size, use_zoneout=True, is_training=self.is_train,
                                           zoneout_keep_h=args.zoneout_h, zoneout_keep_c=args.zoneout_c)
                        for _ in range(args.fast_layers)]

        self.S_cell = LNLSTM.LN_LSTMCell(self.S_size, use_zoneout=True, is_training=self.is_train,
                                         zoneout_keep_h=args.zoneout_h, zoneout_keep_c=args.zoneout_c)

        self.FS_cell = FSRNN.FSRNNCell(self.F_cells, self.S_cell, args.keep_prob, self.is_train)

        # The model is moved to the GPU in __main__, so the recurrent state lives there too.
        self._initial_state = self.FS_cell.zero_state(batch_size, torch.cuda.FloatTensor)

    def forward(self, inputs):
        state = self._initial_state
        outputs = []
        inputs = self.embedding(inputs)

        for time_step in range(self.num_steps):
            out, state = self.FS_cell(inputs[:, time_step, :], state)
            outputs.append(out)

        # Concatenate along dim 0 so the rows are ordered time-major, matching the
        # time-major targets built in run_epoch.
        output = torch.cat(outputs, dim=0)

        # NOTE: the output projection is re-drawn on every forward pass and is not trained.
        softmax_w = Variable(helper.orthogonal_initializer([self.F_size, self.vocab_size]).type_as(output.data))
        softmax_b = Variable(torch.zeros(self.vocab_size).type_as(output.data))

        logits = torch.mm(output, softmax_w) + softmax_b

        return logits.view([self.num_steps, self.batch_size, self.vocab_size]), state


def run_epoch(model, data, is_train=False, lr=1.0):
    """Runs the model on the given data."""
    if is_train:
        model.is_train = True
    else:
        model.eval()

    epoch_size = ((len(data) // model.batch_size) - 1) // model.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0.0

    for step, (x, y) in enumerate(reader.ptb_iterator(data, model.batch_size, model.num_steps)):
        # x and y are [batch_size, num_steps]; inputs stay batch-major, while targets
        # are flattened time-major to line up with the model's time-major logits.
        inputs = Variable(torch.from_numpy(x.astype(np.int64))).cuda()

        model.zero_grad()
        outputs, hidden = model(inputs)
        targets = Variable(torch.from_numpy(y.astype(np.int64)).transpose(0, 1).contiguous()).cuda()
        tt = torch.squeeze(targets.view(-1, model.batch_size * model.num_steps))

        loss = criterion(outputs.view(-1, model.vocab_size), tt)
        costs += loss.data[0] * model.num_steps
        iters += model.num_steps

        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)
            if step % (epoch_size // 10) == 10:
                print("{} perplexity: {:8.2f} speed: {} wps".format(step * 1.0 / epoch_size, np.exp(costs / iters),
                                                                    iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)


if __name__ == "__main__":
    raw_data = reader.ptb_raw_data(data_path=args.data_path)
    train_data, valid_data, test_data, word_to_id, id_to_word = raw_data
    vocab_size = len(word_to_id)
    print('Vocabulary size: {}'.format(vocab_size))
    model = PTB_Model(embedding_dim=args.hidden_size, num_steps=args.num_steps, batch_size=args.batch_size,
                      vocab_size=vocab_size, num_layers=args.num_layers, dp_keep_prob=args.keep_prob)
    model.cuda()
    lr = args.lr_start
    # decay factor for learning rate
    lr_decay_base = args.lr_decay_rate
    # we will not touch lr for the first m_flat_lr epochs
    m_flat_lr = 14.0

    print("########## Training ##########################")

    for epoch in range(args.max_max_epoch):
        lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
        lr = lr * lr_decay  # decay lr if it is time
        train_p = run_epoch(model, train_data, True, lr)
        print('Train perplexity at epoch {}: {:8.2f}'.format(epoch, train_p))
        print('Validation perplexity at epoch {}: {:8.2f}'.format(epoch, run_epoch(model, valid_data)))

    print("########## Testing ##########################")
    model.batch_size = 1  # to make sure we process all the data
    print('Test Perplexity: {:8.2f}'.format(run_epoch(model, test_data)))
    with open(args.save, 'wb') as f:
        torch.save(model, f)
--------------------------------------------------------------------------------
/reader.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os

import numpy as np

"""
Word-level alternative to _read_words (unused; the reader below is byte-level):

def _read_words(filename):
    with open(filename, "r") as f:
        return f.read().replace("\n", "").split()
"""


def _read_words(filename):
    # Read the file as raw bytes; the vocabulary is built over individual bytes.
    with open(filename, "rb") as f:
        return list(f.read())


def _build_vocab(filename):
    data = _read_words(filename)

    counter = collections.Counter(data)
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))

    words, _ = list(zip(*count_pairs))
    word_to_id = dict(zip(words, range(len(words))))
    id_to_word = dict((v, k) for k, v in word_to_id.items())

    return word_to_id, id_to_word


def _file_to_word_ids(filename, word_to_id):
    data = _read_words(filename)
    return [word_to_id[word] for word in data if word in word_to_id]
def ptb_raw_data(data_path=None, prefix="ptb"):
    """Load raw PTB data from the directory `data_path`.

    Reads the PTB text files and converts them to sequences of integer ids.
    The PTB dataset comes from Tomas Mikolov's webpage:
    http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

    Args:
        data_path: string path to the directory that contains the train, valid
            and test files.

    Returns:
        tuple (train_data, valid_data, test_data, word_to_id, id_to_word)
        where the data objects can be passed to ptb_iterator.
    """

    train_path = os.path.join(data_path, "train")
    valid_path = os.path.join(data_path, "valid")
    test_path = os.path.join(data_path, "test")

    word_to_id, id_2_word = _build_vocab(train_path)
    train_data = _file_to_word_ids(train_path, word_to_id)
    valid_data = _file_to_word_ids(valid_path, word_to_id)
    test_data = _file_to_word_ids(test_path, word_to_id)
    return train_data, valid_data, test_data, word_to_id, id_2_word


def ptb_iterator(raw_data, batch_size, num_steps):
    """Iterate on the raw PTB data.

    This generates batch_size pointers into the raw PTB data, and allows
    minibatch iteration along these pointers.

    Args:
        raw_data: one of the raw data outputs from ptb_raw_data.
        batch_size: int, the batch size.
        num_steps: int, the number of unrolls.

    Yields:
        Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
        The second element of the tuple is the same data time-shifted to the
        right by one.

    Raises:
        ValueError: if batch_size or num_steps are too high.
    """
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

    epoch_size = (batch_len - 1) // num_steps

    if epoch_size == 0:
        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")

    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield (x, y)
--------------------------------------------------------------------------------
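
Usage sketch (not a file from the repository): a minimal CPU example of how the pieces above fit together — two LN-LSTM fast cells and one slow cell wrapped in an FSRNNCell and unrolled for a few steps. The sizes are illustrative, and the snippet assumes the same Variable-era PyTorch API the rest of this code targets; `main.py`, driven by the options in `config.py`, remains the actual training entry point (e.g. `python main.py --data_path <path to data/ptb>`).
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable

import FSRNN
import LNLSTM

zoneout = dict(zoneout_keep_h=0.9, zoneout_keep_c=0.5)

# Two fast cells and one slow cell, mirroring the default sizes in config.py.
fast_cells = [LNLSTM.LN_LSTMCell(700, use_zoneout=True, is_training=False, **zoneout)
              for _ in range(2)]
slow_cell = LNLSTM.LN_LSTMCell(400, use_zoneout=True, is_training=False, **zoneout)
fs_cell = FSRNN.FSRNNCell(fast_cells, slow_cell, keep_prob=1.0, training=False)

batch_size, embed_size = 4, 128
state = fs_cell.zero_state(batch_size, torch.FloatTensor)
x = Variable(torch.randn(batch_size, embed_size))  # stands in for one embedded time step

for _ in range(3):  # unroll a few time steps
    out, state = fs_cell(x, state)

print(out.size())  # torch.Size([4, 700]) -- one row per batch element from the last fast cell
--------------------------------------------------------------------------------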