├── src
│   ├── data_utils.pyc
│   ├── test.py
│   ├── Dropout.py
│   ├── driver.py
│   ├── guess.py
│   ├── embeddings.py
│   ├── data_utils.py
│   ├── optimizer.py
│   ├── model.py
│   ├── Dense.py
│   ├── CNN.py
│   ├── LSTM.py
│   └── pairwise.py
├── .gitattributes
└── Readme
/src/data_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FudanNLP/NeuralSentenceOrdering/HEAD/src/data_utils.pyc
--------------------------------------------------------------------------------
/src/test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | np.random.seed(1234)
3 | n = 10
4 | idx_list = np.arange(n, dtype="int32")
5 | def shuffle(idx_list):
6 |     np.random.shuffle(idx_list)
7 |     return idx_list
8 | n = 10
9 | idx_list = np.arange(n, dtype="int32")
10 | idx_list = shuffle(idx_list)
11 | print idx_list
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | 
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
--------------------------------------------------------------------------------
/src/Dropout.py:
--------------------------------------------------------------------------------
1 | import theano
2 | from theano import tensor as T
3 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
4 | from theano.tensor.signal import downsample
5 | import numpy as np
6 | def dropout(x, level, seed=None):
7 |     if level < 0. or level >= 1:
8 |         raise Exception('Dropout level must be in interval [0, 1[.')
9 |     if seed is None:
10 |         seed = np.random.randint(10e6)
11 |     rng = RandomStreams(seed=seed)
12 |     retain_prob = 1. - level
13 |     x *= rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
14 |     x /= retain_prob
15 |     return x
--------------------------------------------------------------------------------
/Readme:
--------------------------------------------------------------------------------
1 | A Python (Theano-based) implementation of the paper "Neural sentence ordering".
2 | 
3 | The data is available at https://drive.google.com/drive/folders/0B-mnK8kniGAiNVB6WTQ4bmdyamc.
4 | 
5 | A sample of the processed data can be found at https://drive.google.com/file/d/0B-mnK8kniGAiSWhaR3gyalJyQm8/view?usp=sharing. Users should put this *.gz file into ./data/ to run the code.
6 | This processed data is organized in only a toy way, just enough to make the code run.
7 | 
8 | The entry point of the code is ./src/driver.py.
9 | 
10 | Any use of our code, data, or ideas should cite the paper "Neural sentence ordering" on arXiv.
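A rough sketch of how the processed data is consumed (the tuple layout below mirrors data_utils.load_data; '../data/all.pkl.gz' is the default path built by driver.py when run from ./src/):

    from data_utils import load_data
    src_train, src_valid, src_test, dic_w2idx, dic_idx2w, dic_w2embed, dic_idx2embed, embedding = \
        load_data(path='../data/all.pkl.gz')
    print len(src_train), len(src_valid), len(src_test)  # number of paragraphs per split
    print embedding.shape                                # (vocabulary size, word-vector dimension)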
11 | 12 | -------------------------------------------------------------------------------- /src/driver.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import time 4 | from pairwise import Pairwise 5 | from model import build_model 6 | import cPickle as pkl 7 | #from Activation import * 8 | if __name__ == '__main__': 9 | flag_toy_data = 0.1 10 | random_seed = 1234 11 | alpha = 0.2 12 | batch_size = 128 13 | dispFreq = 2048 14 | n_epochs = 600 15 | wordVecLen = 25 # useless 16 | flag_dropout = False 17 | size_hidden_layer = 100 18 | dropoutRates = 0.2 # for output of the embedding layer 19 | optimizer = 'adadelta' 20 | beam_size = 128 21 | dataset = 'all' 22 | datapath = '../data/%s.pkl.gz'%dataset 23 | result_path = './result/' 24 | sentence_modeling = 'CNN' # available: 'CBoW' 'LSTM' 'CNN' 25 | CNN_filter_length = 3 26 | LSTM_go_backwards = True 27 | 28 | flag_random_lookup_table = False 29 | 30 | pair_score = Pairwise(alpha = alpha, 31 | batch_size=batch_size, 32 | n_epochs=n_epochs, 33 | wordVecLen = wordVecLen, 34 | flag_dropout = flag_dropout, 35 | datapath=datapath, 36 | random_seed=random_seed, 37 | dropoutRates = dropoutRates, 38 | optimizer = optimizer, 39 | dispFreq = dispFreq, 40 | beam_size = beam_size, 41 | flag_random_lookup_table = flag_random_lookup_table, 42 | flag_toy_data = flag_toy_data, 43 | size_hidden_layer = size_hidden_layer, 44 | dataset = dataset, 45 | result_path = result_path, 46 | sentence_modeling = sentence_modeling, 47 | CNN_filter_length = CNN_filter_length, 48 | LSTM_go_backwards = LSTM_go_backwards 49 | ) 50 | 51 | 52 | -------------------------------------------------------------------------------- /src/guess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from data_utils import load_data 3 | 4 | def score_rank(sentence): 5 | n_total = 0 6 | n_correct = 0 7 | for i in range(len(sentence)): 8 | for j in range(i+1, len(sentence)): 9 | n_total += 1 10 | if sentence[i] < sentence[j]: n_correct += 1 11 | patial_correct = n_correct * 1.0 / n_total 12 | total_correct = 0.0 13 | if n_correct == n_total: total_correct = 1.0 14 | return patial_correct, total_correct 15 | def save_result(path,top1_res): 16 | fw = open(path,'w') 17 | for paragraph, cur_categories in top1_res: 18 | paragraph = np.asarray(paragraph) - np.min(paragraph) 19 | paragraph = list(paragraph) 20 | for sentence in paragraph: 21 | fw.write(str(sentence)) 22 | fw.write(' ') 23 | fw.write('#') 24 | for category in cur_categories: 25 | fw.write(category) 26 | fw.write(' ') 27 | fw.write('\n') 28 | fw.close() 29 | 30 | 31 | dataset = 'cs' 32 | datapath = '../data/%s.pkl.gz'%dataset 33 | src_train,src_valid,src_test,dic_w2idx, dic_idx2w, dic_w2embed, dic_idx2embed, embedding = load_data(path=datapath) 34 | 35 | res_order = [] 36 | res_eva = [] 37 | for paragraph, cur_categories in src_test: 38 | n = len(paragraph) 39 | candidates = [x for x in xrange(n)] 40 | guess_order = [] 41 | for i in xrange(n): 42 | idx = np.random.randint(n - i) 43 | guess_order.append(candidates[idx]) 44 | candidates.remove(candidates[idx]) 45 | res_order.append((guess_order, cur_categories)) 46 | patial_correct, total_correct = score_rank(guess_order) 47 | res_eva.append(np.asarray([patial_correct, total_correct])) 48 | res_eva = np.asarray(res_eva) 49 | 50 | print 'Guess result# patial_correct, total_correct: ', np.average(res_eva, axis = 0) 51 | result_path = 
'./result/guess_%s'%dataset 52 | 53 | save_result(result_path, res_order) -------------------------------------------------------------------------------- /src/embeddings.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from keras import backend as K 3 | from keras import activations, initializations, regularizers, constraints 4 | from keras.regularizers import ActivityRegularizer 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | 9 | class Embedding(): 10 | '''Turn positive integers (indexes) into dense vectors of fixed size. 11 | eg. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]] 12 | 13 | This layer can only be used as the first layer in a model. 14 | 15 | # Input shape 16 | 2D tensor with shape: `(nb_samples, sequence_length)`. 17 | 18 | # Output shape 19 | 3D tensor with shape: `(nb_samples, sequence_length, output_dim)`. 20 | 21 | # Arguments 22 | input_dim: int >= 0. Size of the vocabulary, ie. 23 | 1 + maximum integer index occurring in the input data. 24 | output_dim: int >= 0. Dimension of the dense embedding. 25 | init: name of initialization function for the weights 26 | of the layer (see: [initializations](../initializations.md)), 27 | or alternatively, Theano function to use for weights initialization. 28 | This parameter is only relevant if you don't pass a `weights` argument. 29 | weights: list of numpy arrays to set as initial weights. 30 | The list should have 1 element, of shape `(input_dim, output_dim)`. 31 | W_regularizer: instance of the [regularizers](../regularizers.md) module 32 | (eg. L1 or L2 regularization), applied to the embedding matrix. 33 | W_constraint: instance of the [constraints](../constraints.md) module 34 | (eg. maxnorm, nonneg), applied to the embedding matrix. 35 | mask_zero: Whether or not the input value 0 is a special "padding" 36 | value that should be masked out. 37 | This is useful for [recurrent layers](recurrent.md) which may take 38 | variable length input. If this is `True` then all subsequent layers 39 | in the model need to support masking or an exception will be raised. 40 | input_length: Length of input sequences, when it is constant. 41 | This argument is required if you are going to connect 42 | `Flatten` then `Dense` layers upstream 43 | (without it, the shape of the dense outputs cannot be computed). 
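    # Example
        A minimal usage sketch, mirroring how model.py builds the lookup layer
        (`options['embedding']` is the pretrained matrix returned by data_utils.load_data):

            embed_layer = Embedding(input_dim=options['embedding'].shape[0],
                                    output_dim=options['embedding'].shape[1],
                                    weights=options['embedding'])
            sentence = T.imatrix('s1')                  # nb_samples * sequence_length
            embed_s = embed_layer.get_output(sentence)  # nb_samples * sequence_length * output_dim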
44 | ''' 45 | input_ndim = 2 46 | 47 | def __init__(self, input_dim, output_dim, init='uniform', 48 | weights=None): 49 | self.input_dim = input_dim 50 | self.output_dim = output_dim 51 | self.init = initializations.get(init) 52 | 53 | self.W = self.init((self.input_dim, self.output_dim)) 54 | #self.W_Tag = self.init((self.size_label_set, self.output_dim_tag)) 55 | 56 | 57 | if weights != None: 58 | self.W = theano.shared(value = np.asarray(weights, dtype = theano.config.floatX), borrow=True) 59 | self.params = [self.W] 60 | 61 | def get_output(self, train=False): 62 | X = train 63 | out = K.gather(self.W, X) 64 | return out 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/data_utils.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import gzip 3 | import os 4 | import sys 5 | 6 | import numpy as np 7 | import theano 8 | 9 | def get_max_length(sentences): 10 | n = 0 11 | for sentence in sentences: 12 | l = len(sentence) 13 | if n < l: n = l 14 | return n 15 | def padding(sentences, max_len): 16 | res = np.zeros((len(sentences),max_len),dtype = np.int32) 17 | mask = np.zeros((len(sentences),max_len)) 18 | for s_id, sentence in enumerate(sentences): 19 | for w_id, word in enumerate(sentence): 20 | res[s_id][w_id] = word 21 | mask[s_id][w_id] = 1 22 | return res, mask 23 | 24 | def data_padding(batch_samples): 25 | s1 = [] 26 | s2 = [] 27 | y = [] 28 | for fir, sec, label in batch_samples: 29 | s1.append(fir) 30 | s2.append(sec) 31 | y.append(label) 32 | max_len1 = get_max_length(s1) 33 | max_len2 = get_max_length(s2) 34 | # s: 2d_array n_samples * max_len 35 | # mask: 2d_array n_samples * max_len 36 | s1, s1_mask = padding(s1, max_len1) 37 | s2, s2_mask = padding(s2, max_len2) 38 | y = np.asarray(y) 39 | return s1, s1_mask, s2, s2_mask, y 40 | 41 | def get_minibatches_idx(n, minibatch_size, shuffle=False): 42 | """ 43 | Used to shuffle the dataset at each iteration. 
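    Returns a list of (minibatch_index, array_of_example_indices) pairs; the last,
    possibly smaller minibatch keeps the leftover examples. A small sketch with
    n=5, minibatch_size=2 and shuffle=False:

        get_minibatches_idx(5, 2)  ->  [(0, [0, 1]), (1, [2, 3]), (2, [4])]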
44 | """ 45 | 46 | idx_list = np.arange(n, dtype="int32") 47 | 48 | if shuffle: 49 | np.random.shuffle(idx_list) 50 | 51 | minibatches = [] 52 | minibatch_start = 0 53 | for i in range(n // minibatch_size): 54 | minibatches.append(idx_list[minibatch_start:minibatch_start + minibatch_size]) 55 | minibatch_start += minibatch_size 56 | 57 | if minibatch_start != n: 58 | # Make a minibatch out of what is left 59 | minibatches.append(idx_list[minibatch_start:]) 60 | 61 | return zip(range(len(minibatches)), minibatches) 62 | 63 | def prepare_data(examples): 64 | data = [] # (s1, s2, y) 65 | pairdict = {} 66 | n_sentences = 0 67 | for paragraph, cur_categories in examples: 68 | for s1_id,s1 in enumerate(paragraph): 69 | for s2_id,s2 in enumerate(paragraph): 70 | if s1_id == s2_id: continue 71 | if s1_id < s2_id: 72 | data.append((s1, s2, 1)) 73 | else: 74 | data.append((s1, s2, 0)) 75 | pairdict[(n_sentences + s1_id, n_sentences + s2_id)] = len(data) - 1 76 | n_sentences += len(paragraph) 77 | return data, pairdict 78 | 79 | 80 | def load_data(path='tsp_test.pkl.gz'): 81 | data_dir, data_file = os.path.split(path) 82 | if data_dir == "" and not os.path.isfile(path): 83 | path = os.path.join( 84 | os.path.split(__file__)[0], 85 | "..", 86 | "data", 87 | path 88 | ) 89 | 90 | if path.endswith(".gz"): 91 | f = gzip.open(path, 'rb') 92 | else: 93 | f = open(path, 'rb') 94 | 95 | src_train,src_valid,src_test,dic_w2idx, dic_idx2w, dic_w2embed, dic_idx2embed, embedding = cPickle.load(f) 96 | f.close() 97 | return src_train,src_valid,src_test,dic_w2idx, dic_idx2w, dic_w2embed, dic_idx2embed, embedding 98 | 99 | 100 | if __name__ == '__main__': 101 | pass -------------------------------------------------------------------------------- /src/optimizer.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import cPickle as pkl 3 | import sys 4 | import time 5 | import argparse 6 | import copy 7 | 8 | import random 9 | import numpy 10 | import theano 11 | from theano import config 12 | import theano.tensor as tensor 13 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 14 | def numpy_floatX(data): 15 | return numpy.asarray(data, dtype=config.floatX) 16 | def sgd(lr, tparams, grads, sentence1,sentence1_mask,sentence2,sentence2_mask,y, cost): 17 | """ Stochastic Gradient Descent 18 | 19 | :note: A more complicated version of sgd then needed. This is 20 | done like that for adadelta and rmsprop. 21 | 22 | """ 23 | # New set of shared variable that will contain the gradient 24 | # for a mini-batch. 25 | gshared = [theano.shared(v.get_value() * 0., name='%s_grad' % k) 26 | for k, v in tparams.iteritems()] 27 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 28 | 29 | # Function that computes gradients for a mini-batch, but do not 30 | # updates the weights. 31 | f_grad_shared = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], cost, updates=gsup, 32 | name='sgd_f_grad_shared') 33 | 34 | pup = [(v, v - lr * g) for v, g in zip(tparams.values(), gshared)] 35 | 36 | # Function that updates the weights from the previously computed 37 | # gradient. 
38 | f_update = theano.function([lr], [], updates=pup, name='sgd_f_update') 39 | 40 | return f_grad_shared, f_update 41 | 42 | 43 | def rmsprop(lr, tparams, grads, sentence1,sentence1_mask,sentence2,sentence2_mask,y, cost): 44 | zipped_grads = [theano.shared(q.get_value() * numpy_floatX(0.), name='%s_grad' % k) 45 | for k, q in tparams.iteritems()] 46 | running_grads = [theano.shared(q.get_value() * numpy_floatX(0.), name='%s_rgrad' % k) 47 | for k, q in tparams.iteritems()] 48 | running_grads2 = [theano.shared(q.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) 49 | for k, q in tparams.iteritems()] 50 | 51 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 52 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 53 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 54 | for rg2, g in zip(running_grads2, grads)] 55 | 56 | f_grad_shared = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], cost, 57 | updates=zgup + rgup + rg2up, 58 | name='rmsprop_f_grad_shared') 59 | 60 | updir = [theano.shared(q.get_value() * numpy_floatX(0.), name='%s_updir' % k) 61 | for k, q in tparams.iteritems()] 62 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 63 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 64 | running_grads2)] 65 | param_up = [(q, q + udn[1]) 66 | for q, udn in zip(tparams.values(), updir_new)] 67 | f_update = theano.function([lr], [], updates=updir_new + param_up, 68 | on_unused_input='ignore', 69 | name='rmsprop_f_update') 70 | 71 | return f_grad_shared, f_update 72 | 73 | 74 | def adadelta(lr, tparams, grads, sentence1,sentence1_mask,sentence2,sentence2_mask,y, cost): 75 | ''' 76 | zipped_grads = [theano.shared(q.get_value() * numpy_floatX(0.), name='%s_grad' % k) 77 | for k, q in tparams.iteritems()] 78 | running_up2 = [theano.shared(q.get_value() * numpy_floatX(0.),name='%s_rup2' % k) 79 | for k, q in tparams.iteritems()] 80 | running_grads2 = [theano.shared(q.get_value() * numpy_floatX(0.),name='%s_rgrad2' % k) 81 | for k, q in tparams.iteritems()] 82 | ''' 83 | zipped_grads = [theano.shared(q.get_value() * numpy_floatX(0.)) 84 | for q in tparams] 85 | running_up2 = [theano.shared(q.get_value() * numpy_floatX(0.)) 86 | for q in tparams] 87 | running_grads2 = [theano.shared(q.get_value() * numpy_floatX(0.)) 88 | for q in tparams] 89 | 90 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 91 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 92 | for rg2, g in zip(running_grads2, grads)] 93 | 94 | f_grad_shared = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], cost, updates=zgup + rg2up, 95 | name='adadelta_f_grad_shared', allow_input_downcast=True) 96 | 97 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 98 | for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)] 99 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 100 | for ru2, ud in zip(running_up2, updir)] 101 | param_up = [(q, q + ud) for q, ud in zip(tparams, updir)] 102 | 103 | f_update = theano.function([lr], [], updates=ru2up + param_up, 104 | on_unused_input='ignore', 105 | name='adadelta_f_update', allow_input_downcast=True) 106 | 107 | return f_grad_shared, f_update -------------------------------------------------------------------------------- /src/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | import sys 4 | from embeddings import Embedding 5 | from Dropout import dropout 6 | import theano 7 
| import theano.tensor as T 8 | import pickle 9 | from collections import OrderedDict 10 | from Dense import Dense 11 | from keras import backend as K 12 | 13 | from CNN import Convolution1D 14 | from LSTM import LSTM 15 | def CNN_embed(embed_s,s_mask,sentence_encode_layer): 16 | s_mask = s_mask.reshape((s_mask.shape[0],s_mask.shape[1],1)) # n_samples * len_sentence * 1 17 | s_mask = s_mask.repeat(embed_s.shape[2],axis = 2) # n_samples * len_sentence * embed_dim 18 | embed_s = embed_s * s_mask # n_samples * len_sentence * embed_dim 19 | 20 | embed_s = sentence_encode_layer.get_output(embed_s) # n_samples * len_sentence- * embed_dim 21 | embed_s = T.max(embed_s,axis = 1) # n_samples * embed_dim 22 | return embed_s # n_samples * embed_dim 23 | 24 | def LSTM_embed(embed_s,s_mask,sentence_encode_layer, options): 25 | s_mask = s_mask.reshape((s_mask.shape[0],s_mask.shape[1],1)) # n_samples * len_sentence * 1 26 | s_mask = s_mask.repeat(embed_s.shape[2],axis = 2) # n_samples * len_sentence * embed_dim 27 | embed_s = embed_s * s_mask # n_samples * len_sentence * embed_dim 28 | 29 | embed_s = sentence_encode_layer.get_output(go_backwards = options['LSTM_go_backwards'], train = embed_s) # n_samples * len_sentence * embed_dim 30 | return embed_s[:,-1,:] # n_samples * embed_dim 31 | 32 | def ave_embed(embed_s,s_mask): 33 | n = s_mask.sum(axis = 1) # n_samples 34 | n = n.reshape((s_mask.shape[0],1)) # n_samples * 1 35 | n = n.repeat(embed_s.shape[2],axis = 1) # n_samples * embed_dim 36 | s_mask = s_mask.reshape((s_mask.shape[0],s_mask.shape[1],1)) # n_samples * len_sentence * 1 37 | s_mask = s_mask.repeat(embed_s.shape[2],axis = 2) # n_samples * len_sentence * embed_dim 38 | embed_s = embed_s * s_mask # n_samples * len_sentence * embed_dim 39 | return embed_s.sum(axis = 1) /n # n_samples * embed_dim 40 | def build_model(options): 41 | print('Build model...') 42 | sys.stdout.flush() 43 | weights = None 44 | if options['flag_random_lookup_table'] == False: weights = options['embedding'] 45 | embed_layer = Embedding(input_dim = options['embedding'].shape[0], 46 | output_dim = options['embedding'].shape[1], 47 | weights = weights) 48 | dense_layers = [] 49 | dense_layers.append(Dense(input_dim = options['embedding'].shape[1] * 2, output_dim = options['size_hidden_layer'], activation = 'tanh')) 50 | dense_layers.append(Dense(input_dim = options['size_hidden_layer'], output_dim = 1, activation = 'sigmoid')) 51 | 52 | # for training 53 | sentence1 = T.imatrix('s1') # sentence1, n_samples * len_sentence 54 | sentence1_mask = T.matrix('s1_mask') 55 | sentence2 = T.imatrix('s2') # sentence2, n_samples * len_sentence 56 | sentence2_mask = T.matrix('s2_mask') 57 | y = T.ivector('y1') # n_samples 58 | 59 | embed_s1 = embed_layer.get_output(sentence1) # n_samples * len_sentence * embed_dim 60 | embed_s2 = embed_layer.get_output(sentence2) # n_samples * len_sentence * embed_dim 61 | if options['sentence_modeling'] == 'CBoW': 62 | embed_s1 = ave_embed(embed_s1,sentence1_mask) # n_samples * embed_dim 63 | embed_s2 = ave_embed(embed_s2,sentence2_mask) # n_samples * embed_dim 64 | elif options['sentence_modeling'] == 'CNN': 65 | sentence_encode_layer = Convolution1D(input_dim = options['embedding'].shape[1], activation = 'tanh', 66 | nb_filter = options['embedding'].shape[1], filter_length = options['CNN_filter_length'], 67 | border_mode = 'same') 68 | embed_s1 = CNN_embed(embed_s1,sentence1_mask,sentence_encode_layer) # n_samples * embed_dim 69 | embed_s2 = CNN_embed(embed_s2,sentence2_mask,sentence_encode_layer) # 
n_samples * embed_dim 70 | elif options['sentence_modeling'] == 'LSTM': 71 | sentence_encode_layer = LSTM(input_dim = options['embedding'].shape[1], output_dim = options['embedding'].shape[1]) 72 | embed_s1 = LSTM_embed(embed_s1,sentence1_mask,sentence_encode_layer,options) # n_samples * embed_dim 73 | embed_s2 = LSTM_embed(embed_s2,sentence2_mask,sentence_encode_layer,options) # n_samples * embed_dim 74 | else: 75 | print 'Error: No model called %s available!' % options['sentence_modeling'] 76 | return 77 | 78 | output = T.concatenate([embed_s1,embed_s2],axis = -1) # n_samples * (embed_dim * 2) 79 | 80 | if options['flag_dropout'] == True: 81 | output = dropout(output, level=options['dropoutRates']) 82 | for dense_layer in dense_layers: 83 | output = dense_layer.get_output(output) 84 | f_pred = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask],output, allow_input_downcast=True) 85 | 86 | output = output.reshape((output.shape[0],)) 87 | #y = y.reshape((output.shape[0],1)) 88 | cost = T.nnet.binary_crossentropy(output, y).mean() 89 | f_debug = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y],[output,y,T.nnet.binary_crossentropy(output, y),cost], allow_input_downcast=True) 90 | tparams = [] 91 | tparams += embed_layer.params 92 | if options['sentence_modeling'] != 'CBoW': 93 | tparams += sentence_encode_layer.params 94 | for dense_layer in dense_layers: tparams += dense_layer.params 95 | return sentence1,sentence1_mask,sentence2,sentence2_mask,y,cost,f_pred,tparams,f_debug 96 | -------------------------------------------------------------------------------- /src/Dense.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | 4 | import numpy as np 5 | 6 | from collections import OrderedDict 7 | import copy 8 | from six.moves import zip 9 | 10 | from keras import backend as K 11 | from keras import activations, initializations, regularizers, constraints 12 | from keras.regularizers import ActivityRegularizer 13 | 14 | import marshal 15 | import types 16 | import sys 17 | class Dense(): 18 | '''Apply a same Dense layer for each dimension[1] (time_dimension) input. 19 | Especially useful after a recurrent network with 'return_sequence=True'. 20 | 21 | # Input shape 22 | 3D tensor with shape `(nb_sample, time_dimension, input_dim)`. 23 | 24 | # Output shape 25 | 3D tensor with shape `(nb_sample, time_dimension, output_dim)`. 26 | 27 | # Arguments 28 | output_dim: int > 0. 29 | init: name of initialization function for the weights of the layer 30 | (see [initializations](../initializations.md)), 31 | or alternatively, Theano function to use for weights 32 | initialization. This parameter is only relevant 33 | if you don't pass a `weights` argument. 34 | activation: name of activation function to use 35 | (see [activations](../activations.md)), 36 | or alternatively, elementwise Theano function. 37 | If you don't specify anything, no activation is applied 38 | (ie. "linear" activation: a(x) = x). 39 | weights: list of numpy arrays to set as initial weights. 40 | The list should have 1 element, of shape `(input_dim, output_dim)`. 41 | W_regularizer: instance of [WeightRegularizer](../regularizers.md) 42 | (eg. L1 or L2 regularization), applied to the main weights matrix. 43 | b_regularizer: instance of [WeightRegularizer](../regularizers.md), 44 | applied to the bias. 
45 | activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), 46 | applied to the network output. 47 | W_constraint: instance of the [constraints](../constraints.md) module 48 | (eg. maxnorm, nonneg), applied to the main weights matrix. 49 | b_constraint: instance of the [constraints](../constraints.md) module, 50 | applied to the bias. 51 | input_dim: dimensionality of the input (integer). 52 | This argument (or alternatively, the keyword argument `input_shape`) 53 | is required when using this layer as the first layer in a model. 54 | ''' 55 | input_ndim = 3 56 | 57 | def __init__(self, input_dim, output_dim, 58 | init='glorot_uniform', activation='linear', weights=None, 59 | W_regularizer=None, b_regularizer=None, activity_regularizer=None, 60 | W_constraint=None, b_constraint=None): 61 | self.input_dim = input_dim 62 | self.output_dim = output_dim 63 | self.init = initializations.get(init) 64 | self.activation = activations.get(activation) 65 | ''' 66 | self.W_regularizer = regularizers.get(W_regularizer) 67 | self.b_regularizer = regularizers.get(b_regularizer) 68 | self.activity_regularizer = regularizers.get(activity_regularizer) 69 | 70 | self.W_constraint = constraints.get(W_constraint) 71 | self.b_constraint = constraints.get(b_constraint) 72 | self.constraints = [self.W_constraint, self.b_constraint] 73 | 74 | self.initial_weights = weights 75 | ''' 76 | 77 | #super(TimeDistributedDense, self).__init__(**kwargs) 78 | 79 | #def build(self): 80 | 81 | 82 | self.W = self.init((self.input_dim, self.output_dim)) 83 | self.b = K.zeros((self.output_dim,)) 84 | 85 | self.params = [self.W, self.b] 86 | ''' 87 | self.regularizers = [] 88 | 89 | if self.W_regularizer: 90 | self.W_regularizer.set_param(self.W) 91 | self.regularizers.append(self.W_regularizer) 92 | 93 | if self.b_regularizer: 94 | self.b_regularizer.set_param(self.b) 95 | self.regularizers.append(self.b_regularizer) 96 | 97 | if self.activity_regularizer: 98 | self.activity_regularizer.set_layer(self) 99 | self.regularizers.append(self.activity_regularizer) 100 | 101 | if self.initial_weights is not None: 102 | self.set_weights(self.initial_weights) 103 | del self.initial_weights 104 | 105 | ''' 106 | 107 | def get_output(self, X): 108 | output = self.activation(K.dot(X, self.W) + self.b) 109 | return output 110 | ''' 111 | def get_config(self): 112 | config = {'name': self.__class__.__name__, 113 | 'output_dim': self.output_dim, 114 | 'init': self.init.__name__, 115 | 'activation': self.activation.__name__, 116 | 'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None, 117 | 'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None, 118 | 'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None, 119 | 'W_constraint': self.W_constraint.get_config() if self.W_constraint else None, 120 | 'b_constraint': self.b_constraint.get_config() if self.b_constraint else None, 121 | 'input_dim': self.input_dim, 122 | 'input_length': self.input_length} 123 | #base_config = super(TimeDistributedDense, self).get_config() 124 | return dict(list(config.items())) 125 | ''' -------------------------------------------------------------------------------- /src/CNN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | from keras import backend as K 5 | from keras import activations, initializations, regularizers, constraints 6 | 
from keras.regularizers import ActivityRegularizer 7 | from theano.tensor.signal import downsample 8 | 9 | 10 | def conv_output_length(input_length, filter_size, border_mode, stride): 11 | if input_length is None: 12 | return None 13 | assert border_mode in {'same', 'valid'} 14 | if border_mode == 'same': 15 | output_length = input_length 16 | elif border_mode == 'valid': 17 | output_length = input_length - filter_size + 1 18 | return (output_length + stride - 1) // stride 19 | 20 | 21 | class Convolution1D(): 22 | '''Convolution operator for filtering neighborhoods of one-dimensional inputs. 23 | When using this layer as the first layer in a model, 24 | either provide the keyword argument `input_dim` 25 | (int, e.g. 128 for sequences of 128-dimensional vectors), 26 | or `input_shape` (tuple of integers, e.g. (10, 128) for sequences 27 | of 10 vectors of 128-dimensional vectors). 28 | 29 | # Input shape 30 | 3D tensor with shape: `(samples, steps, input_dim)`. 31 | 32 | # Output shape 33 | 3D tensor with shape: `(samples, new_steps, nb_filter)`. 34 | `steps` value might have changed due to padding. 35 | 36 | # Arguments 37 | nb_filter: Number of convolution kernels to use 38 | (dimensionality of the output). 39 | filter_length: The extension (spatial or temporal) of each filter. 40 | init: name of initialization function for the weights of the layer 41 | (see [initializations](../initializations.md)), 42 | or alternatively, Theano function to use for weights initialization. 43 | This parameter is only relevant if you don't pass a `weights` argument. 44 | activation: name of activation function to use 45 | (see [activations](../activations.md)), 46 | or alternatively, elementwise Theano function. 47 | If you don't specify anything, no activation is applied 48 | (ie. "linear" activation: a(x) = x). 49 | weights: list of numpy arrays to set as initial weights. 50 | border_mode: 'valid' or 'same'. 51 | subsample_length: factor by which to subsample output. 52 | W_regularizer: instance of [WeightRegularizer](../regularizers.md) 53 | (eg. L1 or L2 regularization), applied to the main weights matrix. 54 | b_regularizer: instance of [WeightRegularizer](../regularizers.md), 55 | applied to the bias. 56 | activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), 57 | applied to the network output. 58 | W_constraint: instance of the [constraints](../constraints.md) module 59 | (eg. maxnorm, nonneg), applied to the main weights matrix. 60 | b_constraint: instance of the [constraints](../constraints.md) module, 61 | applied to the bias. 62 | input_dim: Number of channels/dimensions in the input. 63 | Either this argument or the keyword argument `input_shape`must be 64 | provided when using this layer as the first layer in a model. 65 | input_length: Length of input sequences, when it is constant. 66 | This argument is required if you are going to connect 67 | `Flatten` then `Dense` layers upstream 68 | (without it, the shape of the dense outputs cannot be computed). 
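    # Example
        A minimal sketch of the sentence encoder built in model.py (`embed_dim` stands
        for options['embedding'].shape[1]; filter_length follows the CNN_filter_length = 3
        setting in driver.py):

            conv = Convolution1D(input_dim=embed_dim, nb_filter=embed_dim,
                                 filter_length=3, activation='tanh',
                                 border_mode='same')
            conv_out = conv.get_output(embed_s)  # n_samples * len_sentence * nb_filter
            sent_vec = T.max(conv_out, axis=1)   # max-over-time pooling: n_samples * nb_filter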
69 | ''' 70 | input_ndim = 3 71 | 72 | def __init__(self, nb_filter, filter_length, 73 | init='uniform', activation='linear', weights=None, 74 | border_mode='valid', subsample_length=1, 75 | input_dim=None): 76 | 77 | if border_mode not in {'valid', 'same'}: 78 | raise Exception('Invalid border mode for Convolution1D:', border_mode) 79 | self.nb_filter = nb_filter 80 | self.filter_length = filter_length 81 | self.init = initializations.get(init) 82 | self.activation = activations.get(activation) 83 | assert border_mode in {'valid', 'same'}, 'border_mode must be in {valid, same}' 84 | self.border_mode = border_mode 85 | self.subsample_length = subsample_length 86 | 87 | self.subsample = (subsample_length, 1) 88 | 89 | 90 | self.input_dim = input_dim 91 | 92 | 93 | input_dim = self.input_dim 94 | self.W_shape = (self.nb_filter, input_dim, self.filter_length, 1) 95 | self.W = self.init(self.W_shape) 96 | self.b = K.zeros((self.nb_filter,)) 97 | self.params = [self.W, self.b] 98 | 99 | def get_output(self, train=False): 100 | X = train 101 | X = K.expand_dims(X, -1) # add a dimension of the right 102 | X = K.permute_dimensions(X, (0, 2, 1, 3)) 103 | conv_out = K.conv2d(X, self.W, strides=self.subsample, 104 | border_mode=self.border_mode, 105 | dim_ordering='th') 106 | 107 | output = conv_out + K.reshape(self.b, (1, self.nb_filter, 1, 1)) 108 | output = self.activation(output) 109 | output = K.squeeze(output, 3) # remove the dummy 3rd dimension 110 | output = K.permute_dimensions(output, (0, 2, 1)) 111 | return output 112 | 113 | class MaxPooling1D(): 114 | '''Max pooling operation for temporal data. 115 | 116 | # Input shape 117 | 3D tensor with shape: `(samples, steps, features)`. 118 | 119 | # Output shape 120 | 3D tensor with shape: `(samples, downsampled_steps, features)`. 121 | 122 | # Arguments 123 | pool_length: factor by which to downscale. 2 will halve the input. 124 | stride: integer or None. Stride value. 125 | border_mode: 'valid' or 'same'. 126 | Note: 'same' will only work with TensorFlow for the time being. 127 | ''' 128 | def __init__(self, pool_length=2, stride=None, 129 | border_mode='valid'): 130 | self.pool_length = pool_length = 2 131 | self.border_mode = border_mode 132 | self.params = [] 133 | 134 | def get_output(self, train=False): 135 | #output = K.pool2d(x = train, pool_size = (self.pool_length,1), 136 | # border_mode = self.border_mode, pool_mode='max') 137 | pool_size = (self.pool_length, 1) 138 | strides = (self.pool_length, 1) 139 | ignore_border = True 140 | padding = (0, 0) 141 | output = downsample.max_pool_2d(train, ds=pool_size, st=strides, 142 | ignore_border=ignore_border, 143 | padding=padding, 144 | mode='max') 145 | return output 146 | -------------------------------------------------------------------------------- /src/LSTM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import numpy as np 4 | 5 | from keras import backend as K 6 | from keras import activations, initializations 7 | import theano 8 | import theano.tensor as T 9 | 10 | class LSTM(): 11 | '''Long-Short Term Memory unit - Hochreiter 1997. 12 | 13 | For a step-by-step description of the algorithm, see 14 | [this tutorial](http://deeplearning.net/tutorial/lstm.html). 15 | 16 | # Arguments 17 | output_dim: dimension of the internal projections and the final output. 18 | init: weight initialization function. 
19 | Can be the name of an existing function (str), 20 | or a Theano function (see: [initializations](../initializations.md)). 21 | inner_init: initialization function of the inner cells. 22 | forget_bias_init: initialization function for the bias of the forget gate. 23 | [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) 24 | recommend initializing with ones. 25 | activation: activation function. 26 | Can be the name of an existing function (str), 27 | or a Theano function (see: [activations](../activations.md)). 28 | inner_activation: activation function for the inner cells. 29 | 30 | # References 31 | - [Long short-term memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf) (original 1997 paper) 32 | - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015) 33 | - [Supervised sequence labelling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf) 34 | ''' 35 | def __init__(self, input_dim, output_dim, 36 | init='glorot_uniform', inner_init='orthogonal', 37 | forget_bias_init='one', activation='tanh', 38 | inner_activation='hard_sigmoid'): 39 | #self.input_dim = input_dim 40 | self.output_dim = output_dim 41 | self.init = initializations.get(init) 42 | self.inner_init = initializations.get(inner_init) 43 | self.forget_bias_init = initializations.get(forget_bias_init) 44 | self.activation = activations.get(activation) 45 | self.inner_activation = activations.get(inner_activation) 46 | 47 | self.input_dim = input_dim 48 | #self.input = K.placeholder(input_shape) 49 | 50 | # initial states: 2 all-zero tensor of shape (output_dim) 51 | self.states = [None, None] 52 | 53 | self.W_i = self.init((input_dim, self.output_dim)) 54 | self.U_i = self.inner_init((self.output_dim, self.output_dim)) 55 | self.b_i = K.zeros((self.output_dim,)) 56 | 57 | self.W_f = self.init((input_dim, self.output_dim)) 58 | self.U_f = self.inner_init((self.output_dim, self.output_dim)) 59 | self.b_f = self.forget_bias_init((self.output_dim,)) 60 | 61 | self.W_c = self.init((input_dim, self.output_dim)) 62 | self.U_c = self.inner_init((self.output_dim, self.output_dim)) 63 | self.b_c = K.zeros((self.output_dim,)) 64 | 65 | self.W_o = self.init((input_dim, self.output_dim)) 66 | self.U_o = self.inner_init((self.output_dim, self.output_dim)) 67 | self.b_o = K.zeros((self.output_dim,)) 68 | 69 | self.params = [self.W_i, self.U_i, self.b_i, 70 | self.W_c, self.U_c, self.b_c, 71 | self.W_f, self.U_f, self.b_f, 72 | self.W_o, self.U_o, self.b_o] 73 | 74 | #if self.initial_weights is not None: 75 | # self.set_weights(self.initial_weights) 76 | # del self.initial_weights 77 | 78 | def numpy_floatX(self,data): 79 | return np.asarray(data, dtype=np.float32) 80 | def reset_states(self,batch_size): 81 | #self.states = [K.zeros((batch_size, self.output_dim)), 82 | # K.zeros((batch_size, self.output_dim))] 83 | 84 | self.states = [T.alloc(self.numpy_floatX(0.),batch_size,self.output_dim), 85 | T.alloc(self.numpy_floatX(0.),batch_size,self.output_dim)] 86 | 87 | def step(self, x, h_tm1, c_tm1): 88 | #assert len(states) == 2 89 | #h_tm1 = states[0] 90 | #c_tm1 = states[1] 91 | 92 | x_i = K.dot(x, self.W_i) + self.b_i 93 | x_f = K.dot(x, self.W_f) + self.b_f 94 | x_c = K.dot(x, self.W_c) + self.b_c 95 | x_o = K.dot(x, self.W_o) + self.b_o 96 | 97 | i = self.inner_activation(x_i + K.dot(h_tm1, self.U_i)) 98 | f = self.inner_activation(x_f + K.dot(h_tm1, self.U_f)) 99 | c = f * c_tm1 + i * 
self.activation(x_c + K.dot(h_tm1, self.U_c)) 100 | o = self.inner_activation(x_o + K.dot(h_tm1, self.U_o)) 101 | h = o * self.activation(c) 102 | return h, c 103 | 104 | def get_output(self, go_backwards = False, train = False): 105 | self.reset_states(train.shape[0]) 106 | inputs = train.dimshuffle((1, 0, 2)) 107 | results, _ = theano.scan( 108 | self.step, 109 | sequences=inputs, 110 | outputs_info=[self.states[0],self.states[1]], 111 | go_backwards=go_backwards) 112 | ''' 113 | # deal with Theano API inconsistency 114 | if type(results) is list: 115 | outputs = results[0] 116 | states = results[1:] 117 | else: 118 | outputs = results 119 | states = [] 120 | 121 | outputs = T.squeeze(outputs) 122 | last_output = outputs[-1] 123 | ''' 124 | 125 | #outputs = np.asarray(results)[:,0] 126 | #outputs = T.squeeze(outputs) 127 | #outputs = outputs.dimshuffle((1, 0, 2)) 128 | 129 | #states = [T.squeeze(state[-1]) for state in states] 130 | #return last_output, outputs, states 131 | 132 | outputs = results[0] 133 | outputs = T.squeeze(outputs) 134 | outputs = outputs.dimshuffle((1, 0, 2)) 135 | return outputs 136 | 137 | 138 | class BLSTM(): 139 | def __init__(self, input_dim, output_dim, 140 | init='glorot_uniform', inner_init='orthogonal', 141 | forget_bias_init='one', activation='tanh', 142 | inner_activation='hard_sigmoid'): 143 | #self.input_dim = input_dim 144 | self.output_dim = int(output_dim / 2) 145 | self.init = initializations.get(init) 146 | self.inner_init = initializations.get(inner_init) 147 | self.forget_bias_init = initializations.get(forget_bias_init) 148 | self.activation = activations.get(activation) 149 | self.inner_activation = activations.get(inner_activation) 150 | 151 | self.input_dim = input_dim 152 | #self.input = K.placeholder(input_shape) 153 | 154 | # initial states: 2 all-zero tensor of shape (output_dim) 155 | self.forward_lstm = LSTM(input_dim = input_dim, output_dim = self.output_dim) 156 | self.backward_lstm = LSTM(input_dim = input_dim, output_dim = self.output_dim) 157 | 158 | self.params = self.forward_lstm.params + self.backward_lstm.params 159 | 160 | #if self.initial_weights is not None: 161 | # self.set_weights(self.initial_weights) 162 | # del self.initial_weights 163 | 164 | 165 | 166 | def get_output(self, train = False): 167 | res_forward = self.forward_lstm.get_output(train) 168 | res_backward = self.backward_lstm.get_output(train[:,::-1,:]) 169 | outputs = T.concatenate([res_forward, res_backward[:,::-1,:]], axis = -1) 170 | return outputs 171 | -------------------------------------------------------------------------------- /src/pairwise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import time 4 | from collections import OrderedDict 5 | import theano 6 | import copy 7 | from data_utils import load_data, prepare_data, get_minibatches_idx, data_padding 8 | from model import build_model 9 | from optimizer import sgd,rmsprop,adadelta 10 | import theano.tensor as tensor 11 | class Pairwise(object): 12 | def get_score(self, pre_sentence_list, cur_sentence, preds, pairdict): 13 | score = 0.0 14 | for pre_sentence in pre_sentence_list: 15 | idx = pairdict[(pre_sentence, cur_sentence)] 16 | score += np.log(preds[idx]) 17 | return score 18 | def score_rank(self, sentence): 19 | n_total = 0 20 | n_correct = 0 21 | for i in range(len(sentence)): 22 | for j in range(i+1, len(sentence)): 23 | n_total += 1 24 | if sentence[i] < sentence[j]: n_correct += 1 25 | patial_correct = 
n_correct * 1.0 / n_total 26 | total_correct = 0.0 27 | if n_correct == n_total: total_correct = 1.0 28 | return patial_correct, total_correct 29 | def eva(self, 30 | f_pred, src_data, data, pairdict, kf, model_options): 31 | preds = [] 32 | for _, data_index in kf: 33 | batch_samples = [data[t] for t in data_index] 34 | sentence1,sentence1_mask,sentence2,sentence2_mask,y = data_padding(batch_samples) 35 | preds.append(f_pred(sentence1,sentence1_mask,sentence2,sentence2_mask)) 36 | preds = np.concatenate(preds, axis = 0) # 1d_array n_samples 37 | 38 | categories = [] 39 | n_sentences = 0 40 | data_beams = [] # n_paragraph * n_sentences (sentence, score) 41 | for paragraph, cur_categories in src_data: 42 | categories.append(cur_categories) 43 | beam = [] 44 | for s_id in xrange(len(paragraph)): 45 | beam.append(([s_id + n_sentences],0.0)) 46 | for nid in xrange(len(paragraph)-1): 47 | new_beam = [] 48 | for item in beam: 49 | for s_id in xrange(len(paragraph)): 50 | new_sentence = item[0] + [s_id + n_sentences] 51 | if len(set(new_sentence)) < nid + 2: continue # repeated elements occur 52 | new_score = item[1] + self.get_score(item[0], s_id + n_sentences, preds, pairdict) 53 | new_beam.append((new_sentence, new_score)) 54 | new_beam = sorted(new_beam, key=lambda item : -item[1]) #from high score to lower ones 55 | beam = new_beam[:model_options['beam_size']] 56 | data_beams.append(beam) 57 | n_sentences += len(paragraph) 58 | 59 | top1_res = [] # sentence_rank, paragraph_categories 60 | eva_res = np.zeros((len(src_data),model_options['beam_size'],2)) # n_paragraph * beam_size * 2 patial_correct, total_correct 61 | for id_paragraph, beam in enumerate(data_beams): 62 | top1_res.append((beam[0][0], categories[id_paragraph])) 63 | for idx, (sentence, _) in enumerate(beam): 64 | patial_correct, total_correct = self.score_rank(sentence) 65 | eva_res[id_paragraph][idx] = np.asarray([patial_correct, total_correct]) 66 | 67 | top = 1 68 | while top <= model_options['beam_size']: 69 | eva_res_top = np.max(eva_res[:,:top,:], axis = 1) # n_paragraph * 2 patial_correct, total_correct 70 | print 'Top %d beam ' % top 71 | average = np.average(eva_res_top, axis = 0) 72 | print 'patial_correct_rate: ', average[0] 73 | print 'total_correct_rate: ', average[1] 74 | top *= 2 75 | print '' 76 | 77 | return top1_res 78 | def save_result(self,path,top1_res): 79 | fw = open(path,'w') 80 | for paragraph, cur_categories in top1_res: 81 | paragraph = np.asarray(paragraph) - np.min(paragraph) 82 | paragraph = list(paragraph) 83 | for sentence in paragraph: 84 | fw.write(str(sentence)) 85 | fw.write(' ') 86 | fw.write('#') 87 | for category in cur_categories: 88 | fw.write(category) 89 | fw.write(' ') 90 | fw.write('\n') 91 | fw.close() 92 | 93 | def __init__(self, 94 | alpha, 95 | batch_size, 96 | n_epochs, 97 | wordVecLen, 98 | flag_dropout, 99 | datapath, 100 | random_seed, 101 | dropoutRates, 102 | optimizer, 103 | dispFreq, 104 | beam_size, 105 | flag_random_lookup_table, 106 | flag_toy_data, 107 | size_hidden_layer, 108 | dataset, 109 | result_path, 110 | sentence_modeling, 111 | CNN_filter_length, 112 | LSTM_go_backwards 113 | ): 114 | model_options = locals().copy() 115 | model_options['rng'] = np.random.RandomState(random_seed) 116 | print 'Loading data' 117 | src_train,src_valid,src_test,dic_w2idx, dic_idx2w, dic_w2embed, dic_idx2embed, embedding = load_data(path=datapath) 118 | if flag_toy_data == True: 119 | src_valid = src_valid[:10] 120 | src_test = src_test[:10] 121 | #src_train = copy.copy(src_valid) 
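            # Note on flag_toy_data (set in driver.py): True keeps only the first 10
            # paragraphs of each split, while a float in (0, 1) -- e.g. the 0.1 that
            # driver.py uses -- falls through to the elif branch below and keeps that
            # fraction of each split instead.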
122 | src_train = src_train[:10] 123 | elif flag_toy_data != False: 124 | valid_l = len(src_valid) * flag_toy_data 125 | test_l = len(src_test) * flag_toy_data 126 | train_l = len(src_train) * flag_toy_data 127 | src_valid = src_valid[:int(valid_l)] 128 | src_test = src_test[:int(test_l)] 129 | src_train = src_train[:int(train_l)] 130 | 131 | train,pairdict_train = prepare_data(src_train) 132 | valid,pairdict_valid = prepare_data(src_valid) 133 | test,pairdict_test = prepare_data(src_test) 134 | model_options['embedding'] = embedding 135 | 136 | (sentence1,sentence1_mask,sentence2,sentence2_mask,y,cost,f_pred,tparams,f_debug) = build_model(model_options) 137 | #f_cost = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], cost, name='f_cost') 138 | 139 | #grads = tensor.grad(theano.gradient.grad_clip(cost, -2.0, 2.0), wrt=tparams.values()) 140 | grads = tensor.grad(theano.gradient.grad_clip(cost, -2.0, 2.0), wrt=tparams) 141 | # grads = tensor.grad(cost, wrt=tparams.values()) 142 | #f_grad = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], grads, name='f_grad') 143 | 144 | lr = tensor.scalar(name='lr') 145 | if model_options['optimizer'] == 'sgd': optimizer = sgd 146 | elif model_options['optimizer'] == 'rmsprop': optimizer = rmsprop 147 | else: optimizer = adadelta 148 | f_grad_shared, f_update = optimizer(lr, tparams, grads, sentence1,sentence1_mask,sentence2,sentence2_mask,y, cost) 149 | 150 | 151 | print 'Optimization' 152 | 153 | kf_valid = get_minibatches_idx(len(valid), model_options['batch_size']) 154 | kf_test = get_minibatches_idx(len(test), model_options['batch_size']) 155 | 156 | print "%d train examples" % len(train) 157 | print "%d valid examples" % len(valid) 158 | print "%d test examples" % len(test) 159 | sys.stdout.flush() 160 | 161 | 162 | best_validation_score = -np.inf 163 | best_iter = 0 164 | uidx = 0 # the number of update done 165 | for epoch in xrange(model_options['n_epochs']): 166 | print ('Training on %d epoch' % epoch) 167 | sys.stdout.flush() 168 | kf = get_minibatches_idx(len(train), batch_size, shuffle=True) 169 | start_time = time.time() 170 | samples_seen = 0 171 | for _, train_index in kf: 172 | uidx += 1 173 | batch_samples = [train[t] for t in train_index] 174 | samples_seen += len(batch_samples) 175 | #print batch_samples 176 | sentence1,sentence1_mask,sentence2,sentence2_mask,y = data_padding(batch_samples) 177 | #print sentence1,sentence1_mask,sentence2,sentence2_mask,y 178 | #print sentence1.shape,sentence1_mask.shape,sentence2.shape,sentence2_mask.shape,y.shape 179 | #o = f_debug(sentence1,sentence1_mask,sentence2,sentence2_mask,y) 180 | #print o 181 | #print o[0].shape,o[1].shape,o[2].shape,o[3].shape 182 | cost = f_grad_shared(sentence1,sentence1_mask,sentence2,sentence2_mask,y) 183 | f_update(model_options['alpha']) 184 | if np.isnan(cost) or np.isinf(cost): 185 | print 'NaN detected' 186 | return 1., 1., 1. 
187 | 188 | if np.mod(uidx, dispFreq) == 0: 189 | print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cost, 'Samples_seen ', samples_seen 190 | sys.stdout.flush() 191 | print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cost, 'Samples_seen ', samples_seen 192 | sys.stdout.flush() 193 | ''' 194 | if epoch % 5 == 0: 195 | kf_train = get_minibatches_idx(len(train), batch_size) 196 | print ('Train_score:') 197 | self.eva(f_pred, src_train, train, pairdict_train, kf_train, model_options) 198 | sys.stdout.flush() 199 | ''' 200 | print ('Valid_score:') 201 | top1_res = self.eva(f_pred, src_valid, valid, pairdict_valid, kf_valid, model_options) 202 | self.save_result(model_options['result_path'] + 'dev.on.' + str(epoch) +'th_epoch_' + model_options['dataset'],top1_res) 203 | sys.stdout.flush() 204 | print ('Test_score:') 205 | top1_res = self.eva(f_pred, src_test, test, pairdict_test, kf_test, model_options) 206 | self.save_result(model_options['result_path'] + 'test.on.' + str(epoch) +'th_epoch_' + model_options['dataset'],top1_res) 207 | sys.stdout.flush() 208 | 209 | print ('%d epoch completed.' % epoch) 210 | sys.stdout.flush() 211 | ''' 212 | if(best_validation_score < valid_score): 213 | best_iter = epoch 214 | best_validation_score = valid_score 215 | print ('Current best_dev_F is %.2f, at %d epoch'%(best_validation_score,best_iter)) 216 | ''' 217 | 218 | end_time = time.time() 219 | minu = int((end_time - start_time)/60) 220 | sec = (end_time - start_time) - 60 * minu 221 | print ('Time: %d min %.2f sec' % (minu, sec)) 222 | sys.stdout.flush() 223 | print('Training completed!') 224 | sys.stdout.flush() 225 | 226 | --------------------------------------------------------------------------------
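A note on the evaluation metric: score_rank (defined in both src/guess.py and src/pairwise.py)
scores a predicted sentence order by the fraction of sentence pairs whose relative order is
correct, plus a 0/1 flag for a perfectly ordered paragraph. A small standalone sketch
(restated from the in-repo implementation), with a worked example:

    def score_rank(sentence):
        # sentence[k] is the true position of the sentence predicted at rank k
        n_total, n_correct = 0, 0
        for i in range(len(sentence)):
            for j in range(i + 1, len(sentence)):
                n_total += 1
                if sentence[i] < sentence[j]:
                    n_correct += 1
        partial_correct = n_correct * 1.0 / n_total
        total_correct = 1.0 if n_correct == n_total else 0.0
        return partial_correct, total_correct

    print score_rank([0, 2, 1, 3])  # (0.8333..., 0.0): 5 of the 6 pairs are ordered correctly
    print score_rank([0, 1, 2, 3])  # (1.0, 1.0): a perfectly ordered paragraph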