├── README.md
├── kge
│   ├── data
│   │   └── freebase15k
│   │       ├── freebase_15k-id2entity.pkl
│   │       ├── freebase_15k-id2relation.pkl
│   │       ├── freebase_15k-test.pkl
│   │       ├── freebase_15k-train.pkl
│   │       └── freebase_15k-valid.pkl
│   └── hole.py
├── pom.xml
├── python
│   └── sansa
│       ├── __init__.py
│       └── ml
│           ├── __init__.py
│           └── kbc
│               ├── __init__.py
│               ├── keras
│               │   ├── __init__.py
│               │   ├── actfun.py
│               │   ├── base.py
│               │   ├── hole.py
│               │   ├── holek.py
│               │   ├── param.py
│               │   ├── sample.py
│               │   └── util.py
│               └── rdfio.py
└── src
    └── main
        └── scala
            └── net
                └── sansa_stack
                    └── ml
                        └── kge
                            ├── Functions.scala
                            ├── Main.scala
                            ├── RDFDatasetReader.scala
                            └── model
                                └── TransE.scala

/README.md:
--------------------------------------------------------------------------------
1 | # Spark-Tensors
2 | Temporary repository for implementing tensor factorization algorithms on Apache Spark
3 | 
4 | Currently I am working on the following 3 algorithms:
5 | 
6 | 1. PARAFAC (parallel algorithms given in [GigaTensor](https://www.cs.cmu.edu/~epapalex/papers/gigatensor_KDD2012.pdf) and [U. Kang's PhD thesis](http://datalab.snu.ac.kr/~ukang/papers/KangThesis.pdf)). Also check the [HaTen2 paper](https://www.cs.cmu.edu/~epapalex/papers/haten2_icde2015.pdf), which apparently improves upon GigaTensor.
7 | 2. RESCAL ([RESCAL paper](http://www.icml-2011.org/papers/438_icmlpaper.pdf), [M. Nickel's PhD thesis](http://edoc.ub.uni-muenchen.de/16056/1/Nickel_Maximilian.pdf)); a Spark-based distributed algorithm will be designed for this.
8 | 3. HolE ([Holographic Embeddings of Knowledge Graphs](http://arxiv.org/pdf/1510.04935v2)); a Spark-based distributed algorithm will be designed for this.
9 | 
10 | This will eventually be divided across the Spark-RDF (interface, I/O, storage) and Spark-Sem-ML (algorithm) repositories.
11 | 
--------------------------------------------------------------------------------
/kge/data/freebase15k/freebase_15k-id2entity.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SANSA-Stack/Spark-Tensors/9d834e75b917c6c476f426ebab47eec0830f190b/kge/data/freebase15k/freebase_15k-id2entity.pkl
--------------------------------------------------------------------------------
/kge/data/freebase15k/freebase_15k-id2relation.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SANSA-Stack/Spark-Tensors/9d834e75b917c6c476f426ebab47eec0830f190b/kge/data/freebase15k/freebase_15k-id2relation.pkl
--------------------------------------------------------------------------------
/kge/data/freebase15k/freebase_15k-test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SANSA-Stack/Spark-Tensors/9d834e75b917c6c476f426ebab47eec0830f190b/kge/data/freebase15k/freebase_15k-test.pkl
--------------------------------------------------------------------------------
/kge/data/freebase15k/freebase_15k-train.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SANSA-Stack/Spark-Tensors/9d834e75b917c6c476f426ebab47eec0830f190b/kge/data/freebase15k/freebase_15k-train.pkl
--------------------------------------------------------------------------------
/kge/data/freebase15k/freebase_15k-valid.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SANSA-Stack/Spark-Tensors/9d834e75b917c6c476f426ebab47eec0830f190b/kge/data/freebase15k/freebase_15k-valid.pkl
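Note on the HolE scoring function (added for orientation): HolE, item 3 in the README above, scores a triple (s, p, o) by applying a sigmoid to the dot product of the relation embedding with the circular correlation of the subject and object embeddings; `kge/hole.py` and the `python/sansa/ml/kbc/keras/` modules below build on this idea. The following is a minimal NumPy sketch following the paper linked in the README, not the repository's own implementation — all names are illustrative.

```python
import numpy as np

def ccorr(a, b):
    # circular correlation via FFT: ccorr(a, b) = ifft(conj(fft(a)) * fft(b))
    return np.real(np.fft.ifft(np.conj(np.fft.fft(a)) * np.fft.fft(b)))

def hole_score(E, R, s, p, o):
    # eta = r_p . ccorr(e_s, e_o); sigmoid(eta) is the plausibility of (s, p, o)
    eta = R[p].dot(ccorr(E[s], E[o]))
    return 1.0 / (1.0 + np.exp(-eta))

# toy usage with random 4-dimensional embeddings
rng = np.random.RandomState(0)
E = rng.randn(10, 4)   # 10 entity embeddings
R = rng.randn(3, 4)    # 3 relation embeddings
print(hole_score(E, R, s=0, p=1, o=2))
```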
-------------------------------------------------------------------------------- /kge/hole.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from __future__ import print_function 4 | 5 | from collections import OrderedDict 6 | import keras 7 | import theano as th 8 | import theano.tensor as T 9 | 10 | from keras import backend as K 11 | from keras.optimizers import Adagrad, SGD 12 | import keras 13 | from keras.engine.topology import Layer 14 | from keras.models import Sequential, Model 15 | from keras.layers import merge, Input, Embedding, Dropout, Convolution1D, Lambda, Activation, LSTM, Dense, TimeDistributed, \ 16 | ActivityRegularization, Reshape, Flatten 17 | from keras.constraints import unitnorm 18 | 19 | import os 20 | import sys 21 | import random 22 | import numpy as np 23 | from time import strftime, gmtime 24 | import six.moves.cPickle as pickle 25 | from keras.optimizers import RMSprop, Adam, SGD, Adadelta, Adagrad 26 | from scipy.stats import rankdata 27 | 28 | __author__ = 'nilesh' 29 | 30 | class KgeModel: 31 | def __init__(self, config): 32 | self.subject = Input(shape=(config['subject_len'],), dtype='int32', name='subject_base') 33 | self.subject_bad = Input(shape=(config['subject_len'],), dtype='int32', name='subject_bad_base') 34 | self.relation = Input(shape=(config['relation_len'],), dtype='int32', name='relation_base') 35 | self.object_good = Input(shape=(config['object_len'],), dtype='int32', name='object_good_base') 36 | self.object_bad = Input(shape=(config['object_len'],), dtype='int32', name='object_bad_base') 37 | 38 | self.config = config 39 | self.model_params = config.get('model_params', dict()) 40 | self.similarity_params = config.get('similarity_params', dict()) 41 | 42 | # initialize a bunch of variables that will be set later 43 | self._models = None 44 | self._similarities = None 45 | self._object = None 46 | self._subject = None 47 | self._kge_model = None 48 | 49 | self.training_model = None 50 | self.prediction_model = None 51 | 52 | def get_object(self): 53 | if self._object is None: 54 | self._object = Input(shape=(self.config['object_len'],), dtype='int32', name='object') 55 | return self._object 56 | 57 | def get_subject(self): 58 | if self._subject is None: 59 | self._subject = Input(shape=(self.config['subject_len'],), dtype='int32', name='subject') 60 | return self._subject 61 | 62 | # @abstractmethod 63 | def build(self): 64 | return 65 | 66 | def get_similarity(self): 67 | ''' Specify similarity in configuration under 'similarity_params' -> 'mode' 68 | If a parameter is needed for the model, specify it in 'similarity_params' 69 | Example configuration: 70 | config = { 71 | ... other parameters ... 
72 | 'similarity_params': { 73 | 'mode': 'gesd', 74 | 'gamma': 1, 75 | 'c': 1, 76 | } 77 | } 78 | cosine: dot(a, b) / sqrt(dot(a, a) * dot(b, b)) 79 | polynomial: (gamma * dot(a, b) + c) ^ d 80 | sigmoid: tanh(gamma * dot(a, b) + c) 81 | rbf: exp(-gamma * l2_norm(a-b) ^ 2) 82 | euclidean: 1 / (1 + l2_norm(a - b)) 83 | exponential: exp(-gamma * l2_norm(a - b)) 84 | gesd: euclidean * sigmoid 85 | aesd: (euclidean + sigmoid) / 2 86 | ''' 87 | 88 | params = self.similarity_params 89 | similarity = params['mode'] 90 | 91 | axis = lambda a: len(a._keras_shape) - 1 92 | dot = lambda a, b: K.batch_dot(a, b, axes=axis(a)) 93 | l2_norm = lambda a, b: K.sqrt(K.sum((a - b) ** 2, axis=axis(a), keepdims=True)) 94 | l1_norm = lambda a, b: K.sum(K.abs(a - b), axis=axis(a), keepdims=True) 95 | 96 | if similarity == 'cosine': 97 | return lambda x: dot(x[0], x[1]) / K.sqrt(dot(x[0], x[0]) * dot(x[1], x[1])) 98 | elif similarity == 'polynomial': 99 | return lambda x: (params['gamma'] * dot(x[0], x[1]) + params['c']) ** params['d'] 100 | elif similarity == 'sigmoid': 101 | return lambda x: K.tanh(params['gamma'] * dot(x[0], x[1]) + params['c']) 102 | elif similarity == 'rbf': 103 | return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1]) ** 2) 104 | elif similarity == 'euclidean': 105 | return lambda x: 1 / (1 + l2_norm(x[0], x[1])) 106 | elif similarity == 'l1': 107 | return lambda x: -l1_norm(x[0], x[1]) 108 | elif similarity == 'exponential': 109 | return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1])) 110 | elif similarity == 'gesd': 111 | euclidean = lambda x: 1 / (1 + l2_norm(x[0], x[1])) 112 | sigmoid = lambda x: 1 / (1 + K.exp(-1 * params['gamma'] * (dot(x[0], x[1]) + params['c']))) 113 | return lambda x: euclidean(x) * sigmoid(x) 114 | elif similarity == 'aesd': 115 | euclidean = lambda x: 0.5 / (1 + l2_norm(x[0], x[1])) 116 | sigmoid = lambda x: 0.5 / (1 + K.exp(-1 * params['gamma'] * (dot(x[0], x[1]) + params['c']))) 117 | return lambda x: euclidean(x) + sigmoid(x) 118 | else: 119 | raise Exception('Invalid similarity: {}'.format(similarity)) 120 | 121 | def get_kge_model(self): 122 | if self._models is None: 123 | self._models = self.build() 124 | 125 | if self._kge_model is None: 126 | subject_output, relation_output, object_output = self._models 127 | 128 | # relation_output2 = Reshape((100,100))(relation_output) 129 | sp_output = merge([subject_output, relation_output], mode='sum') 130 | # so_output = merge([subject_output, object_output], mode=lambda x: np.outer(x[0], x[1]).reshape(100000,)) 131 | # spo_output = merge([sp_output, Reshape((0,100))(object_output)], mode=lambda a, b: K.batch_dot(a, b, axes=len(a._keras_shape) - 1), 132 | # output_shape=lambda x: x[0]) 133 | spo_output = merge([sp_output, object_output], mode='sum', output_shape=lambda x: x[:-1]) 134 | 135 | self._kge_model = Model(input=[self.subject, self.relation, self.get_object()], output=[spo_output]) 136 | return self._kge_model 137 | 138 | 139 | def compile(self, optimizer, **kwargs): 140 | kge_model = self.get_kge_model() 141 | 142 | good_output = kge_model([self.subject, self.relation, self.object_good]) 143 | bad_output = kge_model([self.subject, self.relation, self.object_bad]) 144 | 145 | loss = merge([good_output, bad_output], 146 | mode=lambda x: K.maximum(1e-6, self.config['margin'] - x[0] + x[1]), 147 | output_shape=lambda x: x[0]) 148 | 149 | self.training_model = Model(input=[self.subject, self.relation, self.object_good, self.object_bad], output=loss) 150 | 
self.training_model.compile(loss=lambda y_true, y_pred: y_pred + y_true - y_true, optimizer=optimizer, **kwargs) 151 | 152 | self.prediction_model = Model(input=[self.subject, self.relation, self.object_good], output=good_output) 153 | self.prediction_model.compile(loss='binary_crossentropy', optimizer=optimizer, **kwargs) 154 | self.training_model.summary() 155 | 156 | def fit(self, x, **kwargs): 157 | assert self.training_model is not None, 'Must compile the model before fitting data' 158 | y = np.zeros(shape=x[0].shape[:1]) 159 | return self.training_model.fit(x, y, **kwargs) 160 | 161 | 162 | def train_on_batch(self, x, **kwargs): 163 | assert self.training_model is not None, 'Must compile the model before fitting data' 164 | y = np.zeros(shape=x[0].shape[:1]) 165 | return self.training_model.train_on_batch(x, y, **kwargs) 166 | 167 | def predict(self, x, **kwargs): 168 | return self.prediction_model.predict(x, **kwargs) 169 | 170 | def save_weights(self, file_name, **kwargs): 171 | assert self.prediction_model is not None, 'Must compile the model before saving weights' 172 | self.prediction_model.save_weights(file_name, **kwargs) 173 | 174 | def load_weights(self, file_name, **kwargs): 175 | assert self.prediction_model is not None, 'Must compile the model loading weights' 176 | self.prediction_model.load_weights(file_name, **kwargs) 177 | 178 | 179 | 180 | 181 | class RescalModel(KgeModel): 182 | def build(self): 183 | subject = self.subject 184 | relation = self.relation 185 | object_ = self.get_object() 186 | embedding_size = self.model_params.get('n_embed_dims', 100) 187 | 188 | # add embedding layers 189 | embedding_rel = Embedding(input_dim=self.config['n_rel'], 190 | output_dim=self.model_params.get('n_embed_dims', 100), 191 | init='he_uniform', 192 | mask_zero=False) 193 | embedding_ent = Embedding(input_dim=self.config['n_ent'], 194 | output_dim=self.model_params.get('n_embed_dims', 100), 195 | init='he_uniform', 196 | W_constraint=unitnorm(axis=1), 197 | mask_zero=False) 198 | subject_embedding = embedding_ent(subject) 199 | relation_embedding = embedding_rel(relation) 200 | object_embedding = embedding_ent(object_) 201 | 202 | subject_output = Reshape((embedding_size,))(subject_embedding) 203 | relation_output = Reshape((embedding_size,))(relation_embedding) 204 | object_output = Reshape((embedding_size,))(object_embedding) 205 | 206 | return subject_output, relation_output, object_output 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | random.seed(42) 217 | os.environ['FREEBASE_15K'] = 'data/freebase15k' 218 | 219 | 220 | class Evaluator: 221 | def __init__(self, conf=None): 222 | try: 223 | data_path = os.environ['FREEBASE_15K'] 224 | except KeyError: 225 | print("FREEBASE_15K is not set.") 226 | sys.exit(1) 227 | self.path = data_path 228 | self.conf = dict() if conf is None else conf 229 | self.params = conf.get('training_params', dict()) 230 | self.entity = self.load('freebase_15k-id2entity.pkl') 231 | self._vocab = None 232 | self._reverse_vocab = None 233 | self._eval_sets = None 234 | 235 | ##### Resources ##### 236 | 237 | def load(self, name): 238 | return pickle.load(open(os.path.join(self.path, name), 'rb')) 239 | 240 | def vocab(self): 241 | if self._vocab is None: 242 | self._vocab = self.load('vocabulary') 243 | return self._vocab 244 | 245 | def reverse_vocab(self): 246 | if self._reverse_vocab is None: 247 | vocab = self.vocab() 248 | self._reverse_vocab = dict((v.lower(), k) for k, v in vocab.items()) 249 | return self._reverse_vocab 250 | 251 | 
##### Loading / saving ##### 252 | 253 | def save_epoch(self, model, epoch): 254 | if not os.path.exists('models/freebase_models/embedding/'): 255 | os.makedirs('models/freebase_models/embedding/') 256 | model.save_weights('models/freebase_models/embedding/weights_epoch_%d.h5' % epoch, overwrite=True) 257 | 258 | def load_epoch(self, model, epoch): 259 | assert os.path.exists('models/freebase_models/embedding/weights_epoch_%d.h5' % epoch),\ 260 | 'Weights at epoch %d not found' % epoch 261 | model.load_weights('models/freebase_models/embedding/weights_epoch_%d.h5' % epoch) 262 | 263 | ##### Converting / reverting ##### 264 | 265 | def convert(self, words): 266 | rvocab = self.reverse_vocab() 267 | if type(words) == str: 268 | words = words.strip().lower().split(' ') 269 | return [rvocab.get(w, 0) for w in words] 270 | 271 | def revert(self, indices): 272 | vocab = self.vocab() 273 | return [vocab.get(i, 'X') for i in indices] 274 | 275 | ##### Padding ##### 276 | 277 | def padq(self, data): 278 | return self.pad(data, self.conf.get('question_len', None)) 279 | 280 | def pada(self, data): 281 | return self.pad(data, self.conf.get('answer_len', None)) 282 | 283 | def pad(self, data, len=None): 284 | from keras.preprocessing.sequence import pad_sequences 285 | return pad_sequences(data, maxlen=len, padding='post', truncating='post', value=0) 286 | 287 | ##### Training ##### 288 | 289 | def print_time(self): 290 | print(strftime('%Y-%m-%d %H:%M:%S :: ', gmtime()), end='') 291 | 292 | def train(self, model): 293 | eval_every = self.params.get('eval_every', None) 294 | save_every = self.params.get('save_every', None) 295 | batch_size = self.params.get('batch_size', 128) 296 | nb_epoch = self.params.get('nb_epoch', 10) 297 | split = self.params.get('validation_split', 0) 298 | 299 | training_set = self.load('freebase_15k-train.pkl') 300 | valid_set = self.load('freebase_15k-valid.pkl') 301 | 302 | subjects = list() 303 | relations = list() 304 | good_objects = list() 305 | 306 | for line in training_set: 307 | triplet = line.split('\t') 308 | subjects += [[int(triplet[0])]] 309 | relations += [[int(triplet[1])]] 310 | good_objects += [[int(triplet[2])]] 311 | 312 | subjects = np.asarray(subjects) 313 | relations = np.asarray(relations) 314 | good_objects = np.asarray(good_objects) 315 | 316 | # subjects_valid = list() 317 | # relations_valid = list() 318 | # good_objects_valid = list() 319 | # 320 | # for line in valid_set: 321 | # triplet = line.split('\t') 322 | # subjects_valid += [[int(triplet[0])]] 323 | # relations_valid += [[int(triplet[1])]] 324 | # good_objects_valid += [[int(triplet[2])]] 325 | 326 | # subjects_valid = np.asarray(subjects_valid) 327 | # relations_valid = np.asarray(relations_valid) 328 | # good_objects_valid = np.asarray(good_objects_valid) 329 | 330 | val_loss = {'loss': 1., 'epoch': 0} 331 | 332 | for i in range(1, nb_epoch+1): 333 | # bad_answers = np.roll(good_answers, random.randint(10, len(questions) - 10)) 334 | # bad_answers = good_answers.copy() 335 | # random.shuffle(bad_answers) 336 | bad_objects = np.asarray([[int(random.choice(list(self.entity.keys())))] for _ in range(len(good_objects))]) 337 | 338 | # shuffle questionsj 339 | # zipped = zip(questions, good_answers) 340 | # random.shuffle(zipped) 341 | # questions[:], good_answers[:] = zip(*zipped) 342 | 343 | print('Epoch %d :: ' % i, end='') 344 | self.print_time() 345 | model.fit([subjects, relations, good_objects, bad_objects], nb_epoch=1, batch_size=batch_size) 346 | 347 | # if 
hist.history['val_loss'][0] < val_loss['loss']: 348 | # val_loss = {'loss': hist.history['val_loss'][0], 'epoch': i} 349 | # print('Best: Loss = {}, Epoch = {}'.format(val_loss['loss'], val_loss['epoch'])) 350 | 351 | if eval_every is not None and i % eval_every == 0: 352 | self.get_mrr(model) 353 | 354 | if save_every is not None and i % save_every == 0: 355 | self.save_epoch(model, i) 356 | 357 | ##### Evaluation ##### 358 | 359 | def prog_bar(self, so_far, total, n_bars=20): 360 | n_complete = int(so_far * n_bars / total) 361 | if n_complete >= n_bars - 1: 362 | print('\r[' + '=' * n_bars + ']', end='') 363 | else: 364 | s = '\r[' + '=' * (n_complete - 1) + '>' + '.' * (n_bars - n_complete) + ']' 365 | print(s, end='') 366 | 367 | def eval_sets(self): 368 | if self._eval_sets is None: 369 | self._eval_sets = dict([(s, self.load(s)) for s in ['freebase_15k-test.pkl']]) 370 | return self._eval_sets 371 | 372 | def get_mrr(self, model, evaluate_all=False): 373 | top1s = list() 374 | mrrs = list() 375 | for name, data in self.eval_sets().items(): 376 | if evaluate_all: 377 | self.print_time() 378 | print('----- %s -----' % name) 379 | 380 | random.shuffle(data) 381 | 382 | if not evaluate_all and 'n_eval' in self.params: 383 | data = data[:self.params['n_eval']] 384 | 385 | # c_1 for hit@1, c_3 for hit@3, c_10 for hit@10 386 | c_1, c_3, c_10 = 0, 0, 0 387 | mean_ranks = list() 388 | 389 | for i, d in enumerate(data): 390 | triplet = d.split('\t') 391 | if evaluate_all: 392 | self.prog_bar(i, len(data)) 393 | 394 | candidate_objects = self.entity.keys() 395 | candidate_objects.remove(int(triplet[2])) 396 | 397 | subject = np.asarray([[int(triplet[0])]] * (len(candidate_objects)+1)) 398 | relation = np.asarray([[int(triplet[1])]] * (len(candidate_objects)+1)) 399 | objects = np.asarray([[int(triplet[2])]] + [[entity_id] for entity_id in candidate_objects]) 400 | sims = model.predict([subject, relation, objects], batch_size=len(self.entity)).flatten() 401 | r = rankdata(sims, method='max') 402 | 403 | target_rank = r[0] 404 | num_candidate = len(sims) 405 | real_rank = num_candidate - target_rank + 1 406 | 407 | # print(' '.join(self.revert(d['question']))) 408 | # print(' '.join(self.revert(self.answers[indices[max_r]]))) 409 | # print(' '.join(self.revert(self.answers[indices[max_n]]))) 410 | 411 | c_1 += 1 if target_rank == num_candidate else 0 412 | c_3 += 1 if target_rank + 3 > num_candidate else 0 413 | c_10 += 1 if target_rank + 10 > num_candidate else 0 414 | mean_ranks.append(real_rank) 415 | # c_2 += 1 / float(r[max_r] - r[max_n] + 1) 416 | 417 | hit_at_1 = c_1 / float(len(data)) 418 | hit_at_3 = c_3 / float(len(data)) 419 | hit_at_10 = c_10 / float(len(data)) 420 | avg_rank = np.mean(mean_ranks) 421 | 422 | del data 423 | 424 | if evaluate_all: 425 | print('Hit@1 Precision: %f' % hit_at_1) 426 | print('Hit@3 Precision: %f' % hit_at_3) 427 | print('Hit@10 Precision: %f' % hit_at_10) 428 | print('Mean Rank: %f' % avg_rank) 429 | 430 | # top1s.append(top1) 431 | # mrrs.append(mrr) 432 | 433 | # rerun the evaluation if above some threshold 434 | if not evaluate_all: 435 | print('Top-1 Precision: {}'.format(top1s)) 436 | print('MRR: {}'.format(mrrs)) 437 | evaluate_all_threshold = self.params.get('evaluate_all_threshold', dict()) 438 | evaluate_mode = evaluate_all_threshold.get('mode', 'all') 439 | mrr_theshold = evaluate_all_threshold.get('mrr', 1) 440 | top1_threshold = evaluate_all_threshold.get('top1', 1) 441 | 442 | if evaluate_mode == 'any': 443 | evaluate_all = evaluate_all or 
any([x >= top1_threshold for x in top1s]) 444 | evaluate_all = evaluate_all or any([x >= mrr_theshold for x in mrrs]) 445 | else: 446 | evaluate_all = evaluate_all or all([x >= top1_threshold for x in top1s]) 447 | evaluate_all = evaluate_all or all([x >= mrr_theshold for x in mrrs]) 448 | 449 | if evaluate_all: 450 | return self.get_mrr(model, evaluate_all=True) 451 | 452 | if __name__ == '__main__': 453 | conf = { 454 | 'subject_len': 1, 455 | 'relation_len': 1, 456 | 'object_len': 1, 457 | 'n_rel': 1345, # len(vocabulary) 458 | 'n_ent': 14951, 459 | 'margin': 0.2, 460 | 461 | 'training_params': { 462 | 'save_every': 100, 463 | 'eval_every': 1, 464 | 'batch_size': 128, 465 | 'nb_epoch': 1000, 466 | 'validation_split': 0, 467 | 'optimizer': Adam(), 468 | # 'optimizer': Adam(clip_norm=0.1), 469 | # 'n_eval': 100, 470 | 471 | 'evaluate_all_threshold': { 472 | 'mode': 'all', 473 | 'top1': 0.4, 474 | }, 475 | }, 476 | 477 | 'model_params': { 478 | 'n_embed_dims': 100, 479 | 'n_hidden': 200, 480 | 481 | # convolution 482 | 'nb_filters': 1000, # * 4 483 | 'conv_activation': 'relu', 484 | 485 | # recurrent 486 | 'n_lstm_dims': 141, # * 2 487 | 488 | # 'initial_embed_weights': np.load('models/wordnet_word2vec_1000_dim.h5'), 489 | }, 490 | 491 | 'similarity_params': { 492 | 'mode': 'cosine', 493 | 'gamma': 1, 494 | 'c': 1, 495 | 'd': 2, 496 | } 497 | } 498 | 499 | evaluator = Evaluator(conf) 500 | 501 | ##### Embedding model ###### 502 | model = RescalModel(conf) 503 | optimizer = conf.get('training_params', dict()).get('optimizer', 'adam') 504 | 505 | # TransE model 506 | # model = TranEModel(conf) 507 | # optimizer = conf.get('training_params', dict()).get('optimizer', 'adam') 508 | 509 | model.compile(optimizer=optimizer) 510 | 511 | # save embedding layer 512 | # evaluator.load_epoch(model, 33) 513 | # embedding_layer = model.prediction_model.layers[2].layers[2] 514 | # evaluator.load_epoch(model, 100) 515 | # evaluator.train(model) 516 | # weights = embedding_layer.get_weights()[0] 517 | # np.save(open('models/embedding_1000_dim.h5', 'wb'), weights) 518 | 519 | # train the model 520 | # evaluator.load_epoch(model, 54) 521 | evaluator.train(model) 522 | # embedding_matrix = model.prediction_model.layers[3].layers[3].get_weights()[0] 523 | # print(np.linalg.norm(embedding_matrix[1, :])) 524 | # print(np.linalg.norm(embedding_matrix[:, 1])) 525 | 526 | # evaluate mrr for a particular epoch 527 | # evaluator.load_epoch(model, 5) 528 | # evaluator.get_mrr(model, evaluate_all=True) 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | # class HolE(Layer): 540 | # def __init__(self, ndim=50, marge=1., lremb=0.1, lrparam=1., **kwargs): 541 | # super().__init__(**kwargs) 542 | # self.ndim = ndim 543 | # self.marge = marge 544 | # self.lremb = lremb 545 | # self.lrparam = lrparam 546 | 547 | 548 | 549 | 550 | # import itertools 551 | # import logging 552 | # import numpy as np 553 | # import os 554 | # import time 555 | # import theano as th 556 | # import theano.tensor as T 557 | # from .gradient_descent import gd 558 | # from ..data_structures import triple_tensor as tt 559 | # from ..experiments.metrics import auprc 560 | # from .optimization import sgd_on_triples 561 | # from ..experiments.helper import tolist 562 | # _log = logging.getLogger(__name__) 563 | # DTYPE = th.config.floatX # @UndefinedVariable 564 | # def init_uniform(rng, n, d, dtype=np.float32): 565 | # wbound = np.sqrt(6. 
/ d) 566 | # W_values = rng.uniform(low=-wbound, high=wbound, size=(d, n)) 567 | # W_values = W_values / np.sqrt(np.sum(W_values ** 2, axis=0)) 568 | # W_values = np.asarray(W_values, dtype=dtype) 569 | # return W_values.T 570 | # class TranslationalEmbeddingsModel(object): 571 | # """Translational Embeddings Model. 572 | # Implementation of TransE: 573 | # Antoine Bordes, Nicolas Usunier, Alberto Garcia-Duran, Jason Weston, Oksana 574 | # Yakhnenko. Translating Embeddings for Modeling Multi-relational Data. 575 | # NIPS 2013 576 | # Parameters 577 | # ---------- 578 | # consider_tc : bool 579 | # Whether or not to consider information about type constraints in the 580 | # data. 581 | # Defaults to True. 582 | # simfn : string. 583 | # 'L1' or 'L2' similarity function. 584 | # Defaults to 'L1'. 585 | # ndim : int 586 | # Dimension of the latent embeddings (rank). 587 | # Defaults to 50. 588 | # marge : float 589 | # Margin in the margin based ranking function (gamma in the paper). 590 | # Defaults to 1. 591 | # lremb : float 592 | # Learning rate for latent embeddings. 593 | # Defaults to 0.1. 594 | # lrparam : float 595 | # Learning rate for other parameters. 596 | # Defaults to 1.0. 597 | # mbatchsize : int 598 | # Size of the minibatch. 599 | # Defaults to 128. 600 | # totepoches : int 601 | # Maximum epoches (how often the model is trained on the complete 602 | # dataset). 603 | # Defaults to 500. 604 | # neval : int 605 | # Validate performance every nth minibatch. 606 | # Defaults to 1. 607 | # lcwa : bool 608 | # If true and consider_tc is True, approximate the type constraints from 609 | # the data with the local closed-world assumption. 610 | # Defaults to `False`. 611 | # seed : int 612 | # Seed used for random number generation. 613 | # Defaults to 123. 614 | # savepath : string 615 | # Location where to save the best model parameters. 616 | # Defaults to ./transE. 
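# (Illustrative note, not part of the original docstring: with entity
# embeddings E and relation embeddings R as NumPy arrays, the L1 score of a
# triple (h, l, t) is -np.sum(np.abs(E[h] + R[l] - E[t])), i.e. the negated
# distance d(h+l, t) that the margin-based ranking loss compares between
# observed and corrupted triples.)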
617 | # """ 618 | # def __init__(self, consider_tc=True, simfn='L1', ndim=50, marge=1., 619 | # lremb=0.1, lrparam=1., mbatchsize=128, maxepoch=500, 620 | # neval=100, lcwa=False, seed=123, conv=1e-4, 621 | # savepath='./transE', dtype=DTYPE, 622 | # mid=np.random.randint(1000000)): 623 | # model_id = (time.strftime('%d_%m_%y___%H_%M_%S') + 624 | # '%d-%d_' % (mid, np.random.randint(100000))) 625 | # self.simfn = simfn 626 | # self.ndim = ndim 627 | # self.marge = marge 628 | # self.lremb = lremb 629 | # self.lrparam = lrparam 630 | # self.mbatchsize = mbatchsize 631 | # self.maxepoch = maxepoch 632 | # self.neval = neval 633 | # self.seed = seed 634 | # self.corrupted = 1 635 | # self.corrupted_axes = [0, 1] 636 | # self.rng = np.random.RandomState(seed) 637 | # self.dtype = dtype 638 | # self.consider_tc = consider_tc 639 | # self.lcwa = lcwa 640 | # self.conv = conv 641 | # self.params = [ndim, marge, lremb, lrparam, simfn, seed, consider_tc, 642 | # lcwa] 643 | # self.parallization_precautions = False 644 | # self.savefile = os.path.join(savepath, 645 | # model_id+type(self).__name__+".pkl") 646 | # # create path where the model is saved 647 | # if not os.path.isdir(savepath): 648 | # os.mkdir(savepath) 649 | # def __graph_pred(self, X): 650 | # # Translational Embeddings Function d(h+l,t) 651 | # e = self.E[X[:, :2].T.reshape((-1,))] 652 | # h = e[:e.shape[0]//2] 653 | # l = self.R[X[:, 2]] 654 | # t = e[e.shape[0]//2:] 655 | # return (-T.sum(T.abs_((h+l)-t), axis=1) 656 | # if self.simfn == 'L1' 657 | # else - T.sqrt(T.sum(T.sqr((h+l)-t), axis=1))) 658 | # def __graph_train(self, X, Xc): 659 | # # Translational Embeddings max-margin loss function 660 | # E = self.E[T.concatenate([X[:, :2], Xc[:, :2]], 661 | # axis=1).T.reshape((-1,))] 662 | # R = self.R[T.concatenate([X[:, 2], Xc[:, 2]])] 663 | # e = E[:E.shape[0]//2] 664 | # h = e[:e.shape[0]//2] 665 | # l = R[:R.shape[0]//2] 666 | # t = e[e.shape[0]//2:] 667 | # outputX = (-T.sum(T.abs_((h+l)-t), axis=1) 668 | # if self.simfn == 'L1' 669 | # else - T.sqrt(T.sum(T.sqr((h+l)-t), axis=1))) 670 | # ec = E[E.shape[0]//2:] 671 | # hc = ec[:ec.shape[0]//2] 672 | # lc = R[R.shape[0]//2:] 673 | # tc = ec[ec.shape[0]//2:] 674 | # outputXc = (-T.sum(T.abs_((hc+lc)-tc), axis=1) 675 | # if self.simfn == 'L1' 676 | # else - T.sqrt(T.sum(T.sqr((hc+lc)-tc), axis=1))) 677 | # loss = outputXc - outputX + self.marge 678 | # return T.sum(loss * (loss > 0)) 679 | # def loss_func(self, indices, Y): 680 | # # Metric used for early stopping 681 | # return 1-auprc(Y, self.func(indices)) 682 | # def fit(self, tensor): 683 | # if not self.consider_tc: 684 | # # remove type-constraint information 685 | # tensor.type_constraints = [[None, None] 686 | # for i in xrange(tensor.shape[2])] 687 | # elif self.lcwa: 688 | # tensor.approximate_type_constraints() 689 | # self.type_constraints = tensor.type_constraints 690 | # self.Nent = tensor.shape[0] 691 | # self.Nrel = tensor.shape[2] 692 | # self.samplefunc = tt.compute_corrupted_bordes 693 | # X = T.imatrix("X") # matrices with triple indices 694 | # Xc = T.imatrix("Xc") # corrupted entities 695 | # self.E = th.shared( 696 | # value=init_uniform(self.rng, tensor.shape[0], self.ndim, 697 | # dtype=self.dtype), name="Ents_emb") 698 | # self.R = th.shared( 699 | # value=init_uniform(self.rng, tensor.shape[0], self.ndim, 700 | # dtype=self.dtype), name="Rels_emb") 701 | # self.parameters = [self.E, self.R] 702 | # # Output function TransE: d(h+l,t) 703 | # self.func = th.function([X], self.__graph_pred(X)) 704 | # # 
Define the cost function 705 | # loss_pos = self.__graph_train(X, Xc) 706 | # # Normalization function for embeddings of entities: 707 | # batch_idcs = T.ivector('batch_idcs') 708 | # update = OrderedDict({self.E: T.set_subtensor( 709 | # self.E[batch_idcs], self.E[batch_idcs] / 710 | # T.sqrt(T.sum(self.E[batch_idcs] ** 2, axis=1, keepdims=True)))}) 711 | # self.normalize = th.function([batch_idcs], [], updates=update) 712 | # # Update function 713 | # self.update_func = gd([X, Xc], loss_pos, self.parameters, 714 | # lr=[self.lremb, 715 | # self.lrparam/float(self.mbatchsize)]) 716 | # # Train the model with stg 717 | # fitted_parameters, self.used_epochs, self.epoch_times = ( 718 | # sgd_on_triples(self.rng, tensor, self, neval=self.neval, 719 | # mbsize=self.mbatchsize, unlabeled=True, 720 | # copy_X_train=not self.parallization_precautions)) 721 | # for i, parameter in enumerate(fitted_parameters): 722 | # self.parameters[i].set_value(parameter.get_value()) 723 | # @property 724 | # def sparsity(self): 725 | # raise NotImplementedError 726 | # def clear(self): 727 | # """Deletes the memory expensive parameters.""" 728 | # del self.E 729 | # del self.R 730 | # del self.parameters 731 | # os.remove(self.savefile) 732 | # def predict(self, indices): 733 | # # This should be just d(h+l,t) 734 | # return self.func(indices) 735 | # @staticmethod 736 | # def model_creator(settings): 737 | # # For loading multiple model parameters from a configuration file 738 | # confs = None 739 | # if settings['try_all_reg_combinations']: 740 | # confs = list(itertools.product(tolist(settings['rank']), 741 | # tolist(settings['gamma']), 742 | # tolist(settings['lrate_emb']), 743 | # tolist(settings['lrate_par']))) 744 | # else: 745 | # confs = [[r, m, lr1, lr2] 746 | # for r, m, lr1, lr2 in 747 | # zip(tolist(settings['rank']), 748 | # tolist(settings['gamma']), 749 | # tolist(settings['lrate_emb']), 750 | # tolist(settings['lrate_par']))] 751 | # confs = list(itertools.product(tolist(settings['seed']), confs)) 752 | # models = [] 753 | # for i, conf in enumerate(confs): 754 | # s, conf = conf 755 | # r, m, lr1, lr2 = conf 756 | # models.append(TranslationalEmbeddingsModel( 757 | # consider_tc=settings['consider_tc'], 758 | # simfn=str.upper(settings['simfn']), 759 | # ndim=r, 760 | # marge=m, 761 | # lremb=lr1, 762 | # lrparam=lr2, 763 | # mbatchsize=settings['mbatchsize'], 764 | # maxepoch=settings['maxepoch'], 765 | # neval=settings['neval'], 766 | # lcwa=settings['lcwa'], 767 | # seed=s, 768 | # savepath=settings['savepath'], 769 | # mid=i)) 770 | # return models -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | net.sansa_stack.ml.kge 8 | sansa-kge 9 | 0.0.1-SNAPSHOT 10 | ${project.artifactId} 11 | Knowledge graph factorization 12 | 2016 13 | 14 | 15 | 1.8 16 | 1.8 17 | UTF-8 18 | 2.0.1 19 | 2.11.7 20 | 2.11 21 | ${scala.compat.version} 22 | 23 | 24 | 25 | https://github.com/SANSA-Stack/Spark-Tensors 26 | scm:git:git://github.com/SANSA-Stack/Spark-Tensors.git 27 | scm:git:git@github.com:SANSA-Stack/Spark-Tensors.git 28 | HEAD 29 | 30 | 31 | 32 | 33 | oss-sonatype 34 | oss-sonatype 35 | https://oss.sonatype.org/content/repositories/snapshots/ 36 | 37 | true 38 | 39 | 40 | 41 | apache-snapshot 42 | Apache repository (snapshots) 43 | https://repository.apache.org/content/repositories/snapshots/ 44 | 45 | true 46 | 47 | 48 | 49 | maven.aksw.internal 50 | AKSW 
Release Repository 51 | http://maven.aksw.org/archiva/repository/internal 52 | 53 | true 54 | 55 | 56 | false 57 | 58 | 59 | 60 | maven.aksw.snapshots 61 | AKSW Snapshot Repository 62 | http://maven.aksw.org/archiva/repository/snapshots 63 | 64 | false 65 | 66 | 67 | true 68 | 69 | 70 | 71 | 72 | 73 | 74 | ml.dmlc.mxnet 75 | mxnet-spark_${scala.compat.version} 76 | 0.10.1-SNAPSHOT 77 | 78 | 79 | * 80 | * 81 | 82 | 83 | 84 | 85 | ml.dmlc.mxnet 86 | mxnet-full_${scala.compat.version}-${platform} 87 | 0.10.1-SNAPSHOT 88 | 89 | 90 | * 91 | * 92 | 93 | 94 | 95 | 96 | net.sansa-stack 97 | sansa-rdf-spark-core 98 | 0.1.1-SNAPSHOT 99 | 100 | 101 | org.apache.spark 102 | spark-graphx_${scala.compat.version} 103 | ${spark.version} 104 | 105 | 106 | org.apache.spark 107 | spark-core_${scala.compat.version} 108 | ${spark.version} 109 | 110 | 111 | org.apache.spark 112 | spark-mllib_${scala.compat.version} 113 | ${spark.version} 114 | 115 | 116 | 117 | 118 | 119 | 120 | org.apache.maven.plugins 121 | maven-compiler-plugin 122 | 123 | 124 | 125 | net.alchim31.maven 126 | scala-maven-plugin 127 | 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-surefire-plugin 132 | 133 | 134 | 135 | com.amashchenko.maven.plugin 136 | gitflow-maven-plugin 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | org.apache.maven.plugins 147 | maven-compiler-plugin 148 | 3.6.0 149 | 150 | ${maven.compiler.source} 151 | ${maven.compiler.target} 152 | 153 | 154 | 155 | 156 | net.alchim31.maven 157 | scala-maven-plugin 158 | 3.2.2 159 | 160 | 161 | 162 | 163 | 164 | 165 | add-source 166 | compile 167 | testCompile 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | org.apache.maven.plugins 184 | maven-surefire-plugin 185 | 2.18.1 186 | 187 | false 188 | true 189 | 190 | 191 | 192 | **/*Test.* 193 | **/*Suite.* 194 | 195 | 196 | 197 | 198 | 199 | com.amashchenko.maven.plugin 200 | gitflow-maven-plugin 201 | 1.3.1 202 | 203 | 204 | v 205 | 206 | false 207 | 208 | 209 | 210 | 211 | org.apache.maven.plugins 212 | maven-shade-plugin 213 | 2.4.3 214 | 215 | 216 | 217 | package 218 | 219 | shade 220 | 221 | 222 | true 223 | jar-with-dependencies 224 | 225 | 226 | *:* 227 | 228 | META-INF/*.SF 229 | META-INF/*.DSA 230 | META-INF/*.RSA 231 | 232 | 233 | 234 | 235 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 249 | 250 | org.eclipse.m2e 251 | lifecycle-mapping 252 | 1.0.0 253 | 254 | 255 | 256 | 257 | 258 | 259 | net.alchim31.maven 260 | 261 | 262 | scala-maven-plugin 263 | 264 | 265 | [3.2.0,) 266 | 267 | 268 | testCompile 269 | compile 270 | add-source 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | doclint-java8-disable 289 | 290 | [1.8,) 291 | 292 | 293 | 294 | 295 | 296 | org.apache.maven.plugins 297 | maven-javadoc-plugin 298 | 2.9.1 299 | 300 | 301 | attach-javadocs 302 | 303 | jar 304 | 305 | 306 | false 307 | 308 | 309 | 310 | 311 | -Xdoclint:none 312 | 313 | 314 | 315 | 316 | 317 | 318 | osx-x86_64-cpu 319 | 320 | 321 | mac 322 | x86_64 323 | 324 | 325 | 326 | osx-x86_64-cpu 327 | 328 | 329 | 330 | linux-x86_64-cpu 331 | 332 | 333 | linux 334 | 335 | 336 | 337 | linux-x86_64-cpu 338 | 339 | 340 | 341 | linux-x86_64-gpu 342 | 343 | linux-x86_64-gpu 344 | 345 | 346 | 347 | release 348 | 349 | 350 | 351 | org.apache.maven.plugins 352 | maven-gpg-plugin 353 | 1.6 354 | 355 | 356 | sign-artifacts 357 | verify 358 | 359 | sign 360 | 361 | 362 | AKSW 363 | ${gpg.keyname} 364 | 365 | 366 | 367 | 368 | 
369 | 370 | 371 | 372 | -------------------------------------------------------------------------------- /python/sansa/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'nilesh' 2 | -------------------------------------------------------------------------------- /python/sansa/ml/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'nilesh' 2 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'nilesh' 2 | 3 | import keras -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'nilesh' 2 | 3 | from .hole import HolE 4 | from .base import StochasticTrainer, PairwiseStochasticTrainer, KerasTrainer 5 | from .actfun import afuns as activation_functions 6 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/actfun.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import inspect 4 | 5 | 6 | class ActivationFunction(object): 7 | 8 | @classmethod 9 | def key(cls): 10 | return cls.__name__.lower() 11 | 12 | 13 | class Linear(ActivationFunction): 14 | 15 | @staticmethod 16 | def f(x): 17 | return x 18 | 19 | @staticmethod 20 | def g_given_f(fx): 21 | #return 1 22 | return np.ones(fx.shape[0]) 23 | 24 | # return np.ones((fx.shape[0], 1)) 25 | 26 | 27 | class Sigmoid(ActivationFunction): 28 | 29 | @staticmethod 30 | def f(x): 31 | return 1.0 / (1 + np.exp(-x)) 32 | 33 | @staticmethod 34 | def g_given_f(fx): 35 | return fx * (1.0 - fx) 36 | 37 | 38 | class Tanh(ActivationFunction): 39 | 40 | @staticmethod 41 | def f(x): 42 | return np.tanh(x) 43 | 44 | @staticmethod 45 | def g_given_f(fx): 46 | return 1 - fx ** 2 47 | 48 | 49 | class ReLU(ActivationFunction): 50 | 51 | @staticmethod 52 | def f(x): 53 | return np.maximum(0, x) 54 | 55 | @staticmethod 56 | def g_given_f(fx): 57 | return np.int_(fx > 0) 58 | 59 | 60 | class Softplus(ActivationFunction): 61 | 62 | @staticmethod 63 | def f(x): 64 | return np.log(1 + np.exp(x)) 65 | 66 | @staticmethod 67 | def g(x): 68 | raise NotImplementedError() 69 | 70 | 71 | afuns = {} 72 | for cls in ActivationFunction.__subclasses__(): 73 | afuns[cls.key()] = cls 74 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/base.py: -------------------------------------------------------------------------------- 1 | import keras 2 | from keras.engine.topology import Layer 3 | from keras.models import Sequential 4 | from keras.layers import Dense, Activation 5 | from keras import backend as K 6 | from keras.optimizers import Adagrad, SGD 7 | import math 8 | import theano.tensor as T 9 | import numpy as np 10 | from numpy.random import shuffle 11 | from collections import defaultdict, Counter 12 | from sansa.ml.kbc.keras.param import Parameter, AdaGrad 13 | import timeit 14 | import pickle 15 | 16 | _cutoff = 30 17 | 18 | _DEF_NBATCHES = 100 19 | _DEF_POST_EPOCH = [] 20 | _DEF_LEARNING_RATE = 0.1 21 | _DEF_SAMPLE_FUN = None 22 | _DEF_MAX_EPOCHS = 1000 23 | _DEF_MARGIN = 1.0 24 | 25 | 26 | class Config(object): 27 | 28 | def __init__(self, model, 
trainer): 29 | self.model = model 30 | self.trainer = trainer 31 | 32 | def __getstate__(self): 33 | return { 34 | 'model': self.model, 35 | 'trainer': self.trainer 36 | } 37 | 38 | 39 | class Model(object): 40 | """ 41 | Base class for all Knowledge Graph models 42 | 43 | Implements basic setup routines for parameters and serialization methods 44 | 45 | Subclasses need to implement: 46 | - scores(self, ss, ps, os) 47 | - _gradients(self, xys) for StochasticTrainer 48 | - _pairwise_gradients(self, pxs, nxs) for PairwiseStochasticTrainer 49 | """ 50 | 51 | def __init__(self, *args, **kwargs): 52 | #super(Model, self).__init__(*args, **) 53 | self.params = {} 54 | self.hyperparams = {} 55 | self.add_hyperparam('init', kwargs.pop('init', 'nunif')) 56 | 57 | def add_param(self, param_id, shape, post=None, value=None): 58 | if value is None: 59 | value = Parameter(shape, self.init, name=param_id, post=post) 60 | setattr(self, param_id, value) 61 | self.params[param_id] = value 62 | 63 | def add_hyperparam(self, param_id, value): 64 | setattr(self, param_id, value) 65 | self.hyperparams[param_id] = value 66 | 67 | def __getstate__(self): 68 | return { 69 | 'hyperparams': self.hyperparams, 70 | 'params': self.params 71 | } 72 | 73 | def __setstate__(self, st): 74 | self.params = {} 75 | self.hyperparams = {} 76 | for pid, p in st['params'].items(): 77 | self.add_param(pid, None, None, value=p) 78 | for pid, p in st['hyperparams'].items(): 79 | self.add_hyperparam(pid, p) 80 | 81 | def save(self, fname, protocol=pickle.HIGHEST_PROTOCOL): 82 | with open(fname, 'wb') as fout: 83 | pickle.dump(self, fout, protocol=protocol) 84 | 85 | @staticmethod 86 | def load(fname): 87 | with open(fname, 'rb') as fin: 88 | mdl = pickle.load(fin) 89 | return mdl 90 | 91 | class KerasTrainer(object): 92 | """ 93 | Keras model trainer 94 | """ 95 | 96 | def __init__(self, *args, **kwargs): 97 | self.model = args[0] 98 | self.hyperparams = {} 99 | self.add_hyperparam('max_epochs', kwargs.pop('max_epochs', _DEF_MAX_EPOCHS)) 100 | self.add_hyperparam('nbatches', kwargs.pop('nbatches', _DEF_NBATCHES)) 101 | self.add_hyperparam('learning_rate', kwargs.pop('learning_rate', _DEF_LEARNING_RATE)) 102 | 103 | self.post_epoch = kwargs.pop('post_epoch', _DEF_POST_EPOCH) 104 | self.samplef = kwargs.pop('samplef', _DEF_SAMPLE_FUN) 105 | 106 | def __getstate__(self): 107 | return self.hyperparams 108 | 109 | def __setstate__(self, st): 110 | for pid, p in st['hyperparams']: 111 | self.add_hyperparam(pid, p) 112 | 113 | def add_hyperparam(self, param_id, value): 114 | setattr(self, param_id, value) 115 | self.hyperparams[param_id] = value 116 | 117 | def fit(self, xs, ys): 118 | self._optim(list(zip(xs, ys))) 119 | 120 | 121 | def getModel(self): 122 | # # Training stuff 123 | # batch_placeholder = K.placeholder(shape=(3,), name="batch") 124 | # label_placeholder = K.placeholder(shape=(1,), name="label") 125 | 126 | # Model stuff 127 | # E = K.variable(self.model.E, name="entity_embeddings") 128 | # R = K.variable(self.model.R, name="relation_embeddings") 129 | model = Sequential() 130 | model.add(HolographicLayerTest(self.model.E.shape[0], self.model.R.shape[0], self.model.E.shape[1], self.model.rparam)) 131 | # model.add(Dense(5, input_dim=(10,))) 132 | # model.add(Activation('sigmoid')) 133 | # adagrad = Adagrad(lr=0.001, epsilon=1e-07) 134 | adagrad = SGD(lr=0.001, decay=1e-06, nesterov=True, momentum=0.5) 135 | 136 | def loss(y_true, y_pred): 137 | print(y_pred) 138 | return -K.sum(K.log(K.sigmoid(-y_true * y_pred))) 139 | 140 
| print("Compiling new model") 141 | model.compile(optimizer=adagrad, loss=loss) 142 | return model 143 | 144 | def _pre_epoch(self): 145 | self.loss = 0 146 | 147 | def _optim(self, xys): 148 | idx = np.arange(len(xys)) 149 | # self.batch_size = np.ceil(len(xys) / self.nbatches) 150 | 151 | # batch_idx = np.arange(self.batch_size, len(xys), self.batch_size) 152 | 153 | model = self.getModel() 154 | 155 | for self.epoch in range(1, self.max_epochs + 1): 156 | # shuffle training examples 157 | self._pre_epoch() 158 | shuffle(idx) 159 | 160 | # store epoch for callback 161 | self.epoch_start = timeit.default_timer() 162 | 163 | # process mini-batches 164 | # for batch in np.split(idx, batch_idx): 165 | # # select indices for current batch 166 | # bxys = [xys[z] for z in batch] 167 | # self._process_batch(bxys, model) 168 | 169 | self._process_batch(xys, model) 170 | 171 | # check callback function, if false return 172 | for f in self.post_epoch: 173 | if not f(self): 174 | break 175 | 176 | # 177 | # print (self.model.E.shape) 178 | # print (self.model.R.shape) 179 | 180 | def _process_batch(self, xys, model): 181 | # if enabled, sample additional examples 182 | if self.samplef is not None: 183 | xys += self.samplef(xys) 184 | 185 | if hasattr(self.model, '_prepare_batch_step'): 186 | self.model._prepare_batch_step(xys) 187 | 188 | shuffle(xys) 189 | # take step for batch 190 | assert isinstance(model, keras.models.Model) 191 | xs, ys = [np.array(i) for i in list(zip(*xys))] 192 | # print(xs, ys) 193 | # print(xs.shape, ys.shape) 194 | 195 | class LossHistory(keras.callbacks.Callback): 196 | def on_train_begin(self, logs={}): 197 | self.loss = -1 198 | 199 | def on_batch_end(self, batch, logs={}): 200 | self.loss = logs.get('loss') 201 | 202 | history = LossHistory() 203 | # print(Counter(ys)) 204 | # x = K.placeholder(shape=(1,3)) 205 | # func = K.function([x], model(x)) 206 | # for i, j in zip(xs, ys): 207 | # print(func([[i]]), j) 208 | model.fit(xs, ys, batch_size=len(xs)/100, nb_epoch=100, callbacks=[history]) 209 | loss = history.loss 210 | # loss = model.train_on_batch(xs, ys) 211 | E, R = model.layers[0].get_weights() 212 | # print (np.linalg.norm(self.model.E-E, 'fro')) 213 | # print (np.linalg.norm(self.model.R-R, 'fro')) 214 | self.model.E, self.model.R = E, R 215 | 216 | # print(loss) 217 | 218 | self.loss += loss 219 | # print (acc) 220 | 221 | class HolographicLayer(Layer): 222 | def __init__(self, E, R, rparam, input_shape=(3,), **kwargs): 223 | self.E = E 224 | self.R = R 225 | self.rparam = rparam 226 | kwargs["input_shape"] = input_shape 227 | super(HolographicLayer, self).__init__(**kwargs) 228 | 229 | def ccorr1d_sc(self, input, filters, image_shape=None, filter_shape=None, 230 | border_mode='valid', subsample=(1,), filter_flip=True): 231 | """ 232 | using conv2d with a single input channel 233 | """ 234 | # if border_mode not in ('valid', 0, (0,)): 235 | # raise RuntimeError("Unsupported border_mode for conv1d_sc: " 236 | # "%s" % border_mode) 237 | 238 | if image_shape is None: 239 | image_shape_sc = None 240 | else: 241 | # (b, c, i0) to (b, 1, c, i0) 242 | image_shape_sc = (image_shape[0], 1, image_shape[1], image_shape[2]) 243 | 244 | if filter_shape is None: 245 | filter_shape_sc = None 246 | else: 247 | filter_shape_sc = (filter_shape[0], 1, filter_shape[1], 248 | filter_shape[2]) 249 | 250 | input_sc = input.dimshuffle('x', 'x', 0, 'x') 251 | # We need to flip the channels dimension because it will be convolved over. 
252 | filters_sc = filters.dimshuffle('x', 'x', 0, 'x')[:, :, ::-1, :] 253 | 254 | conved = T.nnet.conv2d(input_sc, filters_sc, image_shape_sc, 255 | filter_shape_sc, subsample=(1, subsample[0]), 256 | filter_flip=filter_flip, border_mode=border_mode).flatten() 257 | return conved # drop the unused dimension 258 | 259 | def build(self, input_shape): 260 | self.trainable_weights = [self.E, self.R] 261 | # from keras.regularizers import l2 262 | # regularizer = l2(self.rparam) 263 | # regularizer.set_param(K.concatenate([self.E, self.R], axis=0)) 264 | # self.regularizers.append(regularizer) 265 | 266 | def call(self, x, mask=None): 267 | batch_placeholder = K.cast(x, 'int32')[0] 268 | s, o, p = [batch_placeholder[i] for i in range(3)] 269 | 270 | s2v = K.gather(self.E, s) 271 | o2v = K.gather(self.E, o) 272 | r2v = K.gather(self.R, p) 273 | 274 | # print(K.shape(s2v).eval()) 275 | # print(self.E[[0]].shape.eval()) 276 | 277 | def ccorr(a, b): 278 | return self.ccorr1d_sc(a, b, border_mode='half') 279 | 280 | eta = K.dot(K.transpose(r2v), ccorr(s2v, o2v)) 281 | return eta 282 | 283 | def get_output_shape_for(self, input_shape): 284 | return (input_shape[0], 1) 285 | 286 | class HolographicLayerTest(Layer): 287 | def __init__(self, E, R, d, rparam, input_shape=(3,), **kwargs): 288 | bnd = math.sqrt(6) / math.sqrt(2*E) 289 | from numpy.random import uniform 290 | self.init = [K.variable(uniform(size=(E,d), low=-bnd, high=bnd), name="E"), 291 | K.variable(uniform(size=(R,d*d), low=-bnd, high=bnd), name="R")] 292 | self.rparam = rparam 293 | kwargs["input_shape"] = input_shape 294 | super(HolographicLayerTest, self).__init__(**kwargs) 295 | 296 | def ccorr1d_sc(self, input, filters, image_shape=None, filter_shape=None, 297 | border_mode='valid', subsample=(1,), filter_flip=True): 298 | """ 299 | using conv2d with a single input channel 300 | """ 301 | # if border_mode not in ('valid', 0, (0,)): 302 | # raise RuntimeError("Unsupported border_mode for conv1d_sc: " 303 | # "%s" % border_mode) 304 | 305 | if image_shape is None: 306 | image_shape_sc = None 307 | else: 308 | # (b, c, i0) to (b, 1, c, i0) 309 | image_shape_sc = (image_shape[0], 1, image_shape[1], image_shape[2]) 310 | 311 | if filter_shape is None: 312 | filter_shape_sc = None 313 | else: 314 | filter_shape_sc = (filter_shape[0], 1, filter_shape[1], 315 | filter_shape[2]) 316 | 317 | input_sc = input.dimshuffle('x', 'x', 0, 'x') 318 | # We need to flip the channels dimension because it will be convolved over. 
319 | filters_sc = filters.dimshuffle('x', 'x', 0, 'x')[:, :, ::-1, :] 320 | 321 | conved = T.nnet.conv2d(input_sc, filters_sc, image_shape_sc, 322 | filter_shape_sc, subsample=(1, subsample[0]), 323 | filter_flip=filter_flip, border_mode=border_mode).flatten() 324 | return conved # drop the unused dimension 325 | 326 | def build(self, input_shape): 327 | self.E, self.R = self.init 328 | self.trainable_weights = [self.E, self.R] 329 | # from keras.regularizers import l2 330 | # regularizer = l2(self.rparam) 331 | # regularizer.set_param(self.E) 332 | # self.regularizers.append(regularizer) 333 | # 334 | # regularizer = l2(self.rparam) 335 | # regularizer.set_param(self.R) 336 | # self.regularizers.append(regularizer) 337 | 338 | def call(self, x, mask=None): 339 | batch_placeholder = K.cast(x, 'int32')[0] 340 | s, o, p = [batch_placeholder[i] for i in range(3)] 341 | 342 | s2v = K.gather(self.E, s) 343 | o2v = K.gather(self.E, o) 344 | r2v = K.gather(self.R, p) 345 | 346 | def ccorr(a, b): 347 | return T.outer(a,b).flatten() 348 | # return self.ccorr1d_sc(a, b, border_mode='half') 349 | eta = K.dot(r2v, ccorr(s2v, o2v)) 350 | 351 | # func = K.function([s2v,o2v,r2v], K.gradients(K.sigmoid(eta), [s2v,o2v,r2v])) 352 | # print(func([np.random.random(150),np.random.random(150),np.random.random(150)])) 353 | 354 | return eta 355 | 356 | def get_output_shape_for(self, input_shape): 357 | return (input_shape[0], 1) 358 | 359 | class TheanoGradTest(object): 360 | def ccorr1d_sc(self, input, filters, image_shape=None, filter_shape=None, 361 | border_mode='valid', subsample=(1,), filter_flip=True): 362 | """ 363 | using conv2d with a single input channel 364 | """ 365 | # if border_mode not in ('valid', 0, (0,)): 366 | # raise RuntimeError("Unsupported border_mode for conv1d_sc: " 367 | # "%s" % border_mode) 368 | 369 | if image_shape is None: 370 | image_shape_sc = None 371 | else: 372 | # (b, c, i0) to (b, 1, c, i0) 373 | image_shape_sc = (image_shape[0], 1, image_shape[1], image_shape[2]) 374 | 375 | if filter_shape is None: 376 | filter_shape_sc = None 377 | else: 378 | filter_shape_sc = (filter_shape[0], 1, filter_shape[1], 379 | filter_shape[2]) 380 | 381 | input_sc = input.dimshuffle('x', 'x', 0, 'x') 382 | # We need to flip the channels dimension because it will be convolved over. 
383 | filters_sc = filters.dimshuffle('x', 'x', 0, 'x')[:, :, ::-1, :] 384 | 385 | conved = T.nnet.conv2d(input_sc, filters_sc, image_shape_sc, 386 | filter_shape_sc, subsample=(1, subsample[0]), 387 | filter_flip=filter_flip, border_mode=border_mode).flatten() 388 | return conved # drop the unused dimension 389 | 390 | 391 | def call(self): 392 | E = K.variable(np.random.random((1000,100)), name="entity_embeddings") 393 | R = K.variable(np.random.random((10,10000)), name="relation_embeddings") 394 | x = K.placeholder(shape=(1,3), name="spo") 395 | y = K.placeholder(ndim=0, name="y") 396 | batch_placeholder = K.cast(x, 'int32')[0] 397 | # print(batch_placeholder.eval()) 398 | s, o, p = [batch_placeholder[i] for i in range(3)] 399 | 400 | s2v = K.gather(E, s) 401 | o2v = K.gather(E, o) 402 | r2v = K.gather(R, p) 403 | 404 | def ccorr(a, b): 405 | return T.outer(a,b).flatten() 406 | # return T.arctan(s2v) + T.arctan(o2v) 407 | # return (s2v.dimshuffle('x', 'x', 0, 'x') + o2v.dimshuffle('x', 'x', 0, 'x')).flatten() 408 | # return T.nnet.conv2d(a.dimshuffle('x', 'x', 0, 'x'), b.dimshuffle('x', 'x', 0, 'x'), None, 409 | # None, 410 | # filter_flip=True, border_mode='half') 411 | # return self.ccorr1d_sc(a, b, border_mode='half') 412 | eta = K.dot(r2v, ccorr(s2v, o2v)) 413 | # py = 1/(1+K.exp(-eta)) 414 | # l = -K.log(py) 415 | # from theano import pp, function, printing 416 | # grad = T.grad(eta, E) 417 | # print(pp(grad)) 418 | # func = function([x], grad) 419 | func = K.function([x, y], K.gradients(eta, [s2v, o2v, r2v, E, R])) 420 | 421 | # for i in func.maker.fgraph.outputs: 422 | # print(pp(i)) 423 | # print (T.grad(py, s2v)) 424 | print (func([[[1,2,3]], -1])) 425 | 426 | class StochasticTrainer(object): 427 | """ 428 | Stochastic gradient descent trainer with scalar loss function. 429 | 430 | Models need to implement 431 | 432 | _gradients(self, xys) 433 | 434 | to be trained with this class. 
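    (Clarifying note: as in the HolE model in hole.py, _gradients(xys) is
    expected to return a dict mapping each parameter id to a (gradient, index)
    pair, e.g. {'E': (ge, eidx), 'R': (gr, ridx)}; _batch_step passes each
    pair to the corresponding per-parameter updater.)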
435 | 436 | """ 437 | 438 | def __init__(self, *args, **kwargs): 439 | self.model = args[0] 440 | self.hyperparams = {} 441 | self.add_hyperparam('max_epochs', kwargs.pop('max_epochs', _DEF_MAX_EPOCHS)) 442 | self.add_hyperparam('nbatches', kwargs.pop('nbatches', _DEF_NBATCHES)) 443 | self.add_hyperparam('learning_rate', kwargs.pop('learning_rate', _DEF_LEARNING_RATE)) 444 | 445 | self.post_epoch = kwargs.pop('post_epoch', _DEF_POST_EPOCH) 446 | self.samplef = kwargs.pop('samplef', _DEF_SAMPLE_FUN) 447 | pu = kwargs.pop('param_update', AdaGrad) 448 | self._updaters = { 449 | key: pu(param, self.learning_rate) 450 | for key, param in self.model.params.items() 451 | } 452 | 453 | def __getstate__(self): 454 | return self.hyperparams 455 | 456 | def __setstate__(self, st): 457 | for pid, p in st['hyperparams']: 458 | self.add_hyperparam(pid, p) 459 | 460 | def add_hyperparam(self, param_id, value): 461 | setattr(self, param_id, value) 462 | self.hyperparams[param_id] = value 463 | 464 | def fit(self, xs, ys): 465 | self._optim(list(zip(xs, ys))) 466 | 467 | def _pre_epoch(self): 468 | self.loss = 0 469 | 470 | def _optim(self, xys): 471 | idx = np.arange(len(xys)) 472 | self.batch_size = np.ceil(len(xys) / self.nbatches) 473 | batch_idx = np.arange(self.batch_size, len(xys), self.batch_size) 474 | 475 | for self.epoch in range(1, self.max_epochs + 1): 476 | # shuffle training examples 477 | self._pre_epoch() 478 | shuffle(idx) 479 | 480 | # store epoch for callback 481 | self.epoch_start = timeit.default_timer() 482 | 483 | # process mini-batches 484 | for batch in np.split(idx, batch_idx): 485 | # select indices for current batch 486 | bxys = [xys[z] for z in batch] 487 | self._process_batch(bxys) 488 | 489 | # check callback function, if false return 490 | for f in self.post_epoch: 491 | if not f(self): 492 | break 493 | 494 | def _process_batch(self, xys): 495 | # if enabled, sample additional examples 496 | if self.samplef is not None: 497 | xys += self.samplef(xys) 498 | 499 | if hasattr(self.model, '_prepare_batch_step'): 500 | self.model._prepare_batch_step(xys) 501 | 502 | # take step for batch 503 | grads = self.model._gradients(xys) 504 | self.loss += self.model.loss 505 | self._batch_step(grads) 506 | 507 | def _batch_step(self, grads): 508 | for paramID in self._updaters.keys(): 509 | self._updaters[paramID](*grads[paramID]) 510 | 511 | 512 | class PairwiseStochasticTrainer(StochasticTrainer): 513 | """ 514 | Stochastic gradient descent trainer with pairwise ranking loss functions. 515 | 516 | Models need to implement 517 | 518 | _pairwise_gradients(self, pxs, nxs) 519 | 520 | to be trained with this class. 
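    (Clarifying note: pxs and nxs are the batches of positive and corrupted
    triples; like _gradients above, _pairwise_gradients returns a dict of
    (gradient, index) pairs per parameter, or None when no example violates
    the margin, in which case no update step is taken for the batch.)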
521 | 522 | """ 523 | 524 | 525 | def __init__(self, *args, **kwargs): 526 | super(PairwiseStochasticTrainer, self).__init__(*args, **kwargs) 527 | self.model.add_hyperparam('margin', kwargs.pop('margin', _DEF_MARGIN)) 528 | 529 | def fit(self, xs, ys): 530 | if self.samplef is None: 531 | pidx = np.where(np.array(ys) == 1)[0] 532 | nidx = np.where(np.array(ys) != 1)[0] 533 | pxs = [xs[i] for i in pidx] 534 | self.nxs = [xs[i] for i in nidx] 535 | self.pxs = int(len(self.nxs) / len(pxs)) * pxs 536 | xys = list(range(min(len(pxs), len(self.nxs)))) 537 | self._optim(xys) 538 | else: 539 | self._optim(list(zip(xs, ys))) 540 | 541 | def _pre_epoch(self): 542 | self.nviolations = 0 543 | if self.samplef is None: 544 | shuffle(self.pxs) 545 | shuffle(self.nxs) 546 | 547 | def _process_batch(self, xys): 548 | pxs = [] 549 | nxs = [] 550 | 551 | for xy in xys: 552 | if self.samplef is not None: 553 | for nx in self.samplef([xy]): 554 | pxs.append(xy) 555 | nxs.append(nx) 556 | else: 557 | pxs.append((self.pxs[xy], 1)) 558 | nxs.append((self.nxs[xy], 1)) 559 | 560 | # take step for batch 561 | if hasattr(self.model, '_prepare_batch_step'): 562 | self.model._prepare_batch_step(pxs, nxs) 563 | grads = self.model._pairwise_gradients(pxs, nxs) 564 | 565 | # update if examples violate margin 566 | if grads is not None: 567 | self.nviolations += self.model.nviolations 568 | self._batch_step(grads) 569 | 570 | 571 | def sigmoid(fs): 572 | # compute elementwise gradient for sigmoid 573 | for i in range(len(fs)): 574 | if fs[i] > _cutoff: 575 | fs[i] = 1.0 576 | elif fs[i] < -_cutoff: 577 | fs[i] = 0.0 578 | else: 579 | fs[i] = 1.0 / (1 + np.exp(-fs[i])) 580 | return fs[:, np.newaxis] 581 | 582 | if __name__ =="__main__": 583 | TheanoGradTest().call() 584 | 585 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/hole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sansa.ml.kbc.keras.base import Model 3 | from sansa.ml.kbc.keras.util import grad_sum_matrix, unzip_triples, ccorr, cconv 4 | from sansa.ml.kbc.keras.param import normless1 5 | from sansa.ml.kbc.keras import actfun as af 6 | 7 | 8 | class HolE(Model): 9 | 10 | def __init__(self, *args, **kwargs): 11 | super(HolE, self).__init__(*args, **kwargs) 12 | self.add_hyperparam('sz', args[0]) 13 | self.add_hyperparam('ncomp', args[1]) 14 | self.add_hyperparam('rparam', kwargs.pop('rparam', 0.0)) 15 | self.add_hyperparam('af', kwargs.pop('af', af.Sigmoid)) 16 | self.add_param('E', (self.sz[0], self.ncomp), post=normless1) 17 | self.add_param('R', (self.sz[2], self.ncomp*self.ncomp)) 18 | 19 | def _scores(self, ss, ps, os): 20 | return np.sum(self.R[ps] * ccorr(self.E[ss], self.E[os]), axis=1) 21 | 22 | def _gradients(self, xys): 23 | ss, ps, os, ys = unzip_triples(xys, with_ys=True) 24 | 25 | yscores = ys * self._scores(ss, ps, os) 26 | self.loss = np.sum(np.logaddexp(0, -yscores)) 27 | #preds = af.Sigmoid.f(yscores) 28 | fs = -(ys * af.Sigmoid.f(-yscores))[:, np.newaxis] 29 | #self.loss -= np.sum(np.log(preds)) 30 | 31 | ridx, Sm, n = grad_sum_matrix(ps) 32 | gr = Sm.dot(fs * ccorr(self.E[ss], self.E[os])) / n 33 | gr += self.rparam * self.R[ridx] 34 | 35 | eidx, Sm, n = grad_sum_matrix(list(ss) + list(os)) 36 | ge = Sm.dot(np.vstack(( 37 | fs * ccorr(self.R[ps], self.E[os]), 38 | fs * cconv(self.E[ss], self.R[ps]) 39 | ))) / n 40 | ge += self.rparam * self.E[eidx] 41 | 42 | return {'E': (ge, eidx), 'R':(gr, ridx)} 43 | 44 | def 
_pairwise_gradients(self, pxs, nxs): 45 | # indices of positive examples 46 | sp, pp, op = unzip_triples(pxs) 47 | # indices of negative examples 48 | sn, pn, on = unzip_triples(nxs) 49 | 50 | pscores = self.af.f(self._scores(sp, pp, op)) 51 | nscores = self.af.f(self._scores(sn, pn, on)) 52 | 53 | #print("avg = %f/%f, min = %f/%f, max = %f/%f" % (pscores.mean(), nscores.mean(), pscores.min(), nscores.min(), pscores.max(), nscores.max())) 54 | 55 | # find examples that violate margin 56 | ind = np.where(nscores + self.margin > pscores)[0] 57 | self.nviolations = len(ind) 58 | if len(ind) == 0: 59 | return 60 | 61 | # aux vars 62 | sp, sn = list(sp[ind]), list(sn[ind]) 63 | op, on = list(op[ind]), list(on[ind]) 64 | pp, pn = list(pp[ind]), list(pn[ind]) 65 | gpscores = -self.af.g_given_f(pscores[ind])[:, np.newaxis] 66 | gnscores = self.af.g_given_f(nscores[ind])[:, np.newaxis] 67 | 68 | # object role gradients 69 | ridx, Sm, n = grad_sum_matrix(pp + pn) 70 | grp = gpscores * ccorr(self.E[sp], self.E[op]) 71 | grn = gnscores * ccorr(self.E[sn], self.E[on]) 72 | #gr = (Sm.dot(np.vstack((grp, grn))) + self.rparam * self.R[ridx]) / n 73 | gr = Sm.dot(np.vstack((grp, grn))) / n 74 | gr += self.rparam * self.R[ridx] 75 | 76 | # filler gradients 77 | eidx, Sm, n = grad_sum_matrix(sp + sn + op + on) 78 | geip = gpscores * ccorr(self.R[pp], self.E[op]) 79 | gein = gnscores * ccorr(self.R[pn], self.E[on]) 80 | gejp = gpscores * cconv(self.E[sp], self.R[pp]) 81 | gejn = gnscores * cconv(self.E[sn], self.R[pn]) 82 | ge = Sm.dot(np.vstack((geip, gein, gejp, gejn))) / n 83 | #ge += self.rparam * self.E[eidx] 84 | 85 | return {'E': (ge, eidx), 'R':(gr, ridx)} 86 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/holek.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from random import shuffle 3 | from keras.models import Model 4 | from keras.layers import Input, Activation, Dense, Reshape 5 | from keras.models import Sequential 6 | import keras 7 | from keras.layers import Layer 8 | import math 9 | import numpy as np 10 | import keras.backend as K 11 | from keras.optimizers import Adagrad 12 | from theano import tensor as T 13 | from sansa.ml.kbc.keras import sample 14 | 15 | __author__ = 'nilesh' 16 | 17 | class KerasHole(object): 18 | def __init__(self, numEntities, numRelations, ndim, rparam): 19 | self.numEntities = numEntities 20 | self.numRelations = numRelations 21 | self.ndim = ndim 22 | self.rparam = rparam 23 | 24 | def buildModel(self): 25 | inputs = Input(shape=(2,3)) 26 | score = HolographicLayer2(self.numEntities, self.numRelations, self.ndim, self.rparam)(inputs) 27 | # score = Reshape((1,))(score) 28 | # score = Activation("sigmoid")(score) 29 | model = Model(input=inputs, output=score) 30 | adagrad = Adagrad(lr=0.001, epsilon=1e-06) 31 | 32 | def max_margin(y_true, y_pred): 33 | return T.sum(T.maximum(0., 1. + y_pred[1] + y_pred[0])) 34 | 35 | def loss(y_true, y_pred): 36 | # print(y_pred) 37 | return K.sum(K.log(1. 
+ K.exp(-y_true * y_pred))) 38 | 39 | model.compile(optimizer='rmsprop', loss='binary_crossentropy') 40 | # Or try setting model's output=prediction and loss='binary_crossentropy' - essentially same thing as above 41 | return model 42 | 43 | def fit2(self, xs, ys): 44 | sampler = sample.RandomModeSampler(1, [0, 1], xs, (self.numEntities, self.numEntities, self.numRelations)) 45 | xys = list(zip(xs, ys)) 46 | xyns = sampler.sample(xys) 47 | shuffle(xys) 48 | shuffle(xyns) 49 | xs, ys = [np.array(i) for i in list(zip(*xys))] 50 | xns, yns = [np.array(i) for i in list(zip(*xyns))] 51 | # print(xs[:100], ys[:100]) 52 | xpairs = [np.array(i) for i in list(zip(xs, xns))] 53 | ypairs = [np.array(i) for i in list(zip(ys, yns))] 54 | 55 | print (xpairs[0].shape) 56 | 57 | model = self.buildModel() 58 | # x = K.placeholder((3,)) 59 | # func = K.function([x], model(x)) 60 | # for x in xs: 61 | # print(func([x])) 62 | best_weights_filepath = './best_weights.hdf5' 63 | # earlyStopping= keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto') 64 | saveBestModel = keras.callbacks.ModelCheckpoint(best_weights_filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto') 65 | 66 | # train model 67 | history = model.fit(xpairs, ypairs, batch_size=len(xs)/1000, validation_split=0.1, nb_epoch=100, 68 | callbacks=[saveBestModel]) 69 | 70 | #reload best weights 71 | model.load_weights(best_weights_filepath) 72 | 73 | self.model = self 74 | self.E, self.R = model.layers[1].get_weights() 75 | 76 | def fit(self, xs, ys): 77 | sampler = sample.RandomModeSampler(1, [0, 1], xs, (self.numEntities, self.numEntities, self.numRelations)) 78 | xys = list(zip(xs, ys)) 79 | print(len(xys)) 80 | xys += sampler.sample(xys) 81 | print(len(xys)) 82 | shuffle(xys) 83 | xs, ys = [np.array(i) for i in list(zip(*xys))] 84 | # print(xs[:100], ys[:100]) 85 | 86 | model = self.buildModel() 87 | # x = K.placeholder((3,)) 88 | # func = K.function([x], model(x)) 89 | # for x in xs: 90 | # print(func([x])) 91 | best_weights_filepath = './best_weights.hdf5' 92 | # earlyStopping= keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto') 93 | saveBestModel = keras.callbacks.ModelCheckpoint(best_weights_filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto') 94 | 95 | # train model 96 | history = model.fit(xs, ys, batch_size=len(xs)/1000, validation_split=0.05, nb_epoch=100, 97 | callbacks=[saveBestModel]) 98 | 99 | #reload best weights 100 | model.load_weights(best_weights_filepath) 101 | 102 | self.model = self 103 | self.E, self.R = model.layers[1].get_weights() 104 | 105 | 106 | class HolographicLayer(Layer): 107 | def __init__(self, E, R, d, rparam, input_shape=(3,), **kwargs): 108 | from keras.initializations import glorot_normal 109 | self.init = [glorot_normal(shape=(E,d), name="E"), glorot_normal(shape=(R,d,d), name="R")] 110 | self.rparam = rparam 111 | kwargs["input_shape"] = input_shape 112 | super(HolographicLayer, self).__init__(**kwargs) 113 | 114 | 115 | def build(self, input_shape): 116 | self.E, self.R = self.init 117 | self.trainable_weights = [self.E, self.R] 118 | from keras.regularizers import l2 119 | # regularizer = l2(self.rparam) 120 | # regularizer.set_param(self.E) 121 | # self.regularizers.append(regularizer) 122 | # 123 | # regularizer = l2(self.rparam) 124 | # regularizer.set_param(self.R) 125 | # self.regularizers.append(regularizer) 126 | 127 | def call(self, x, mask=None): 128 | batch_placeholder = K.cast(x, 'int32')[0] 
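        # Each batch row is an integer id triple; the convention throughout this
        # code base is (subject, object, predicate), cf. util.unzip_triples and
        # sample.py. The ids index rows of the embedding matrices E and R below.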
129 | s, o, p = [batch_placeholder[i] for i in range(3)] 130 | 131 | s2v = K.gather(self.E, s) 132 | o2v = K.gather(self.E, o) 133 | r2v = K.gather(self.R, p) 134 | 135 | def ccorr(a, b): 136 | # Return tensor product - basically bilinear/RESCAL models 137 | return T.outer(a,b).flatten() 138 | 139 | # Or cross-correlation op? 140 | # return T.nnet.conv2d(a.dimshuffle('x', 'x', 0, 'x'), b.dimshuffle('x', 'x', 0, 'x'), None, 141 | # None, 142 | # filter_flip=True, border_mode='half').flatten()[:-1] 143 | # return self.ccorr1d_sc(a, b, border_mode='half') 144 | # eta = K.dot(r2v, ccorr(s2v, o2v)) 145 | eta = K.dot(K.dot(s2v, r2v), o2v) 146 | 147 | # func = K.function([s2v,o2v,r2v], K.gradients(K.sigmoid(eta), [s2v,o2v,r2v])) 148 | # print(func([np.random.random(150),np.random.random(150),np.random.random(150)])) 149 | 150 | return eta 151 | 152 | def get_output_shape_for(self, input_shape): 153 | return (input_shape[0], 1) 154 | 155 | class HolographicLayer2(Layer): 156 | def __init__(self, E, R, d, rparam, input_shape=(2,3), **kwargs): 157 | from keras.initializations import glorot_normal 158 | self.init = [glorot_normal(shape=(E,d), name="E"), glorot_normal(shape=(R,d*d), name="R")] 159 | self.rparam = rparam 160 | kwargs["input_shape"] = input_shape 161 | super(HolographicLayer2, self).__init__(**kwargs) 162 | 163 | 164 | def build(self, input_shape): 165 | self.E, self.R = self.init 166 | self.trainable_weights = [self.E, self.R] 167 | from keras.regularizers import l2 168 | regularizer = l2(self.rparam) 169 | regularizer.set_param(self.E) 170 | self.regularizers.append(regularizer) 171 | 172 | regularizer = l2(self.rparam) 173 | regularizer.set_param(self.R) 174 | self.regularizers.append(regularizer) 175 | 176 | def call(self, x, mask=None): 177 | pos = K.cast(x, 'int32')[0][0] 178 | neg = K.cast(x, 'int32')[0][1] 179 | 180 | def eta(s, o, p): 181 | s2v = K.gather(self.E, s) 182 | o2v = K.gather(self.E, o) 183 | r2v = K.gather(self.R, p) 184 | 185 | def ccorr(a, b): 186 | # Return tensor product - basically bilinear/RESCAL models 187 | return T.outer(a,b).flatten() 188 | 189 | # Or cross-correlation op? 
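            # (HolE proper composes subject and object by circular correlation,
            #  ccorr(a, b) = ifft(conj(fft(a)) * fft(b)).real, as in util.ccorr;
            #  the flattened outer product above, dotted with a d*d relation row,
            #  instead yields a RESCAL-style bilinear score.)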
190 | # return T.nnet.conv2d(a.dimshuffle('x', 'x', 0, 'x'), b.dimshuffle('x', 'x', 0, 'x'), None, 191 | # None, 192 | # filter_flip=True, border_mode='half').flatten()[:-1] 193 | # return self.ccorr1d_sc(a, b, border_mode='half') 194 | eta = K.dot(r2v, ccorr(s2v, o2v)) 195 | 196 | return eta 197 | 198 | 199 | pos_eta = eta(*[pos[i] for i in range(3)]) 200 | neg_eta = eta(*[neg[i] for i in range(3)]) 201 | return K.variable(np.array([pos_eta, neg_eta])) 202 | 203 | def get_output_shape_for(self, input_shape): 204 | return (input_shape[0], 2) 205 | 206 | if __name__ == "__main__": 207 | wnbin = "/Users/nilesh/python/holographic-embeddings/data/wn18.bin" 208 | with open(wnbin, 'rb') as fin: 209 | data = pickle.load(fin) 210 | 211 | N = len(data['entities']) 212 | M = len(data['relations']) 213 | 214 | xs = data['train_subs'] 215 | ys = np.ones(len(xs)) 216 | 217 | trainer = KerasHole(N, M, 10, 0.01) 218 | trainer.fit2(xs, ys) -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/param.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | from numpy import sqrt, squeeze, zeros_like 4 | from numpy.random import randn, uniform 5 | 6 | 7 | def init_unif(sz): 8 | """ 9 | Uniform intialization 10 | 11 | Heuristic commonly used to initialize deep neural networks 12 | """ 13 | bnd = 1 / sqrt(sz[0]) 14 | p = uniform(low=-bnd, high=bnd, size=sz) 15 | return squeeze(p) 16 | 17 | 18 | def init_nunif(sz): 19 | """ 20 | Normalized uniform initialization 21 | 22 | See Glorot X., Bengio Y.: "Understanding the difficulty of training 23 | deep feedforward neural networks". AISTATS, 2010 24 | """ 25 | bnd = sqrt(6) / sqrt(sz[0] + sz[1]) 26 | p = uniform(low=-bnd, high=bnd, size=sz) 27 | return squeeze(p) 28 | 29 | 30 | def init_randn(sz): 31 | return squeeze(randn(*sz)) 32 | 33 | 34 | class Parameter(np.ndarray): 35 | 36 | def __new__(cls, *args, **kwargs): 37 | # TODO: hackish, find better way to handle higher-order parameters 38 | if len(args[0]) == 3: 39 | sz = (args[0][1], args[0][2]) 40 | arr = np.array([Parameter._init_array(sz, args[1]) for _ in range(args[0][0])]) 41 | else: 42 | arr = Parameter._init_array(args[0], args[1]) 43 | arr = arr.view(cls) 44 | arr.name = kwargs.pop('name', None) 45 | arr.post = kwargs.pop('post', None) 46 | 47 | if arr.post is not None: 48 | arr = arr.post(arr) 49 | 50 | return arr 51 | 52 | def __array_finalize__(self, obj): 53 | if obj is None: 54 | return 55 | self.name = getattr(obj, 'name', None) 56 | self.post = getattr(obj, 'post', None) 57 | 58 | @staticmethod 59 | def _init_array(shape, method): 60 | mod = sys.modules[__name__] 61 | method = 'init_%s' % method 62 | if not hasattr(mod, method): 63 | raise ValueError('Unknown initialization (%s)' % method) 64 | elif len(shape) != 2: 65 | raise ValueError('Shape must be of size 2') 66 | return getattr(mod, method)(shape) 67 | 68 | 69 | class ParameterUpdate(object): 70 | 71 | def __init__(self, param, learning_rate): 72 | self.param = param 73 | self.learning_rate = learning_rate 74 | 75 | def __call__(self, gradient, idx=None): 76 | self._update(gradient, idx) 77 | if self.param.post is not None: 78 | self.param = self.param.post(self.param, idx) 79 | 80 | def reset(self): 81 | pass 82 | 83 | 84 | class SGD(ParameterUpdate): 85 | """ 86 | Class to perform SGD updates on a parameter 87 | """ 88 | 89 | def _update(self, g, idx): 90 | self.param[idx] -= self.learning_rate * g 91 | 92 | 93 | class 
AdaGrad(ParameterUpdate): 94 | 95 | def __init__(self, param, learning_rate): 96 | super(AdaGrad, self).__init__(param, learning_rate) 97 | self.p2 = zeros_like(param) 98 | 99 | def _update(self, g, idx=None): 100 | self.p2[idx] += g * g 101 | H = np.maximum(np.sqrt(self.p2[idx]), 1e-7) 102 | self.param[idx] -= self.learning_rate * g / H 103 | 104 | def reset(self): 105 | self.p2 = zeros_like(self.p2) 106 | 107 | 108 | def normalize(M, idx=None): 109 | if idx is None: 110 | M = M / np.sqrt(np.sum(M ** 2, axis=1))[:, np.newaxis] 111 | else: 112 | nrm = np.sqrt(np.sum(M[idx, :] ** 2, axis=1))[:, np.newaxis] 113 | M[idx, :] = M[idx, :] / nrm 114 | return M 115 | 116 | 117 | def normless1(M, idx=None): 118 | nrm = np.sum(M[idx] ** 2, axis=1)[:, np.newaxis] 119 | nrm[nrm < 1] = 1 120 | M[idx] = M[idx] / nrm 121 | return M 122 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/sample.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sampling strategies to generate negative examples from knowledge graphs 3 | with an open-world assumption 4 | """ 5 | 6 | from copy import deepcopy 7 | from collections import defaultdict as ddict 8 | from numpy.random import randint 9 | 10 | 11 | class Sampler(object): 12 | 13 | def __init__(self, n, modes, ntries=100): 14 | self.n = n 15 | self.modes = modes 16 | self.ntries = ntries 17 | 18 | def sample(self, xys): 19 | res = [] 20 | for x, _ in xys: 21 | for _ in range(self.n): 22 | for mode in self.modes: 23 | t = self._sample(x, mode) 24 | if t is not None: 25 | res.append(t) 26 | return res 27 | 28 | 29 | class RandomModeSampler(Sampler): 30 | """ 31 | Sample negative triples randomly 32 | """ 33 | 34 | def __init__(self, n, modes, xs, sz): 35 | super(RandomModeSampler, self).__init__(n, modes) 36 | self.xs = set(xs) 37 | self.sz = sz 38 | 39 | def _sample(self, x, mode): 40 | nex = list(x) 41 | res = None 42 | for _ in range(self.ntries): 43 | nex[mode] = randint(self.sz[mode]) 44 | if tuple(nex) not in self.xs: 45 | res = (tuple(nex), -1.0) 46 | break 47 | return res 48 | 49 | 50 | class RandomSampler(Sampler): 51 | 52 | def __init__(self, n, xs, sz): 53 | super(RandomSampler, self).__init__(n) 54 | self.xs = set(xs) 55 | self.sz = sz 56 | 57 | def _sample(self, x, mode): 58 | res = None 59 | for _ in range(self.ntries): 60 | nex = (randint(self.sz[0]), 61 | randint(self.sz[0]), 62 | randint(self.sz[1])) 63 | if nex not in self.xs: 64 | res = (nex, -1.0) 65 | break 66 | return res 67 | 68 | 69 | class CorruptedSampler(Sampler): 70 | 71 | def __init__(self, n, xs, type_index): 72 | super(CorruptedSampler, self).__init__(n) 73 | self.xs = set(xs) 74 | self.type_index = type_index 75 | 76 | def _sample(self, x, mode): 77 | nex = list(deepcopy(x)) 78 | res = None 79 | for _ in range(self.ntries): 80 | if mode == 2: 81 | nex[2] = randint(len(self.type_index)) 82 | else: 83 | k = x[2] 84 | n = len(self.type_index[k][mode]) 85 | nex[mode] = self.type_index[k][mode][randint(n)] 86 | if tuple(nex) not in self.xs: 87 | res = (tuple(nex), -1.0) 88 | break 89 | return res 90 | 91 | 92 | class LCWASampler(RandomModeSampler): 93 | """ 94 | Sample negative examples according to the local closed world assumption 95 | """ 96 | 97 | def __init__(self, n, modes, xs, sz): 98 | super(LCWASampler, self).__init__(n, modes, xs, sz) 99 | self.counts = ddict(int) 100 | for s, o, p in xs: 101 | self.counts[(s, p)] += 1 102 | 103 | def _sample(self, x, mode): 104 | nex = 
list(deepcopy(x)) 105 | res = None 106 | for _ in range(self.ntries): 107 | nex[mode] = randint(self.sz[mode]) 108 | if self.counts[(nex[0], nex[2])] > 0 and tuple(nex) not in self.xs: 109 | res = (tuple(nex), -1.0) 110 | break 111 | return res 112 | 113 | 114 | def type_index(xs): 115 | index = ddict(lambda: {0: set(), 1: set()}) 116 | for i, j, k in xs: 117 | index[k][0].add(i) 118 | index[k][1].add(j) 119 | #for p, idx in index.items(): 120 | # print(p, len(idx[0]), len(idx[1])) 121 | return {k: {0: list(v[0]), 1: list(v[1])} for k, v in index.items()} 122 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/keras/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.fft import fft, ifft 3 | import scipy.sparse as sp 4 | import functools 5 | import collections 6 | 7 | 8 | def cconv(a, b): 9 | """ 10 | Circular convolution of vectors 11 | 12 | Computes the circular convolution of two vectors a and b via their 13 | fast fourier transforms 14 | 15 | a \ast b = \mathcal{F}^{-1}(\mathcal{F}(a) \odot \mathcal{F}(b)) 16 | 17 | Parameter 18 | --------- 19 | a: real valued array (shape N) 20 | b: real valued array (shape N) 21 | 22 | Returns 23 | ------- 24 | c: real valued array (shape N), representing the circular 25 | convolution of a and b 26 | """ 27 | return ifft(fft(a) * fft(b)).real 28 | 29 | 30 | def ccorr(a, b): 31 | """ 32 | Circular correlation of vectors 33 | 34 | Computes the circular correlation of two vectors a and b via their 35 | fast fourier transforms 36 | 37 | a \ast b = \mathcal{F}^{-1}(\overline{\mathcal{F}(a)} \odot \mathcal{F}(b)) 38 | 39 | Parameter 40 | --------- 41 | a: real valued array (shape N) 42 | b: real valued array (shape N) 43 | 44 | Returns 45 | ------- 46 | c: real valued array (shape N), representing the circular 47 | correlation of a and b 48 | """ 49 | 50 | return ifft(np.conj(fft(a)) * fft(b)).real 51 | 52 | 53 | def grad_sum_matrix(idx): 54 | uidx, iinv = np.unique(idx, return_inverse=True) 55 | sz = len(iinv) 56 | M = sp.coo_matrix((np.ones(sz), (iinv, np.arange(sz)))).tocsr() 57 | # normalize summation matrix so that each row sums to one 58 | n = np.array(M.sum(axis=1)) 59 | #M = M.T.dot(np.diag(n)) 60 | return uidx, M, n 61 | 62 | 63 | def unzip_triples(xys, with_ys=False): 64 | xs, ys = list(zip(*xys)) 65 | ss, os, ps = list(zip(*xs)) 66 | if with_ys: 67 | return np.array(ss), np.array(ps), np.array(os), np.array(ys) 68 | else: 69 | return np.array(ss), np.array(ps), np.array(os) 70 | 71 | 72 | def to_tensor(xs, ys, sz): 73 | T = [sp.lil_matrix((sz[0], sz[1])) for _ in range(sz[2])] 74 | for i in range(len(xs)): 75 | i, j, k = xs[i] 76 | T[k][i, j] = ys[i] 77 | return T 78 | 79 | 80 | def init_nvecs(xs, ys, sz, rank, with_T=False): 81 | from scipy.sparse.linalg import eigsh 82 | 83 | T = to_tensor(xs, ys, sz) 84 | T = [Tk.tocsr() for Tk in T] 85 | S = sum([T[k] + T[k].T for k in range(len(T))]) 86 | _, E = eigsh(sp.csr_matrix(S), rank) 87 | if not with_T: 88 | return E 89 | else: 90 | return E, T 91 | 92 | 93 | class memoized(object): 94 | ''' 95 | Decorator. Caches a function's return value each time it is called. 96 | If called later with the same arguments, the cached value is returned 97 | (not reevaluated). 
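    Illustrative usage (not part of this module):

        @memoized
        def fibonacci(n):
            return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)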
98 | 99 | see https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize 100 | ''' 101 | 102 | def __init__(self, func): 103 | self.func = func 104 | self.cache = {} 105 | 106 | def __call__(self, *args): 107 | if not isinstance(args, collections.Hashable): 108 | # uncachable, return direct function application 109 | return self.func(*args) 110 | if args in self.cache: 111 | return self.cache[args] 112 | else: 113 | val = self.func(*args) 114 | self.cache[args] = val 115 | return val 116 | 117 | def __repr__(self): 118 | '''return function's docstring''' 119 | return self.func.__doc__ 120 | 121 | def __get__(self, obj, objtype): 122 | '''support instance methods''' 123 | return functools.partial(self.__call__, obj) 124 | -------------------------------------------------------------------------------- /python/sansa/ml/kbc/rdfio.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import rdflib 4 | from rdflib.term import URIRef 5 | import math 6 | from pyspark import SQLContext, SparkContext, RDD 7 | from pyspark.rdd import Partitioner 8 | 9 | __author__ = 'nilesh' 10 | 11 | 12 | class ThreeWayTensorPartitioner(Partitioner): 13 | def __init__(self, dimensions: tuple, blockSizes: tuple): 14 | self.dims = dimensions 15 | self.partitionSizes = blockSizes 16 | self.numPartitions = [int(math.ceil(self.dims[i] * 1.0 / self.partitionSizes[i])) for i in range(3)] 17 | self.totalPartitions = reduce(lambda x, y: x*y, self.numPartitions) 18 | 19 | def __eq__(self, other): 20 | return (isinstance(other, ThreeWayTensorPartitioner) 21 | and self.dims == other.dims 22 | and self.partitionSizes == other.partitionSizes) 23 | 24 | def __call__(self, k): 25 | return self.partitionFunc(k) 26 | 27 | def partitionFunc(self, key): 28 | for i in range(len(self.dims)): 29 | assert(0 <= key[i] <= self.dims[i]) 30 | 31 | i, j, k = key 32 | ps1, ps2, ps3 = self.partitionSizes 33 | pn1, pn2, pn3 = self.numPartitions 34 | 35 | return i / ps1 + j / ps2 * pn1 + k / ps3 * pn2 * pn1 36 | 37 | 38 | class RDFReader(object): 39 | def __init__(self, sc: SparkContext): 40 | self.sc = SQLContext(sc) 41 | 42 | def tripleRDD(self, file) -> RDD: 43 | def parseNTriples(lines): 44 | g = rdflib.Graph() 45 | g.parse(data="\n".join(lines), format="nt") 46 | allURIs = lambda statement: False not in [isinstance(term, URIRef) for term in statement] 47 | return [statement for statement in g if not allURIs(statement)] 48 | 49 | triples = self.sc.read.text(file).map(lambda x: x.value).mapPartitions(parseNTriples) 50 | return triples 51 | 52 | def tripleTensor(self, file, blockSizes: tuple): 53 | spo = self.tripleRDD(file) 54 | # Already filtered by URIs, no need to check types a la pattern matching 55 | entityIDs = spo.flatMap(lambda x: [x[0], x[2]]).distinct().zipWithUniqueId() # (eURI, eID) 56 | numEntities = entityIDs.countByKey() 57 | relationIDs = spo.map(lambda x: x[1]).distinct().zipWithUniqueId() # (rURI, rID) 58 | numRelations = relationIDs.countByKey() 59 | 60 | s_po = spo.map(lambda x: (x[0], (x[1], x[2]))) 61 | 62 | def mapSubjectIDs(s__po_sid): 63 | (s, ((p, o), sid)) = s__po_sid 64 | return o, (sid, p) 65 | 66 | o__sid_p = s_po.join(entityIDs).map(mapSubjectIDs) 67 | p__oid_sid = o__sid_p.join(entityIDs).map(mapSubjectIDs) 68 | sid__pid_oid = p__oid_sid.join(relationIDs).map(mapSubjectIDs) 69 | 70 | spoMapped = sid__pid_oid.map(lambda x: (x[0], x[1][0], x[1][1])) 71 | assert isinstance(spoMapped, RDD) 72 | 73 | d1, d2, d3 = blockSizes 74 | 75 | def blockify(s, o, p): 76 | 
blockD1Index = int(s / d1) 77 | 78 | spoMapped.groupByKey().mapPartitions() 79 | 80 | 81 | return sid__pid_oid 82 | 83 | 84 | 85 | 86 | os.environ['SPARK_HOME'] = "/Users/nilesh/IdeaProjects/spark-1.6.2-bin-hadoop2.6" 87 | os.environ['PYSPARK_PYTHON'] = "python3" 88 | os.environ['PYSPARK_DRIVER_PYTHON'] = "python3" 89 | reader = RDFReader(SparkContext(master="local[4]", appName="test", sparkHome="/Users/nilesh/IdeaProjects/spark-1.6.2-bin-hadoop2.6")) 90 | print(reader.tripleTensor("/Users/nilesh/IdeaProjects/elinker3/small-dataset.nt", 1).collect()) -------------------------------------------------------------------------------- /src/main/scala/net/sansa_stack/ml/kge/Functions.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.ml.kge 2 | 3 | import ml.dmlc.mxnet._ 4 | import ml.dmlc.mxnet.{Symbol => s} 5 | 6 | /** 7 | * Created by nilesh on 31/05/2017. 8 | */ 9 | object MaxMarginLoss { 10 | def apply(margin: Float): (Symbol, Symbol) => Symbol = { 11 | loss(margin) _ 12 | } 13 | 14 | def loss(margin: Float)(positiveScore: Symbol, negativeScore: Symbol): Symbol = { 15 | var loss = s.max(negativeScore - positiveScore + margin, 0) 16 | loss = s.sum(name = "sum")()(Map("data" -> loss)) 17 | s.make_loss(name = "loss")()(Map("data" -> loss)) 18 | } 19 | } 20 | 21 | object Sigmoid { 22 | def apply(x: Symbol): Symbol = { 23 | s.Activation(name = "sigmoid")()(Map("data" -> x, "act_type" -> "sigmoid")) 24 | } 25 | } 26 | 27 | object Tanh { 28 | def apply(x: Symbol): Symbol = { 29 | s.Activation(name = "tanh")()(Map("data" -> x, "act_type" -> "tanh")) 30 | } 31 | } 32 | 33 | object L2Similarity { 34 | def apply(x: Symbol, y: Symbol): Symbol = { 35 | val difference = x - y 36 | var score = s.square()()(Map("data" -> difference)) 37 | score = s.sum()()(Map("data" -> score, "axis" -> 0)) 38 | score*(-1.0) 39 | } 40 | } 41 | 42 | object DotSimilarity { 43 | def apply(x: Symbol, y: Symbol): Symbol = { 44 | s.dot("dot")()(Map("lhs" -> x, "rhs" -> y)) 45 | } 46 | } 47 | 48 | object Hits { 49 | def hitsAt1(label: NDArray, predicted: NDArray): Float = { 50 | val labelA = label.toArray 51 | val predA = predicted.toArray 52 | labelA.zip(predA).map(x => if(x._1.toInt == x._2.toInt && x._1.toInt == 1) 1 else 0).sum 53 | } 54 | } -------------------------------------------------------------------------------- /src/main/scala/net/sansa_stack/ml/kge/Main.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.ml.kge 2 | 3 | import ml.dmlc.mxnet.spark.MXNet 4 | 5 | 6 | /** 7 | * Created by nilesh on 19/05/2017. 8 | */ 9 | object Main extends App { 10 | // import ml.dmlc.mxnet.Symbol 11 | import ml.dmlc.mxnet._ 12 | val x = Symbol.Variable("x") 13 | val y = Symbol.Variable("y") 14 | val diff = Symbol.pow(x, 2) + Symbol.pow(y, 3) 15 | val a = NDArray.ones(1) * 10 16 | val b = NDArray.ones(1) * 2 17 | val ga = NDArray.empty(1) 18 | val ga2 = NDArray.empty(1) 19 | val executor = diff.bind(Context.cpu(), args=Map("x" -> a, "y" -> b), argsGrad=Map("x" -> ga, "y" -> ga2)) 20 | executor.forward() 21 | println(executor.outputs(0).toArray.mkString(",")) 22 | // executor. 
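  // With x = 10 and y = 2 the forward pass above evaluates x^2 + y^3 = 108;
  // the backward pass below should then give the gradients
  // d/dx = 2x = 20 and d/dy = 3y^2 = 12.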
23 | 24 | // test gradient 25 | val outGrad = NDArray.ones(1) 26 | executor.backward(Array(outGrad)) 27 | println(executor.gradDict.toArray.apply(1).x._2.toArray.mkString(",")) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/net/sansa_stack/ml/kge/RDFDatasetReader.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.ml.kge 2 | 3 | import net.sansa_stack.rdf.spark.model.{JenaSparkRDD, JenaSparkRDDOps} 4 | import net.sansa_stack.rdf.spark.model.TripleRDD._ 5 | import net.sansa_stack.rdf.spark.model.JenaSparkRDD 6 | import org.apache.jena.graph.Node_URI 7 | import org.apache.spark.SparkContext 8 | import org.apache.spark.rdd.RDD 9 | import ml.dmlc.mxnet._ 10 | 11 | /** 12 | * Created by nilesh on 31/05/2017. 13 | */ 14 | class RDFDatasetReader(sc: SparkContext, path: String) { 15 | type Node = JenaSparkRDD#Node 16 | 17 | private val ops = JenaSparkRDDOps(sc) 18 | import ops._ 19 | 20 | private val triplesWithURIs = { 21 | val graph = ops.loadGraphFromNTriples(path, "") 22 | graph.filter{ 23 | case Triple(s, p, o) => 24 | s.isURI && p.isURI && o.isURI 25 | } 26 | } 27 | 28 | val relationIDs = triplesWithURIs.getPredicates.zipWithUniqueId() 29 | 30 | val entityIDs = (triplesWithURIs.getSubjects 31 | ++ triplesWithURIs.getObjects) 32 | .distinct 33 | .zipWithUniqueId() 34 | 35 | def getNumEntities = entityIDs.count() 36 | 37 | def getNumRelations = relationIDs.count() 38 | 39 | def getMappedTriples(): Unit = { 40 | val joinedBySubject = entityIDs.join(triplesWithURIs.map{ 41 | case Triple(s, p, o) => 42 | (s, (p, o)) 43 | }) 44 | 45 | val subjectMapped: RDD[(Long, Node_URI, Node)] = joinedBySubject.map{ 46 | case (_, _ @ (subjectID: Long, _ @ (predicate: Node_URI, obj: Node))) => 47 | (subjectID, predicate, obj) 48 | } 49 | 50 | val joinedByObject = entityIDs.join(subjectMapped.map{ 51 | case (s, p, o) => 52 | (o, (s, p)) 53 | }) 54 | 55 | val subjectObjectMapped = joinedByObject.map{ 56 | case (_, _ @ (objectID: Long, _ @ (subjectID: Long, predicate: Node_URI))) => 57 | (subjectID, predicate, objectID) 58 | } 59 | 60 | val joinedByPredicate = relationIDs.join(subjectObjectMapped.map{ 61 | case (s, p, o) => 62 | (p, (s, o)) 63 | }) 64 | 65 | val allMapped = joinedByPredicate.map{ 66 | case (_: Node, _ @ (predicateID: Long, _ @ (subjectID: Long, objectID: Long))) => 67 | (subjectID, predicateID, objectID) 68 | } 69 | 70 | allMapped 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/net/sansa_stack/ml/kge/model/TransE.scala: -------------------------------------------------------------------------------- 1 | package net.sansa_stack.ml.kge.model 2 | 3 | import ml.dmlc.mxnet._ 4 | import ml.dmlc.mxnet.{Symbol => s} 5 | import scala.io.Source 6 | import scala.util.Random 7 | import ml.dmlc.mxnet.optimizer.Adam 8 | import net.sansa_stack.ml.kge.{MaxMarginLoss, L2Similarity, Main, Hits} 9 | 10 | /** 11 | * Created by nilesh on 01/06/2017. 
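 *
 * TransE embeds entities and relations in one d-dimensional space and scores a
 * triple (h, r, t) by how well h + r approximates t; here the score is the
 * negated squared L2 distance (see L2Similarity) and training minimises a
 * max-margin loss between observed triples and randomly corrupted ones.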
12 | */ 13 | class TransE(numEntities: Int, numRelations: Int, latentFactors: Int, batchSize: Int) { 14 | def getNet(): (Symbol, Seq[String]) = { 15 | // embedding weight vectors 16 | val entityWeight = s.Variable("entityWeight") 17 | val relationWeight = s.Variable("relationWeight") 18 | 19 | def entityEmbedding(data: Symbol) = 20 | s.Embedding()()(Map("data" -> data, "weight" -> entityWeight, "input_dim" -> numEntities, "output_dim" -> latentFactors)) 21 | 22 | def relationEmbedding(data: Symbol) = 23 | s.Embedding()()(Map("data" -> data, "weight" -> relationWeight, "input_dim" -> numRelations, "output_dim" -> latentFactors)) 24 | 25 | // inputs 26 | var head = s.Variable("subjectEntity") 27 | var relation = s.Variable("predicateRelation") 28 | var tail = s.Variable("objectEntity") 29 | var corruptHead = s.Variable("corruptSubjectEntity") 30 | var corruptTail = s.Variable("corruptObjectEntity") 31 | 32 | head = entityEmbedding(head) 33 | relation = relationEmbedding(relation) 34 | tail = entityEmbedding(tail) 35 | corruptHead = entityEmbedding(corruptHead) 36 | corruptTail = entityEmbedding(corruptTail) 37 | 38 | def getScore(head: Symbol, relation: Symbol, tail: Symbol) = L2Similarity(head + relation, tail) 39 | 40 | val posScore = getScore(head, relation, tail) 41 | val negScore = getScore(corruptHead, relation, corruptTail) 42 | val loss = MaxMarginLoss(1.0f)(posScore, negScore) 43 | 44 | (loss, Seq("subjectEntity", "predicateRelation", "objectEntity", "corruptSubjectEntity", "corruptObjectEntity")) 45 | } 46 | 47 | def train() = { 48 | val ctx = Context.cpu() 49 | // val numEntities = 40943 50 | val (transeModel, paramNames) = getNet() 51 | 52 | import ml.dmlc.mxnet.Xavier 53 | 54 | val initializer = new Xavier(factorType = "in", magnitude = 2.34f) 55 | 56 | val (argShapes, outputShapes, auxShapes) = transeModel.inferShape( 57 | (for (paramName <- paramNames) yield paramName -> Shape(batchSize, 1)) 58 | toMap) 59 | 60 | val argNames = transeModel.listArguments() 61 | val argDict = argNames.zip(argShapes.map(NDArray.empty(_, ctx))).toMap 62 | val gradDict = argNames.zip(argShapes).filter { 63 | case (name, shape) => 64 | !paramNames.contains(name) 65 | }.map(x => x._1 -> NDArray.empty(x._2, ctx)).toMap 66 | argDict.foreach { 67 | case (name, ndArray) => 68 | if (!paramNames.contains(name)) { 69 | initializer.initWeight(name, ndArray) 70 | } 71 | } 72 | 73 | def readDataBatched(stage: String) = { 74 | val triplesFile = s"/home/nilesh/utils/Spark-Tensors/data/$stage.txt" 75 | val entityIDFile = "/home/nilesh/utils/Spark-Tensors/data/entity2id.txt" 76 | val relationIDFile = "/home/nilesh/utils/Spark-Tensors/data/relation2id.txt" 77 | 78 | 79 | def getIDMap(path: String) = Source.fromFile(path) 80 | .getLines() 81 | .map(_.split("\t")) 82 | .map(x => x(0) -> x(1).toFloat).toMap 83 | 84 | val entityID = getIDMap(entityIDFile) 85 | val relationID = getIDMap(relationIDFile) 86 | 87 | val triples = Random.shuffle(Source.fromFile(triplesFile).getLines().map(_.split("\t")).toSeq) 88 | 89 | (triples.map(x => entityID(x(0))).toArray.grouped(batchSize).toSeq, 90 | triples.map(x => relationID(x(2))).toArray.grouped(batchSize).toSeq, 91 | triples.map(x => entityID(x(1))).toArray.grouped(batchSize).toSeq, 92 | triples.map(x => Random.nextInt(numEntities).toFloat).toArray.grouped(batchSize).toSeq, 93 | triples.map(x => Random.nextInt(numEntities).toFloat).toArray.grouped(batchSize).toSeq) 94 | } 95 | 96 | val executor = transeModel.bind(ctx, argDict, gradDict) 97 | 98 | val opt = new Adam(learningRate 
= 0.001f, wd = 0.0001f) 99 | val paramsGrads = gradDict.toList.zipWithIndex.map { case ((name, grad), idx) => 100 | (idx, name, grad, opt.createState(idx, argDict(name))) 101 | } 102 | 103 | val head = argDict("subjectEntity") 104 | val relation = argDict("predicateRelation") 105 | val tail = argDict("objectEntity") 106 | val corruptHead = argDict("corruptSubjectEntity") 107 | val corruptTail = argDict("corruptObjectEntity") 108 | 109 | val (trainSubjects, trainRelations, trainObjects, trainCorruptSubjects, trainCorruptObjects) = readDataBatched("train") 110 | val (testSubjects, testRelations, testObjects, _, _) = readDataBatched("test") 111 | 112 | var iter = 0 113 | var minTestHits = 100f 114 | for (epoch <- 0 until 100000) { 115 | head.set(trainSubjects(iter)) 116 | relation.set(trainRelations(iter)) 117 | tail.set(trainObjects(iter)) 118 | corruptHead.set(trainCorruptSubjects(iter)) 119 | corruptTail.set(trainCorruptObjects(iter)) 120 | iter += 1 121 | 122 | if (iter >= trainSubjects.length) iter = 0 123 | 124 | executor.forward(isTrain = true) 125 | executor.backward() 126 | 127 | paramsGrads.foreach { 128 | case (idx, name, grad, optimState) => 129 | opt.update(idx, argDict(name), grad, optimState) 130 | } 131 | 132 | // println(s"iter $epoch, training Hits@1: ${Math.sqrt(Hits.hitsAt1(NDArray.ones(batchSize), executor.outputs(0)) / batchSize)}, min test Hits@1: $minTestHits") 133 | 134 | println(s"iter $epoch, training loss: ${executor.outputs(0).toArray.sum}") 135 | if (epoch != 0 && epoch % 50 == 0) { 136 | val tmp = for (i <- 0 until testSubjects.length) yield { 137 | head.set(testSubjects(iter)) 138 | relation.set(testRelations(iter)) 139 | tail.set(testObjects(iter)) 140 | 141 | executor.forward(isTrain = false) 142 | Hits.hitsAt1(NDArray.ones(batchSize), executor.outputs(0)) 143 | } 144 | val testHits = Math.sqrt(tmp.toArray.sum / (testSubjects.length * batchSize)) 145 | if (testHits < minTestHits) minTestHits = testHits.toFloat 146 | } 147 | } 148 | 149 | } 150 | } 151 | --------------------------------------------------------------------------------
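
As a closing illustration of how the Python pieces above fit together, here is a minimal, self-contained sketch of HolE scoring for a single triple. It is illustrative only: the helper name hole_score and the toy sizes are not part of the repository, but ccorr mirrors python/sansa/ml/kbc/keras/util.py and the score follows the expression in HolE._scores (np.sum(R[p] * ccorr(E[s], E[o]))), with relation vectors of the same dimension as the entity vectors as in the original HolE formulation.

    import numpy as np
    from numpy.fft import fft, ifft

    def ccorr(a, b):
        # circular correlation via FFT, as in util.ccorr
        return ifft(np.conj(fft(a)) * fft(b)).real

    rng = np.random.RandomState(0)
    n_entities, n_relations, dim = 5, 2, 8
    E = rng.randn(n_entities, dim)   # entity embeddings
    R = rng.randn(n_relations, dim)  # relation embeddings

    def hole_score(s, p, o):
        # eta = r_p . ccorr(e_s, e_o); sigmoid(eta) is the predicted truth value
        eta = np.dot(R[p], ccorr(E[s], E[o]))
        return 1.0 / (1.0 + np.exp(-eta))

    print(hole_score(0, 1, 3))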