├── LICENSE ├── README.md ├── _config.yml ├── code ├── KBQA.py ├── __init__.py ├── baseline_eval.py ├── data_utils.py ├── feed_data.py ├── get_stats.py ├── qual_eval.py ├── scratch │ ├── check_nick_coverage.py │ ├── check_siva_clueweb_coverage.py │ └── concat_all_clueweb.py ├── train.py ├── unit_tests.py └── util.py ├── config.sh ├── config_data.sh ├── data_formatted ├── README ├── dev_with_kb_and_text_facts.json ├── small_dev_with_kb_and_text_facts.json ├── small_train_with_kb_and_text_facts.json ├── test_with_kb_and_text_facts.json └── train_with_kb_and_text_facts.json ├── get_data.sh ├── kb └── small_demo_kb.txt ├── model_outputs ├── dev_answers.txt └── test_answers.txt ├── run.sh ├── run_data.sh ├── test_saved_model_config.sh ├── text_kb └── small_demo_text_kb.txt └── vocab ├── entity_vocab.json └── relation_vocab.json /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Rajarshi Das 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### This repo contains the tensorflow implementation of the paper "[Question Answering on Knowledge Bases and Text using Universal Schema and Memory Networks](https://arxiv.org/abs/1704.08384)". 2 | 3 | ### Dependencies 4 | * TensorFlow <= 0.12 5 | 6 | ## Training 7 | I have set up training with most default params on a very small dataset so that it is easier to get started. Just running the script should work. 8 | ``` 9 | /bin/bash run.sh ./config.sh 10 | ``` 11 | ### Data 12 | The processed data (train/dev/test split) is stored in data_formatted/ directory. 13 | To download the KB files used for the project run, 14 | ``` 15 | sh get_data.sh 16 | ``` 17 | After downloading the data, you will have to change the appropriate entries in the config.sh file (kb_file and text_kb_file). 
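For reference, the two entries typically end up looking like the minimal sketch below; the paths are illustrative placeholders (point them at wherever `get_data.sh` put the downloaded files), and the exact variable syntax may differ slightly in your copy of `config.sh`:
```
kb_file="/path/to/freebase.spades.txt"       # downloaded KB triples
text_kb_file="/path/to/text_kb.spades.txt"   # downloaded textual KB
```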
18 | 19 | ### Pretrained embeddings 20 | The embeddings used for initializing the network can be downloaded from [here](http://iesl.cs.umass.edu/downloads/spades/entity_lookup_table_50.pkl.gz) 21 | 22 | ### Model outputs 23 | We are also releasing the output predictions of our model for comparison. Find them in the model_outputs directory. 24 | 25 | ### Trained Model 26 | We are also sharing our pretrained model. Get it [here]( http://iesl.cs.umass.edu/downloads/spades/max_dev_out.ckpt). The following will load the model and get the answers from the dev set. Please change the config appropriately. 27 | ``` 28 | sh run.sh ./test_from_saved_model.sh 29 | ``` 30 | 31 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-hacker -------------------------------------------------------------------------------- /code/KBQA.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import abc 4 | import util 5 | 6 | 7 | class QAbase(object): 8 | """ 9 | Base class for Question Ansering 10 | """ 11 | 12 | def __init__(self, entity_vocab_size, embedding_size, hops=3, 13 | question_encoder='lstm', use_peepholes=True, load_pretrained_model=False, 14 | load_pretrained_vectors=False, pretrained_entity_vectors=None, verbose=False): 15 | 16 | self.entity_vocab_size = entity_vocab_size 17 | self.embedding_size = embedding_size 18 | self.lstm_hidden_size = embedding_size 19 | self.question_encoder = question_encoder 20 | self.use_peepholes = use_peepholes 21 | self.hops = hops 22 | 23 | """Common Network parameters""" 24 | # projection 25 | self.W = tf.get_variable("W", shape=[self.embedding_size, 2 * self.embedding_size], 26 | initializer=tf.contrib.layers.xavier_initializer()) 27 | self.b = tf.Variable(tf.zeros([2 * self.embedding_size]), name="b") 28 | 29 | self.W1 = tf.get_variable("W1", shape=[2 * self.embedding_size, self.embedding_size], 30 | initializer=tf.contrib.layers.xavier_initializer()) 31 | 32 | self.b1 = tf.Variable(tf.zeros([self.embedding_size]), name="b1") 33 | # weights for each hop of the memory network 34 | self.R = [tf.get_variable('R{}'.format(h), shape=[2 * self.embedding_size, 2 * self.embedding_size], 35 | initializer=tf.contrib.layers.xavier_initializer()) for h in range(self.hops)] 36 | self.attn_weights_all_hops = [] 37 | # with tf.device('/cpu:0'): 38 | # embedding layer 39 | initializer_op = None 40 | trainable = False 41 | if load_pretrained_model: 42 | if verbose: 43 | print( 44 | 'Load pretrained model is set to {0} and hence entity_lookup_table trainable is set to {0}'.format( 45 | load_pretrained_model)) 46 | trainable = True 47 | if load_pretrained_vectors: 48 | if verbose: 49 | print('pretrained entity & word embeddings available. Initializing with them.') 50 | assert (pretrained_entity_vectors is not None) 51 | initializer_op = tf.constant_initializer(pretrained_entity_vectors) 52 | else: 53 | if verbose: 54 | print('No pretrained entity & word embeddings available. 
Learning entity embeddings from scratch') 55 | trainable = True 56 | initializer_op = tf.contrib.layers.xavier_initializer() 57 | 58 | self.entity_lookup_table = tf.get_variable("entity_lookup_table", 59 | shape=[self.entity_vocab_size - 1, self.embedding_size], 60 | dtype=tf.float32, 61 | initializer=initializer_op, trainable=trainable) 62 | 63 | # dummy memory is set to -inf, so that during softmax for attention weight, we correctly 64 | # assign these slots 0 weight. 65 | self.entity_dummy_mem = tf.constant(0.0, shape=[1, self.embedding_size], dtype='float32') 66 | 67 | self.entity_lookup_table_extended = tf.concat(0, [self.entity_lookup_table, self.entity_dummy_mem]) 68 | 69 | # for encoding question 70 | # with tf.variable_scope('q_forward'): 71 | self.q_fw_cell = tf.nn.rnn_cell.LSTMCell(self.lstm_hidden_size, use_peepholes=self.use_peepholes, 72 | state_is_tuple=True) 73 | # with tf.variable_scope('q_backward'): 74 | self.q_bw_cell = tf.nn.rnn_cell.LSTMCell(self.lstm_hidden_size, use_peepholes=self.use_peepholes, 75 | state_is_tuple=True) 76 | 77 | def get_question_embedding(self, question, question_lengths): 78 | """ encodes the question. Current implementation is encoding with biLSTM.""" 79 | # question_word_embedding: [B, max_question_length, embedding_dim] 80 | question_word_embedding = tf.nn.embedding_lookup(self.entity_lookup_table_extended, question) 81 | question_word_embedding_shape = tf.shape(question_word_embedding) 82 | if self.question_encoder == 'lstm': 83 | scope_name = tf.get_variable_scope() 84 | with tf.variable_scope(scope_name, reuse=True): 85 | lstm_outputs, lstm_output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=self.q_fw_cell, 86 | cell_bw=self.q_bw_cell, 87 | dtype=tf.float32, 88 | inputs=question_word_embedding, 89 | parallel_iterations=32, 90 | sequence_length=question_lengths) 91 | # fwd_out, bwd_out: [batch_size, embedding_dim] 92 | fwd_out_all, bwd_out_all = lstm_outputs 93 | last_fwd = util.last_relevant(fwd_out_all, question_lengths) 94 | last_bwd = bwd_out_all[:, 0, :] 95 | # question_embedding: [B,2D] 96 | question_embedding = tf.concat(1, [last_fwd, last_bwd]) 97 | else: 98 | raise NotImplementedError 99 | return question_embedding 100 | 101 | def get_key_embedding(self, *args, **kwargs): 102 | raise NotImplementedError 103 | 104 | def get_value_embedding(self, val_mem): 105 | # each is [B, max_num_slots, D] 106 | val_embedding = tf.nn.embedding_lookup(self.entity_lookup_table_extended, val_mem, name="val_embedding") 107 | return val_embedding 108 | 109 | def seek_attention(self, question_embedding, key, value, C, mask): 110 | """ Iterative attention. 
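    At each hop, the attention logit for a memory slot is the dot product of the
    current question embedding [B, 2D] with that slot's key [B, M, 2D]; padded
    slots are masked out by substituting the large negative constant C before the
    softmax, so they receive ~0 weight. The attention-weighted sum of the value
    slots gives a [B, D] read vector, which is projected back to 2D with (W, b),
    added to the question embedding, and multiplied by R[h] to form the query
    for the next hop.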
""" 111 | for h in range(self.hops): 112 | expanded_question_embedding = tf.expand_dims(question_embedding, 1) 113 | # self.key*expanded_question_embedding [B, M, 2D]; self.attn_weights: [B,M] 114 | attn_logits = tf.reduce_sum(key * expanded_question_embedding, 2) 115 | attn_logits = tf.select(mask, attn_logits, C) 116 | self.attn_weights = tf.nn.softmax(attn_logits) 117 | self.attn_weights_all_hops.append(self.attn_weights) 118 | # self.p = tf.Print(attn_weights, [attn_weights], message='At hop {}'.format(h), summarize=10) 119 | # attn_weights_reshape: [B, M, 1] 120 | attn_weights_reshape = tf.expand_dims(self.attn_weights, -1) 121 | # self.value * attn_weights_reshape:[B, M, D]; self.attn_value:[B, D] 122 | attn_value = tf.reduce_sum(value * attn_weights_reshape, 1) 123 | # attn_value_proj : [B, 2D] 124 | # attn_value_proj = tf.nn.relu(tf.add(tf.matmul(attn_value, self.W), self.b)) 125 | attn_value_proj = tf.add(tf.matmul(attn_value, self.W), self.b) 126 | sum = question_embedding + attn_value_proj 127 | # question_embedding: [B, 2D] 128 | question_embedding = tf.matmul(sum, self.R[h]) 129 | return question_embedding 130 | 131 | # def seek_attention(self, question_embedding, key, value, C, mask): 132 | # """ Iterative attention. """ 133 | # for h in range(self.hops): 134 | # attn_logits = tf.einsum('ijk,ik->ij', key, question_embedding) # self.attn_weights: [B,M] 135 | # attn_logits = tf.select(mask, attn_logits, C) 136 | # attn_weights = tf.nn.softmax(attn_logits) 137 | # attn_value = tf.einsum('ijk,ij->ik',value,attn_weights) # self.attn_value:[B, D] 138 | # attn_value_proj = tf.add(tf.matmul(attn_value, self.W), self.b) # attn_value_proj : [B, 2D] 139 | # total_emb = question_embedding + attn_value_proj 140 | # question_embedding = tf.matmul(total_emb, self.R[h]) # question_embedding: [B, 2D] 141 | # return question_embedding 142 | 143 | 144 | def __call__(self, *args, **kwargs): 145 | raise NotImplementedError 146 | 147 | 148 | class KBQA(QAbase): 149 | """ 150 | Class for KB Question Answering 151 | TODO(rajarshd): describe input/output behaviour 152 | """ 153 | 154 | def __init__(self, relation_vocab_size, 155 | key_encoder='concat', **kwargs): 156 | super(KBQA, self).__init__(**kwargs) 157 | self.key_encoder = key_encoder 158 | self.relation_vocab_size = relation_vocab_size 159 | 160 | """Specialized Network parameters""" 161 | self.relation_lookup_table = tf.get_variable("relation_lookup_table", shape=[self.relation_vocab_size - 1, 162 | self.embedding_size], 163 | initializer=tf.contrib.layers.xavier_initializer()) 164 | 165 | self.relation_dummy_mem = tf.constant(0.0, shape=[1, self.embedding_size], dtype='float32') 166 | 167 | self.relation_lookup_table = tf.concat(0, [self.relation_lookup_table, self.relation_dummy_mem]) 168 | 169 | def get_key_embedding(self, entity, relation): 170 | """TODO(rajarshd): describe various options""" 171 | # each is [B, max_num_slots, D] 172 | e1_embedding = tf.nn.embedding_lookup(self.entity_lookup_table_extended, entity, name="e1_embedding") 173 | r_embedding = tf.nn.embedding_lookup(self.relation_lookup_table, relation, name="r_embedding") 174 | 175 | # key shape is [B, max_num_slots, 2D] 176 | if self.key_encoder == 'concat': 177 | key = tf.concat(2, [e1_embedding, r_embedding]) 178 | else: 179 | raise NotImplementedError 180 | return key 181 | 182 | def __call__(self, memory, question, question_lengths): 183 | # split memory and get corresponding embeddings 184 | e1, r, e2 = tf.unpack(memory, axis=2) 185 | C = tf.ones_like(e1, 
dtype='float32') * -1000 186 | mask = tf.not_equal(e1, self.entity_vocab_size - 1) 187 | key = self.get_key_embedding(e1, r) 188 | value = self.get_value_embedding(e2) 189 | ques = self.get_question_embedding(question, question_lengths) 190 | 191 | # get attention on retrived informations based on the question 192 | attn_ques = self.seek_attention(ques, key, value, C, mask) 193 | 194 | # output embeddings - share with entity lookup table 195 | # B = tf.slice(self.entity_lookup_table, [0, 0], [1789936, -1]) 196 | B = self.entity_lookup_table_extended 197 | # project down 198 | model_answer = tf.add(tf.matmul(attn_ques, self.W1), self.b1) # model_answer: [B, D] 199 | logits = tf.matmul(model_answer, B, transpose_b=True, name='ent_mul_manzil') # scores: [B, num_entities] 200 | return logits 201 | 202 | 203 | class TextQA(QAbase): 204 | """ 205 | Class for QA with Text only 206 | TODO(rajarshd): describe input/output behaviour 207 | """ 208 | 209 | def __init__(self, key_encoder='lstm', 210 | separate_key_lstm=False, **kwargs): 211 | super(TextQA, self).__init__(**kwargs) 212 | self.key_encoder = key_encoder 213 | self.separate_key_lstm = separate_key_lstm 214 | 215 | """Specialized Network parameters""" 216 | # for encoding key 217 | if self.separate_key_lstm: 218 | with tf.variable_scope('k_forward'): 219 | self.k_fw_cell = tf.nn.rnn_cell.LSTMCell(self.lstm_hidden_size, use_peepholes=self.use_peepholes, 220 | state_is_tuple=True) 221 | with tf.variable_scope('k_backward'): 222 | self.k_bw_cell = tf.nn.rnn_cell.LSTMCell(self.lstm_hidden_size, use_peepholes=self.use_peepholes, 223 | state_is_tuple=True) 224 | 225 | def get_key_embedding(self, key_mem, key_lens): 226 | """TODO(rajarshd): describe various options""" 227 | # each is [B, max_num_slots, max_key_len, D] 228 | key_embedding = tf.nn.embedding_lookup(self.entity_lookup_table_extended, key_mem, name="key_embedding") 229 | # reshape the data to [(B, max_num_slots), max_key_len, D] 230 | dims = tf.shape(key_embedding) 231 | key_embedding_reshaped = tf.reshape(key_embedding, [-1, dims[2], self.embedding_size]) 232 | key_len_reshaped = tf.reshape(key_lens, [-1]) 233 | if self.key_encoder == 'lstm': 234 | scope_name = tf.get_variable_scope() 235 | with tf.variable_scope(scope_name, reuse=None): 236 | if self.separate_key_lstm: 237 | lstm_key_outputs, lstm_key_output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=self.k_fw_cell, 238 | cell_bw=self.k_bw_cell, 239 | dtype=tf.float32, 240 | inputs=key_embedding_reshaped, 241 | parallel_iterations=32, 242 | sequence_length=key_len_reshaped) 243 | else: 244 | lstm_key_outputs, lstm_key_output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=self.q_fw_cell, 245 | cell_bw=self.q_bw_cell, 246 | dtype=tf.float32, 247 | inputs=key_embedding_reshaped, 248 | parallel_iterations=32, 249 | sequence_length=key_len_reshaped) 250 | 251 | lstm_key_output_states_fw, lstm_key_output_states_bw = lstm_key_output_states 252 | fw_c, fw_h = lstm_key_output_states_fw 253 | bw_c, bw_h = lstm_key_output_states_bw 254 | # [(B, max_num_slots), 2D] 255 | key = tf.reshape(tf.concat(1, [fw_h, bw_h]), [-1, dims[1], 2 * self.embedding_size]) 256 | else: 257 | raise NotImplementedError 258 | return key 259 | 260 | def __call__(self, key_mem, key_len, val_mem, question, question_lengths): 261 | # key_mem is [B, max_num_mem, max_key_len] 262 | # key_len is [B, max_num_mem] 263 | # val_mem is [B, max_num_mem] 264 | 265 | C = tf.ones_like(key_len, dtype='float32') * -1000 266 | mask = tf.not_equal(key_len, 0) 267 | key = 
self.get_key_embedding(key_mem, key_len) 268 | value = self.get_value_embedding(val_mem) 269 | 270 | ques = self.get_question_embedding(question, question_lengths) 271 | 272 | # get attention on retrived informations based on the question 273 | attn_ques = self.seek_attention(ques, key, value, C, mask) 274 | 275 | # output embeddings - share with entity lookup table 276 | B = self.entity_lookup_table_extended 277 | # project down 278 | model_answer = tf.add(tf.matmul(attn_ques, self.W1), self.b1) # model_answer: [B, D] 279 | logits = tf.matmul(model_answer, B, transpose_b=True, name='ent_mul_manzil') # scores: [B, num_entities] 280 | return logits 281 | 282 | 283 | class TextKBQA(QAbase): 284 | """ 285 | Class for QA with Text+KB 286 | """ 287 | 288 | def __init__(self, relation_vocab_size, 289 | kb_key_encoder='concat', 290 | text_key_encoder='lstm', 291 | join='concat2', 292 | separate_key_lstm=False, **kwargs): 293 | super(TextKBQA, self).__init__(**kwargs) 294 | self.join = join 295 | self.kb_key_encoder = kb_key_encoder 296 | self.text_key_encoder = text_key_encoder 297 | self.separate_key_lstm = separate_key_lstm 298 | self.relation_vocab_size = relation_vocab_size 299 | 300 | """Specialized Network parameters""" 301 | # projection 302 | self.relation_lookup_table = tf.get_variable("relation_lookup_table", shape=[self.relation_vocab_size - 1, 303 | self.embedding_size], 304 | initializer=tf.contrib.layers.xavier_initializer()) 305 | 306 | self.relation_dummy_mem = tf.constant(0.0, shape=[1, self.embedding_size], dtype='float32') 307 | 308 | self.relation_lookup_table_extended = tf.concat(0, [self.relation_lookup_table, self.relation_dummy_mem]) 309 | 310 | # for encoding key 311 | if self.separate_key_lstm: 312 | with tf.variable_scope('k_forward'): 313 | self.k_fw_cell = tf.nn.rnn_cell.LSTMCell(self.lstm_hidden_size, use_peepholes=self.use_peepholes, 314 | state_is_tuple=True) 315 | with tf.variable_scope('k_backward'): 316 | self.k_bw_cell = tf.nn.rnn_cell.LSTMCell(self.lstm_hidden_size, use_peepholes=self.use_peepholes, 317 | state_is_tuple=True) 318 | 319 | def get_key_embedding(self, entity, relation, key_mem, key_lens): 320 | # each is [B, max_num_slots, D] 321 | e1_embedding = tf.nn.embedding_lookup(self.entity_lookup_table_extended, entity, name="e1_embedding") 322 | r_embedding = tf.nn.embedding_lookup(self.relation_lookup_table_extended, relation, name="r_embedding") 323 | 324 | # key shape is [B, max_num_slots, 2D] 325 | if self.kb_key_encoder == 'concat': 326 | kb_key = tf.concat(2, [e1_embedding, r_embedding]) 327 | else: 328 | raise NotImplementedError 329 | 330 | # each is [B, max_num_slots, max_key_len, D] 331 | key_embedding = tf.nn.embedding_lookup(self.entity_lookup_table_extended, key_mem, name="key_embedding") 332 | # reshape the data to [(B, max_num_slots), max_key_len, D] 333 | dims = tf.shape(key_embedding) 334 | key_embedding_reshaped = tf.reshape(key_embedding, [-1, dims[2], self.embedding_size]) 335 | key_len_reshaped = tf.reshape(key_lens, [-1]) 336 | if self.text_key_encoder == 'lstm': 337 | scope_name = tf.get_variable_scope() 338 | with tf.variable_scope(scope_name, reuse=None): 339 | if self.separate_key_lstm: 340 | lstm_key_outputs, lstm_key_output_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=self.k_fw_cell, 341 | cell_bw=self.k_bw_cell, 342 | dtype=tf.float32, 343 | inputs=key_embedding_reshaped, 344 | parallel_iterations=32, 345 | sequence_length=key_len_reshaped) 346 | else: 347 | lstm_key_outputs, lstm_key_output_states = 
tf.nn.bidirectional_dynamic_rnn(cell_fw=self.q_fw_cell, 348 | cell_bw=self.q_bw_cell, 349 | dtype=tf.float32, 350 | inputs=key_embedding_reshaped, 351 | parallel_iterations=32, 352 | sequence_length=key_len_reshaped) 353 | 354 | lstm_key_output_states_fw, lstm_key_output_states_bw = lstm_key_output_states 355 | fw_c, fw_h = lstm_key_output_states_fw 356 | bw_c, bw_h = lstm_key_output_states_bw 357 | # [(B, max_num_slots), 2D] 358 | text_key = tf.reshape(tf.concat(1, [fw_h, bw_h]), [-1, dims[1], 2 * self.embedding_size]) 359 | else: 360 | raise NotImplementedError 361 | return kb_key, text_key 362 | 363 | def __call__(self, memory, key_mem, key_len, val_mem, question, question_lengths): 364 | # split memory and get corresponding embeddings 365 | e1, r, e2 = tf.unpack(memory, axis=2) 366 | kb_C = tf.ones_like(e1, dtype='float32') * -1000 367 | kb_mask = tf.not_equal(e1, self.entity_vocab_size - 1) 368 | kb_value = self.get_value_embedding(e2) 369 | 370 | # key_mem is [B, max_num_mem, max_key_len] 371 | # key_len is [B, max_num_mem] 372 | # val_mem is [B, max_num_mem] 373 | text_C = tf.ones_like(key_len, dtype='float32') * -1000 374 | text_mask = tf.not_equal(key_len, 0) 375 | text_value = self.get_value_embedding(val_mem) 376 | 377 | kb_key, text_key = self.get_key_embedding(e1, r, key_mem, key_len) 378 | ques = self.get_question_embedding(question, question_lengths) 379 | 380 | # get attention on retrived informations based on the question 381 | kb_attn_ques = self.seek_attention(ques, kb_key, kb_value, kb_C, kb_mask) # [B, 2D] 382 | text_attn_ques = self.seek_attention(ques, text_key, text_value, text_C, text_mask) # [B, 2D] 383 | 384 | if self.join == 'batch_norm': 385 | mean_kb_key, var_kb_key = tf.nn.moments(kb_key, axes=[0,1]) 386 | mean_kb_value, var_kb_value = tf.nn.moments(kb_value, axes=[0,1]) 387 | mean_text_key, var_text_key = tf.nn.moments(kb_key, axes=[0,1]) 388 | mean_text_value, var_text_value = tf.nn.moments(kb_value, axes=[0,1]) 389 | text_key = tf.nn.batch_normalization(text_key, mean_text_key, var_text_key, mean_kb_key, var_kb_key, 1e-8) 390 | text_value = tf.nn.batch_normalization(text_value, mean_text_value, var_text_value, mean_kb_value, var_kb_value, 1e-8) 391 | 392 | merged_key = tf.concat(1, [kb_key, text_key]) 393 | merged_value = tf.concat(1, [kb_value, text_value]) 394 | merged_C = tf.concat(1, [kb_C, text_C]) 395 | merged_mask = tf.concat(1, [kb_mask, text_mask]) 396 | 397 | # get attention on retrived informations based on the question 398 | attn_ques = self.seek_attention(ques, merged_key, merged_value, merged_C, merged_mask) # [B, 2D] 399 | model_answer = tf.add(tf.matmul(attn_ques, self.W1), self.b1) # model_answer: [B, D] 400 | 401 | # output embeddings - share with entity lookup table 402 | B = self.entity_lookup_table_extended 403 | logits = tf.matmul(model_answer, B, transpose_b=True, name='ent_mul_manzil') # scores: [B, num_entities] 404 | return logits 405 | -------------------------------------------------------------------------------- /code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rajarshd/TextKBQA/248a5d5ad46f64443a8a7e6e892f064f50be7e96/code/__init__.py -------------------------------------------------------------------------------- /code/baseline_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def augment_with_baseline_answers(baseline_answer_file, input_file, output_file): 4 | out = 
open(output_file, 'w') 5 | with open(baseline_answer_file) as input, open(input_file) as data_file: 6 | for baseline_answer_line, line in zip(input, data_file): 7 | baseline_answer_line = baseline_answer_line.strip() 8 | sentence, correct_answer, predicted_answer = baseline_answer_line.split('\t') 9 | correct = 1 if correct_answer == predicted_answer else 0 10 | data = json.loads(line) 11 | data['baseline_answer'] = predicted_answer 12 | data['is_correct'] =correct 13 | out.write(json.dumps(data)+'\n') 14 | 15 | 16 | def get_baseline_accuracy(input_file, min_num_mem, max_num_mem): 17 | num_correct = 0 18 | num_data = 0 19 | with open(input_file) as input: 20 | for line in input: 21 | line = line.strip() 22 | data = json.loads(line) 23 | num_facts = data['num_facts'] 24 | if num_facts < min_num_mem or num_facts > max_num_mem: 25 | continue 26 | num_data += 1 27 | num_correct += data['is_correct'] 28 | 29 | print('Num data {0:10d}, Num correct {1:10d}, %correct {2:10.4f}'.format(num_data, num_correct, 1.0*num_correct/num_data)) 30 | 31 | 32 | if __name__ == '__main__': 33 | 34 | baseline_answer_file = "/home/rajarshi/canvas/data/TextKBQA/dev_answers.txt" 35 | input_file = "/home/rajarshi/canvas/data/TextKBQA/dev_with_facts.json" 36 | output_file = "/home/rajarshi/canvas/data/TextKBQA/dev_with_baseline_answers.json" 37 | 38 | # augment_with_baseline_answers(baseline_answer_file, input_file, output_file) 39 | get_baseline_accuracy(output_file, 0, 25000) -------------------------------------------------------------------------------- /code/data_utils.py: -------------------------------------------------------------------------------- 1 | import json, os 2 | from collections import defaultdict 3 | import operator 4 | import util 5 | import argparse 6 | from tqdm import tqdm 7 | import numpy as np 8 | 9 | 10 | class KB(object): 11 | """Class for freebase""" 12 | def __init__(self, input_file, create_vocab=False, vocab_dir=''): 13 | self.input_file = input_file 14 | self.create_vocab = create_vocab 15 | self.vocab_dir = vocab_dir 16 | self.entity_vocab = None 17 | self.relation_vocab = None 18 | if self.create_vocab: 19 | self.entity_vocab, self.relation_vocab, self.facts = self.build_int_map() 20 | else: 21 | self.facts, self.facts_list = self.read_kb_facts() 22 | self.entity_vocab = json.load(open(vocab_dir+'/entity_vocab.json')) 23 | self.relation_vocab = json.load(open(vocab_dir + '/relation_vocab.json')) 24 | 25 | self.num_entities = len(self.entity_vocab) if self.entity_vocab is not None else None 26 | self.num_relations = len(self.relation_vocab) if self.relation_vocab is not None else None 27 | 28 | def read_kb_facts(self): 29 | facts = [] 30 | facts_list = defaultdict(list) 31 | print('Reading kb file at {}'.format(self.input_file)) 32 | with open(self.input_file) as fb: 33 | for counter, line in tqdm(enumerate(fb)): 34 | line = line.strip() 35 | line = line[1:-1] 36 | e1, r1, r2, e2 = [a.strip('\'') for a in [x.strip() for x in line.split(',')]] 37 | r = r1 + '_' + r2 38 | facts.append({'e1': e1, 'r': r, 'e2': e2}) 39 | facts_list[e1].append(counter) # just store the fact counter instead of the fact 40 | return facts, facts_list 41 | 42 | def build_int_map(self): 43 | entity_count_map = defaultdict(int) 44 | relation_count_map = defaultdict(int) 45 | facts = [] 46 | facts_list = defaultdict(list) 47 | with open(self.input_file) as fb: 48 | for line in tqdm(fb): 49 | line = line.strip() 50 | line = line[1:-1] 51 | e1, r1, r2, e2 = [a.strip('\'') for a in [x.strip() for x in 
line.split(',')]] 52 | r = r1 + '_' + r2 53 | entity_count_map[e1] += 1 54 | entity_count_map[e2] += 1 55 | relation_count_map[r] += 1 56 | facts.append({'e1': e1, 'r': r, 'e2': e2}) 57 | 58 | sort_key_by_freq = lambda x: sorted(x.items(), key=operator.itemgetter(1), reverse=True) 59 | entity_vocab = {k: counter + 2 for counter, (k, _) in enumerate(sort_key_by_freq(entity_count_map))} 60 | relation_vocab = {k: counter + 2 for counter, (k, _) in enumerate(sort_key_by_freq(relation_count_map))} 61 | entity_vocab['PAD'] = 0 62 | entity_vocab['UNK'] = 1 63 | relation_vocab['PAD'] = 0 64 | relation_vocab['UNK'] = 1 65 | relation_vocab['DUMMY_MEM'] = len(relation_vocab) 66 | #the dummy_mem key to entity_vocab is added at the end of augmenting 67 | # the entity_vocab with words from questions. c.f. augment_to_entity_vocab method 68 | 69 | return entity_vocab, relation_vocab, facts 70 | 71 | def save_vocab(self,**kwargs): 72 | assert len(self.vocab_dir) != 0 73 | with open(self.vocab_dir + '/entity_vocab.json', 'w') as ent_out, \ 74 | open(self.vocab_dir + '/relation_vocab.json', 'w') as rel_out: 75 | json.dump(self.entity_vocab, ent_out) 76 | json.dump(self.relation_vocab, rel_out) 77 | 78 | with open(self.vocab_dir+'/stats.json','w') as f_out: 79 | f_out.write("Num entities {}\n".format(self.num_entities)) 80 | f_out.write("Num relations {}\n".format(self.num_relations)) 81 | if 'num_words' in kwargs: 82 | f_out.write("Num words {}\n".format(kwargs['num_words'])) 83 | 84 | class TextKb(object): 85 | """Class for kb formed from training data""" 86 | 87 | def __init__(self, input_file, create_vocab=False, vocab_dir=''): 88 | 89 | self.facts_list, self.entity_facts_index_map, self.max_key_length = self.parse_kb(input_file) 90 | self.entity_vocab = json.load(open(vocab_dir + '/entity_vocab.json')) 91 | self.relation_vocab = json.load(open(vocab_dir + '/relation_vocab.json')) 92 | def parse_kb(self, input_file): 93 | kb_facts = [] 94 | entity_facts_index_map = {} #map from entity to tuple of (start_index, num_facts) 95 | prev_entity = None 96 | start_index = 0 97 | num_facts = 0 98 | max_key_length = -1 99 | print('Reading the text kb file...') 100 | with open(input_file) as fin: 101 | for counter, line in tqdm(enumerate(fin)): 102 | kb_instance = json.loads(line) 103 | kb_facts.append(kb_instance) 104 | entity = kb_instance['entity'] 105 | key_length = int(kb_instance['key_length']) 106 | if key_length > max_key_length: 107 | max_key_length = key_length 108 | if prev_entity != entity: 109 | if prev_entity is not None: 110 | entity_facts_index_map[prev_entity] = (start_index, num_facts) 111 | start_index = counter 112 | prev_entity = entity 113 | num_facts = 0 114 | num_facts+=1 115 | return kb_facts, entity_facts_index_map, max_key_length 116 | 117 | 118 | class QuestionAnswer(object): 119 | """Class for parsing a single question answer pair""" 120 | 121 | def __init__(self, json_string): 122 | self.json_str = json_string 123 | #indices are positions of entities in question str. 
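        # Illustrative record (toy values; real data uses Freebase entity ids) showing
        # the fields parse_question_str below expects; the start_indices / lengths /
        # num_facts (and text_kb_*) fields are only present after the augment_* steps:
        #   {"sentence": "_blank_ directed titanic .",
        #    "answerSubset": ["james_cameron"],
        #    "entities": [{"entity": "titanic", "index": 2}],
        #    "start_indices": [...], "lengths": [...], "num_facts": ...}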
124 | self.entities = [] 125 | self.parsed_question = self.parse_question_str(self.json_str) 126 | 127 | def parse_question_str(self, json_str): 128 | q_json = json.loads(json_str) 129 | question = q_json['sentence'] 130 | answers = q_json['answerSubset'] 131 | entities = [] 132 | indices = [] 133 | ret = {} 134 | ret['question'] = question 135 | ret['answers'] = answers 136 | for entity in q_json['entities']: 137 | entities.append(entity['entity']) 138 | self.entities.append(entity['entity']) 139 | indices.append(entity['index']) 140 | ret['entities'] = entities 141 | ret['indices'] = indices 142 | #get the memory slots 143 | if 'start_indices' in q_json: 144 | ret['start_indices'] = q_json['start_indices'] 145 | ret['fact_lengths'] = q_json['lengths'] 146 | ret['num_facts'] = q_json['num_facts'] 147 | if 'text_kb_num_facts' in q_json: 148 | ret['text_kb_num_facts'] = q_json['text_kb_num_facts'] 149 | ret['text_kb_start_indices'] = q_json['text_kb_start_indices'] 150 | ret['text_kb_lengths'] = q_json['text_kb_lengths'] 151 | if 'black_lists' in q_json: 152 | ret['blacklists'] = q_json['black_lists'] 153 | 154 | return ret 155 | 156 | def get_supporting_KB_facts(self, KB): 157 | """get the supporting KB facts for this QA pair. Should be just called once.""" 158 | # look at the entities in the question. \ 159 | # Retrieve all facts from KB which have\ 160 | # the entities in the given question 161 | 162 | start_indices = [] 163 | lengths = [] 164 | for entity in self.entities: 165 | if entity in KB.facts_list: 166 | # facts.update(set(KB.facts_list[entity])) 167 | # KB.facts_list[entity] is a contiguous list of numbers since the KB is sorted wrt e1, hence I am storing\ 168 | # the start index and number 169 | start_index = KB.facts_list[entity][0] 170 | length = len(KB.facts_list[entity]) 171 | start_indices.append(start_index) 172 | lengths.append(length) 173 | return start_indices, lengths 174 | 175 | def get_supporting_text_kb_facts(self, text_kb): 176 | """get the supporting text KB facts for this QA pair. 
Should be just called once.""" 177 | start_indices = [] 178 | fact_lengths = [] 179 | for entity in self.parsed_question['entities']: 180 | if entity in text_kb.entity_facts_index_map: 181 | start_index, num_facts = text_kb.entity_facts_index_map[entity] 182 | start_indices.append(start_index) 183 | fact_lengths.append(num_facts) 184 | return start_indices, fact_lengths 185 | 186 | 187 | 188 | class Text(object): 189 | """Class for each textual questions file""" 190 | 191 | def __init__(self, input_file, **kwargs): 192 | self.input_file = input_file 193 | self.kb = kwargs['kb'] if 'kb' in kwargs else None #the kb object its entities are tied to 194 | self.max_kb_facts_allowed = kwargs['max_num_facts'] if 'max_num_facts' in kwargs \ 195 | else float('inf') 196 | self.min_kb_facts_allowed = kwargs['min_num_facts'] if 'min_num_facts' in kwargs \ 197 | else 0 198 | self.max_text_kb_facts_allowed = kwargs['max_num_text_facts'] if 'max_num_text_facts' in kwargs \ 199 | else float('inf') 200 | self.min_text_kb_facts_allowed = kwargs['min_num_text_facts'] if 'min_num_text_facts' in kwargs \ 201 | else 0 202 | self.max_q_length, self.max_num_kb_facts, self.max_num_text_kb_facts, self.question_list, \ 203 | self.num_entities, self.entity_set, self.answer_entities = self.read_and_parse() 204 | 205 | def read_and_parse(self): 206 | max_length = -1 207 | max_num_kb_facts = -1 208 | max_num_text_kb_facts = -1 209 | question_list = [] 210 | set_entities = set() 211 | answer_entities = [] 212 | print('Reading questions file at {}'.format(self.input_file)) 213 | with open(self.input_file) as f_in: 214 | for counter, line in tqdm(enumerate(f_in)): 215 | line = line.strip() 216 | qa = QuestionAnswer(line) #parse each line 217 | question_str = qa.parsed_question['question'] 218 | length_q = len(question_str.split(' ')) 219 | max_length = max(max_length, length_q) 220 | num_kb_facts = qa.parsed_question['num_facts'] if 'num_facts' in qa.parsed_question else 0 221 | num_text_kb_facts = qa.parsed_question['text_kb_num_facts'] if 'text_kb_num_facts' in qa.parsed_question else 0 222 | if num_kb_facts > self.max_kb_facts_allowed: 223 | num_kb_facts = self.max_kb_facts_allowed 224 | elif num_kb_facts < self.min_kb_facts_allowed: 225 | continue 226 | if num_text_kb_facts > self.max_text_kb_facts_allowed: 227 | num_text_kb_facts = self.max_text_kb_facts_allowed 228 | elif num_text_kb_facts < self.min_text_kb_facts_allowed: 229 | continue 230 | 231 | max_num_kb_facts = max(num_kb_facts, max_num_kb_facts) 232 | max_num_text_kb_facts = max(num_text_kb_facts, max_num_text_kb_facts) 233 | 234 | entities = qa.parsed_question['entities'] 235 | for entity in entities: 236 | set_entities.add(entity) 237 | set_entities.add(qa.parsed_question['answers'][0]) 238 | answer_entities.append(qa.parsed_question['answers'][0]) 239 | question_list.append(qa) 240 | return max_length,max_num_kb_facts, max_num_text_kb_facts, question_list, len(set_entities), set_entities, answer_entities 241 | 242 | def augment_to_entity_vocab(self): 243 | print("Augmenting words into entity vocab") 244 | entity_vocab = self.kb.entity_vocab 245 | assert entity_vocab is not None 246 | count_map = defaultdict(int) 247 | with open(self.input_file) as f_in: 248 | for line in tqdm(f_in): 249 | line = line.strip() 250 | qa = QuestionAnswer(line) #parse each line 251 | question_str = qa.parsed_question['question'] 252 | question_entities = qa.parsed_question['entities'] 253 | question_indices = qa.parsed_question['indices'] 254 | words = question_str.split(' ') 
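                # Walk the tokens in parallel with the entity positions: a token sitting
                # at an entity index is counted under the entity id, every other token
                # under its surface form, so question words and entities end up sharing
                # one (entity) vocabulary.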
255 | count = 0 256 | for index, word in enumerate(words): 257 | if count >= len(question_indices) or\ 258 | index != question_indices[count]: 259 | count_map[word] += 1 260 | else: 261 | count_map[question_entities[count]] += 1 262 | count += 1 263 | sorted_k_v_list = util.sort_keys(count_map) 264 | self.num_words = len(sorted_k_v_list) 265 | for k,_ in sorted_k_v_list: 266 | if k not in entity_vocab: #question entities might already be present in entity_vocab 267 | entity_vocab[k] = len(entity_vocab) 268 | entity_vocab['DUMMY_MEM'] = len(entity_vocab) 269 | 270 | 271 | ########### stuff which would be called once ####################### 272 | 273 | def augment_qa_with_kb_facts(file_name, out_file_name, kb): 274 | out = open(out_file_name, 'w') 275 | with open(file_name) as input_file: 276 | for counter, line in tqdm(enumerate(input_file)): 277 | line = line.strip() 278 | qa = QuestionAnswer(line) 279 | start_indices, lengths = qa.get_supporting_KB_facts(kb) 280 | q_json = json.loads(line) 281 | q_json['start_indices'] = start_indices 282 | q_json['lengths'] = lengths 283 | num_facts = 0 284 | for length in lengths: 285 | num_facts += length 286 | q_json['num_facts'] = num_facts 287 | json_str = json.dumps(q_json) 288 | out.write(json_str + '\n') 289 | 290 | def augment_qa_with_text_kb_facts(file_name, out_file_name, text_kb, is_train_file=False): 291 | out = open(out_file_name, 'w') 292 | with open(file_name) as input_file: 293 | for counter, line in tqdm(enumerate(input_file)): 294 | line = line.strip() 295 | qa = QuestionAnswer(line) 296 | start_indices, lengths = qa.get_supporting_text_kb_facts(text_kb) 297 | q_json = json.loads(line) 298 | q_json['text_kb_start_indices'] = start_indices 299 | q_json['text_kb_lengths'] = lengths 300 | num_facts = 0 301 | for length in lengths: 302 | num_facts += length 303 | q_json['text_kb_num_facts'] = num_facts 304 | 305 | # # since the textkb is made out of sentences in train set 306 | # # hence removing the memory slots which are formed of this particular example. 
307 | # if is_train_file: 308 | # q_word_list = qa.parsed_question['question'].split(' ') 309 | # answer_entity = qa.parsed_question['answers'][0] 310 | # #replace blank with answer entity: 311 | # for word_counter, word in enumerate(q_word_list): 312 | # if word == '_blank_': 313 | # q_word_list[word_counter] = answer_entity 314 | # break 315 | # question_entities = qa.parsed_question['entities'] 316 | # question_indices = qa.parsed_question['indices'] 317 | # #replace words with entities 318 | # for counter, index in enumerate(question_indices): 319 | # q_word_list[index] = question_entities[counter] 320 | # text_kb_fact_list = text_kb.facts_list 321 | # black_lists = [] #this will be a list of lists 322 | # for counter, start_index in enumerate(start_indices): 323 | # each_entity_black_list = [] 324 | # fact_length = lengths[counter] 325 | # for mem_counter, mem_index in enumerate(xrange(start_index, start_index+fact_length)): 326 | # mem = text_kb_fact_list[mem_index] 327 | # key = mem['key'] 328 | # val_entity = mem['value'] 329 | # for word_counter, word in enumerate(key): 330 | # if word == '_blank_': 331 | # key[word_counter] = val_entity 332 | # #check if key and q_word_list are equal; if they are black list that memory 333 | # if key == q_word_list: 334 | # each_entity_black_list.append(mem_counter) 335 | # black_lists.append(each_entity_black_list) 336 | # q_json['black_lists'] = black_lists 337 | json_str = json.dumps(q_json) 338 | out.write(json_str + '\n') 339 | 340 | 341 | def break_input_file(input_file, start, end): 342 | # only select inputs which have number of facts between start and end 343 | out_file = input_file + '.{}.{}'.format(start, end) 344 | f_out = open(out_file, 'w') 345 | with open(input_file) as f_in: 346 | for line in tqdm(f_in): 347 | q_json = json.loads(line) 348 | num_facts = q_json['num_facts'] 349 | if start <= num_facts <= end: 350 | f_out.write(line + '\n') 351 | 352 | def extract_appropriate_freebase(train_file, kb_file): 353 | train = Text(train_file) 354 | # all entities in train set 355 | train_entities = train.entity_set 356 | facts = [] 357 | print('Reading kb file at {}'.format(kb_file)) 358 | kb_file_out = open(kb_file+'.small', 'w') 359 | with open(kb_file) as fb: 360 | for line in tqdm(fb): 361 | line = line.strip() 362 | line = line[1:-1] 363 | e1, r1, r2, e2 = [a.strip('"') for a in [x.strip() for x in line.split(',')]] 364 | if e1.strip() in train_entities or e2.strip() in train_entities: 365 | kb_file_out.write(line+'\n') 366 | 367 | def count_num_entities(train_file, dev_file): 368 | num_entities = 0 369 | train = Text(train_file) 370 | dev = Text(dev_file) 371 | num_entities = train.num_entities + dev.num_entities 372 | print(train.num_entities, dev.num_entities, num_entities) 373 | train_entities = train.entity_set 374 | dev_entities = dev.entity_set 375 | print('Number of new elements in dev set {}'.format(len(dev_entities.difference(train_entities)))) 376 | train_answer_entities = train.answer_entities 377 | dev_answer_entities = dev.answer_entities 378 | unseen_count = 0 379 | for entity in dev_answer_entities: 380 | if entity not in train_entities: 381 | unseen_count += 1 382 | print('Number of dev questions which has unseen entities as answer {}'.format(unseen_count)) 383 | 384 | def freebase_sort_wrt_entity1(freebase_file, output_file): 385 | facts_list = defaultdict(list) 386 | print('Reading kb file at {}'.format(freebase_file)) 387 | with open(freebase_file) as fb: 388 | for line in tqdm(fb): 389 | line = line.strip() 390 | 
line = line[1:-1] 391 | e1, r1, r2, e2 = [a.strip('"') for a in [x.strip() for x in line.split(',')]] 392 | facts_list[e1].append([e1, r1, r2, e2]) 393 | print('Writing to file...') 394 | f_out = open(output_file, 'w') 395 | for e1, facts_of_e1 in tqdm(facts_list.iteritems()): 396 | for facts in facts_of_e1: 397 | f_out.write(str(facts)+'\n') 398 | 399 | def make_text_kb(train_file): 400 | """ 401 | Make textual kb from sentences in training data 402 | :return: 403 | """ 404 | output_file = "/home/rajarshi/canvas/data/SPADES_NEW/text_kb.spades.txt" 405 | if os.path.exists(output_file): 406 | os.remove(output_file) 407 | out = open(output_file, 'a') 408 | with open(train_file) as f_in: 409 | entity_to_sentence = defaultdict(list) 410 | print('Processing the training file...') 411 | for counter, line in tqdm(enumerate(f_in)): 412 | line = line.strip() 413 | qa = QuestionAnswer(line) # parse each line 414 | question_words_list = qa.parsed_question['question'].split(' ') 415 | question_entities = qa.parsed_question['entities'] 416 | question_entity_indices = qa.parsed_question['indices'] 417 | for counter, index in enumerate(question_entity_indices): #replace words with entities 418 | question_words_list[index] = question_entities[counter] 419 | answer_entity = qa.parsed_question['answers'][0] 420 | #replace _blank_ in question_words_list 421 | answer_index = -1 422 | for counter, word in enumerate(question_words_list): 423 | if word == '_blank_': 424 | question_words_list[counter] = answer_entity 425 | answer_index = counter 426 | question_entities.append(answer_entity) #question_entities now contains all entities including answer entity 427 | question_entity_indices.append(answer_index) #question_entity_indices now contains all entity indices 428 | for question_entity in question_entities: 429 | entity_to_sentence[question_entity].append((question_words_list, question_entity_indices)) 430 | print('Processing the entities in the train file...') 431 | for entity, sentence_index_list in tqdm(entity_to_sentence.iteritems()): 432 | text_kb_instance_map = {"entity":entity} 433 | for sentence_index_tuple in sentence_index_list: 434 | sentence, indices = sentence_index_tuple 435 | for index in indices: 436 | if sentence[index] == entity: 437 | continue 438 | text_kb_instance_map['value'] = sentence[index] 439 | sentence[index] = '_blank_' 440 | text_kb_instance_map['key'] = sentence #key is the sentence 441 | text_kb_instance_map['key_length'] = len(sentence) 442 | #write the json into a file 443 | text_kb_instance_json = json.dumps(text_kb_instance_map) 444 | out.write(text_kb_instance_json+'\n') 445 | sentence[index] = text_kb_instance_map['value'] # restoring it back 446 | 447 | 448 | if __name__ == '__main__': 449 | parser = argparse.ArgumentParser() 450 | parser.add_argument('-m', '--make_vocab', default=0, type=int) 451 | parser.add_argument('-v', '--vocab_dir', required=True) 452 | parser.add_argument('-t', '--train_file', required=True) 453 | parser.add_argument('-d', '--dev_file', required=True) 454 | parser.add_argument('--test_file', required=True) 455 | parser.add_argument('-k', '--kb_file', required=True) 456 | parser.add_argument('--text_kb_file', default='', type=str) 457 | parser.add_argument('--extract_relevant_kb', default=0, type=int) 458 | parser.add_argument('--make_text_kb', default=0, type=int) 459 | parser.add_argument('--augment_text_kb_facts', default=0, type=int) 460 | parser.add_argument('--augment_kb_facts', default=0, type=int) 461 | 
parser.add_argument('--sort_freebase', default=0, type=int) 462 | 463 | args = parser.parse_args() 464 | make_vocab = (args.make_vocab == 1) 465 | extract_relevant_kb = (args.extract_relevant_kb == 1) 466 | train_file = args.train_file 467 | kb_file = args.kb_file 468 | text_kb_file = args.text_kb_file 469 | dev_file = args.dev_file 470 | test_file = args.test_file 471 | vocab_dir = args.vocab_dir 472 | create_text_kb = (args.make_text_kb == 1) 473 | augment_text_kb_facts = (args.augment_text_kb_facts == 1) 474 | augment_kb_facts = (args.augment_kb_facts == 1) 475 | sort_freebase = (args.sort_freebase == 1) 476 | 477 | if make_vocab: 478 | print('Creating entity and relation vocab') 479 | kb = KB(kb_file, create_vocab=True, vocab_dir=vocab_dir) 480 | print('Augmenting entity vocab with question words') 481 | text_qs = Text(train_file, kb=kb) 482 | text_qs.augment_to_entity_vocab() 483 | print('Saving...') 484 | kb.save_vocab(num_words=text_qs.num_words) 485 | elif extract_relevant_kb: 486 | print("Extracting KB triples bases on train set") 487 | extract_appropriate_freebase(train_file, kb_file) 488 | elif create_text_kb: 489 | print('Constructing text kb') 490 | input_file = "/home/rajarshi/canvas/data/SPADES_NEW/text.spades.txt.input" 491 | make_text_kb(input_file) 492 | elif sort_freebase: 493 | print('Sorting freebase wrt entity 1') 494 | freebase_file = "/home/rajarshi/canvas/data/TextKBQA/freebase.spades.txt.orig" 495 | output_file = "/home/rajarshi/canvas/data/TextKBQA/freebase.spades.txt.new" 496 | freebase_sort_wrt_entity1(freebase_file, output_file) 497 | elif augment_kb_facts: 498 | print('Augmenting files with kb facts') 499 | kb = KB(kb_file, vocab_dir=vocab_dir) 500 | print('Augmenting train file') 501 | train_file_out = "/iesl/canvas/rajarshi/data/SPADES/train_with_kb_facts.json" 502 | augment_qa_with_kb_facts(train_file, train_file_out, kb) 503 | print('Augmenting dev file') 504 | dev_file_out = "/iesl/canvas/rajarshi/data/SPADES/dev_with_kb_facts.json" 505 | augment_qa_with_kb_facts(dev_file, dev_file_out, kb) 506 | print('Augmenting test file') 507 | test_file_out = "/iesl/canvas/rajarshi/data/SPADES/test_with_kb_facts.json" 508 | augment_qa_with_kb_facts(test_file, test_file_out, kb) 509 | 510 | elif augment_text_kb_facts: 511 | print('Augmenting files with text kb facts') 512 | text_kb = TextKb(text_kb_file, vocab_dir=vocab_dir) 513 | print('Augmenting train file') 514 | train_file_out = "/iesl/canvas/rajarshi/data/SPADES_NEW/train_with_kb_and_text_facts.json" 515 | augment_qa_with_text_kb_facts(train_file, train_file_out, text_kb, is_train_file=True) 516 | print('Augmenting dev_file') 517 | dev_file_out = "/iesl/canvas/rajarshi/data/SPADES_NEW/dev_with_kb_and_text_facts.json" 518 | augment_qa_with_text_kb_facts(dev_file, dev_file_out, text_kb) 519 | print('Augmenting test_file') 520 | test_file_out = "/iesl/canvas/rajarshi/data/SPADES_NEW/test_with_kb_and_text_facts.json" 521 | augment_qa_with_text_kb_facts(test_file, test_file_out, text_kb) 522 | 523 | print('Done') 524 | 525 | -------------------------------------------------------------------------------- /code/feed_data.py: -------------------------------------------------------------------------------- 1 | from data_utils import KB, Text, TextKb 2 | import numpy as np 3 | from tqdm import tqdm 4 | 5 | 6 | class Batcher(object): 7 | def __init__(self, input_file, kb_file, text_kb_file, batch_size, vocab_dir, return_one_epoch=False, shuffle=True, 8 | min_num_mem_slots=100, 9 | max_num_mem_slots=500, 10 | 
min_num_text_mem_slots=0, 11 | max_num_text_mem_slots=1000, 12 | use_kb_mem=True, 13 | use_text_mem=False): 14 | self.batch_size = batch_size 15 | self.input_file = input_file 16 | self.kb_file = kb_file 17 | self.text_kb_file = text_kb_file 18 | self.shuffle = shuffle 19 | self.max_num_mem_slots = max_num_mem_slots 20 | self.min_num_mem_slots = min_num_mem_slots 21 | self.max_num_text_mem_slots = max_num_text_mem_slots 22 | self.min_num_text_mem_slots = min_num_text_mem_slots 23 | self.vocab_dir = vocab_dir 24 | self.return_one_epoch = return_one_epoch 25 | self.use_kb_mem = use_kb_mem 26 | self.use_text_mem = use_text_mem 27 | self.questions, self.q_lengths, self.answers, \ 28 | self.kb_memory_slots, self.kb_num_memories, \ 29 | self.text_key_mem, self.text_key_len, \ 30 | self.text_val_mem, self.num_text_mems = self.read_files() 31 | self.max_key_len = None 32 | 33 | if self.use_text_mem and self.use_kb_mem: 34 | assert self.text_key_mem is not None and self.kb_memory_slots is not None 35 | elif self.use_kb_mem: 36 | assert self.text_key_mem is None and self.kb_memory_slots is not None 37 | else: 38 | assert self.text_key_mem is not None and self.kb_memory_slots is None 39 | 40 | self.num_questions = len(self.questions) 41 | print('Num questions {}'.format(self.num_questions)) 42 | self.start_index = 0 43 | if self.shuffle: 44 | self.shuffle_data() 45 | 46 | def get_next_batch(self): 47 | """ 48 | returns the next batch 49 | TODO(rajarshd): move the if-check outside the loop, so that conditioned is not checked every damn time. the conditions are suppose to be immutable. 50 | """ 51 | while True: 52 | if self.start_index >= self.num_questions: 53 | if self.return_one_epoch: 54 | return # stop after returning one epoch 55 | self.start_index = 0 56 | if self.shuffle: 57 | self.shuffle_data() 58 | else: 59 | num_data_returned = min(self.batch_size, self.num_questions - self.start_index) 60 | assert num_data_returned > 0 61 | end_index = self.start_index + num_data_returned 62 | if self.use_kb_mem and self.use_text_mem: 63 | yield self.questions[self.start_index:end_index], self.q_lengths[self.start_index:end_index], \ 64 | self.answers[self.start_index:end_index], self.kb_memory_slots[self.start_index:end_index], \ 65 | self.kb_num_memories[self.start_index:end_index], self.text_key_mem[self.start_index:end_index], \ 66 | self.text_key_len[self.start_index:end_index], self.text_val_mem[self.start_index:end_index], \ 67 | self.num_text_mems[self.start_index:end_index] 68 | elif self.use_kb_mem: 69 | yield self.questions[self.start_index:end_index], self.q_lengths[self.start_index:end_index], \ 70 | self.answers[self.start_index:end_index], self.kb_memory_slots[self.start_index:end_index], \ 71 | self.kb_num_memories[self.start_index:end_index] 72 | else: 73 | yield self.questions[self.start_index:end_index], self.q_lengths[self.start_index:end_index], \ 74 | self.answers[self.start_index:end_index], self.text_key_mem[self.start_index:end_index], \ 75 | self.text_key_len[self.start_index:end_index], self.text_val_mem[self.start_index:end_index], \ 76 | self.num_text_mems[self.start_index:end_index] 77 | self.start_index = end_index 78 | 79 | def shuffle_data(self): 80 | """ 81 | Shuffles maintaining the same order. 
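        A single random permutation is applied to every array in use (questions,
        lengths, answers and the KB/text memory tensors), so the per-question
        alignment across arrays is preserved.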
82 | """ 83 | perm = np.random.permutation(self.num_questions) # perm of index in range(0, num_questions) 84 | assert len(perm) == self.num_questions 85 | if self.use_kb_mem and self.use_text_mem: 86 | self.questions, self.q_lengths, self.answers, self.kb_memory_slots, self.kb_num_memories, self.text_key_mem,\ 87 | self.text_key_len, self.text_val_mem, self.num_text_mems = \ 88 | self.questions[perm], self.q_lengths[perm], self.answers[perm], self.kb_memory_slots[perm], \ 89 | self.kb_num_memories[perm], self.text_key_mem[perm], self.text_key_len[perm], self.text_val_mem[perm], self.num_text_mems[perm] 90 | elif self.use_kb_mem: 91 | self.questions, self.q_lengths, self.answers, self.kb_memory_slots, self.kb_num_memories = \ 92 | self.questions[perm], self.q_lengths[perm], self.answers[perm], self.kb_memory_slots[perm], \ 93 | self.kb_num_memories[perm] 94 | else: 95 | self.questions, self.q_lengths, self.answers, self.text_key_mem, self.text_key_len, self.text_val_mem,\ 96 | self.num_text_mems = self.questions[perm], self.q_lengths[perm], self.answers[perm], self.text_key_mem[perm],\ 97 | self.text_key_len[perm], self.text_val_mem[perm], self.num_text_mems[perm] 98 | def reset(self): 99 | self.start_index = 0 100 | 101 | def read_files(self): 102 | """reads the kb and text files and creates the numpy arrays after padding""" 103 | # read the KB file 104 | kb = KB(self.kb_file, vocab_dir=self.vocab_dir) if self.use_kb_mem else None 105 | # read text kb file 106 | text_kb = TextKb(self.text_kb_file, vocab_dir=self.vocab_dir) if self.use_text_mem else None 107 | self.max_key_len = text_kb.max_key_length if self.use_text_mem else None 108 | # Question file 109 | questions = Text(self.input_file, 110 | max_num_facts=self.max_num_mem_slots, 111 | min_num_facts=self.min_num_mem_slots, 112 | min_num_text_facts=self.min_num_text_mem_slots, 113 | max_num_text_facts=self.max_num_text_mem_slots) 114 | max_q_length, max_num_kb_facts, max_num_text_kb_facts, question_list = questions.max_q_length, \ 115 | questions.max_num_kb_facts, \ 116 | questions.max_num_text_kb_facts, \ 117 | questions.question_list 118 | entity_vocab = kb.entity_vocab if self.use_kb_mem else text_kb.entity_vocab 119 | relation_vocab = kb.relation_vocab if self.use_kb_mem else text_kb.relation_vocab 120 | num_questions = len(question_list) 121 | question_lengths = np.ones([num_questions]) * -1 122 | questions = np.ones([num_questions, max_q_length]) * entity_vocab['PAD'] 123 | answers = np.ones_like(question_lengths) * entity_vocab['UNK'] 124 | all_kb_memories = None 125 | num_kb_memories = None 126 | text_key_memories = None 127 | text_key_lengths = None 128 | text_val_memories = None 129 | num_text_memories = None 130 | 131 | if self.use_kb_mem: 132 | print('Make data tensors for kb') 133 | all_kb_memories = np.ones([num_questions, max_num_kb_facts, 3]) 134 | all_kb_memories[:, :, 0].fill(entity_vocab['DUMMY_MEM']) 135 | all_kb_memories[:, :, 2].fill(entity_vocab['DUMMY_MEM']) 136 | all_kb_memories[:, :, 1].fill(relation_vocab['DUMMY_MEM']) 137 | num_kb_memories = np.ones_like(question_lengths) * -1 138 | for q_counter, q in enumerate(tqdm(question_list)): 139 | question_str = q.parsed_question['question'] 140 | question_entities = q.parsed_question['entities'] 141 | question_indices = q.parsed_question['indices'] 142 | q_answers = q.parsed_question['answers'] 143 | # num_kb_memories.append(q.parsed_question['num_facts']) 144 | num_kb_memories[q_counter] = q.parsed_question['num_facts'] 145 | q_start_indices = 
np.asarray(q.parsed_question['start_indices']) 146 | q_fact_lengths = np.asarray( 147 | q.parsed_question['fact_lengths']) # for each entity in question retrieve the fact 148 | sorted_index = np.argsort(q_fact_lengths) 149 | q_fact_lengths = q_fact_lengths[sorted_index] 150 | q_start_indices = q_start_indices[sorted_index] 151 | question_words_list = question_str.split(' ') 152 | for counter, index in enumerate(question_indices): # replace the entities with their ids 153 | question_words_list[index] = question_entities[counter] 154 | question_int = [entity_vocab[w_q] if w_q.strip() in entity_vocab else entity_vocab['UNK'] for w_q in 155 | question_words_list] 156 | question_len = len(question_int) 157 | questions[q_counter, 0:question_len] = question_int 158 | question_lengths[q_counter] = question_len 159 | answer_int = [entity_vocab[a] if a in entity_vocab else entity_vocab['UNK'] for a in q_answers] 160 | answers[q_counter] = answer_int[0] 161 | 162 | # memories 163 | kb_facts = kb.facts 164 | mem_counter = 0 165 | for counter, start_index in enumerate(q_start_indices): 166 | num_facts = q_fact_lengths[counter] 167 | if mem_counter < self.max_num_mem_slots: 168 | for mem_index in xrange(start_index, start_index + num_facts): 169 | mem = kb_facts[mem_index] 170 | e1_int = entity_vocab[mem['e1']] if mem['e1'] in entity_vocab else entity_vocab['UNK'] 171 | e2_int = entity_vocab[mem['e2']] if mem['e2'] in entity_vocab else entity_vocab['UNK'] 172 | r_int = relation_vocab[mem['r']] if mem['r'] in relation_vocab else relation_vocab['UNK'] 173 | all_kb_memories[q_counter][mem_counter][0] = e1_int 174 | all_kb_memories[q_counter][mem_counter][1] = r_int 175 | all_kb_memories[q_counter][mem_counter][2] = e2_int 176 | mem_counter += 1 177 | if mem_counter == self.max_num_mem_slots: # will use the first max_num_mem_slots slots 178 | break 179 | if self.use_text_mem: 180 | 181 | print('Make data tensors for text kb') 182 | max_key_len = text_kb.max_key_length 183 | text_key_memories = np.ones([num_questions, max_num_text_kb_facts, max_key_len]) * entity_vocab['DUMMY_MEM'] 184 | text_key_lengths = np.zeros([num_questions, max_num_text_kb_facts]) 185 | text_val_memories = np.ones([num_questions, max_num_text_kb_facts]) * entity_vocab['DUMMY_MEM'] 186 | num_text_memories = np.ones_like(question_lengths) * -1 187 | for q_counter, q in enumerate(tqdm(question_list)): 188 | # TODO (rajarshd): Move the repeated piece of code in a method. 
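                # Same question/answer tensor filling as the KB branch above; then each
                # supporting text fact's key (token ids, padded to max_key_len) and value
                # (entity id) are written into the fixed-size text memory tensors,
                # skipping any memory listed in the question's black_lists.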
189 | question_str = q.parsed_question['question'] 190 | question_entities = q.parsed_question['entities'] 191 | question_indices = q.parsed_question['indices'] 192 | q_answers = q.parsed_question['answers'] 193 | question_words_list = question_str.split(' ') 194 | for counter, index in enumerate(question_indices): # replace the entities with their ids 195 | question_words_list[index] = question_entities[counter] 196 | question_int = [entity_vocab[w_q] if w_q.strip() in entity_vocab else entity_vocab['UNK'] for w_q in 197 | question_words_list] 198 | question_len = len(question_int) 199 | questions[q_counter, 0:question_len] = question_int 200 | question_lengths[q_counter] = question_len 201 | answer_int = [entity_vocab[a] if a in entity_vocab else entity_vocab['UNK'] for a in q_answers] 202 | answers[q_counter] = answer_int[0] 203 | 204 | # memories 205 | num_q_text_memories = q.parsed_question['text_kb_num_facts'] 206 | # in the training set, account for the discarded memories 207 | if 'black_lists' in q.parsed_question: 208 | num_discarded = 0 209 | for black_list in q.parsed_question['black_lists']: 210 | num_discarded += len(black_list) 211 | num_q_text_memories -= num_discarded 212 | num_text_memories[q_counter] = num_q_text_memories 213 | q_start_indices = np.asarray(q.parsed_question['text_kb_start_indices']) 214 | q_fact_lengths = np.asarray( 215 | q.parsed_question['text_kb_lengths']) # for each entity in question retrieve the fact 216 | q_black_lists = np.asarray( 217 | q.parsed_question['black_lists']) if 'black_lists' in q.parsed_question else None 218 | sorted_index = np.argsort(q_fact_lengths) 219 | q_fact_lengths = q_fact_lengths[sorted_index] 220 | q_start_indices = q_start_indices[sorted_index] 221 | q_black_lists = q_black_lists[sorted_index] if q_black_lists is not None else None 222 | text_kb_facts = text_kb.facts_list 223 | mem_counter = 0 224 | for counter, start_index in enumerate(q_start_indices): 225 | num_facts = q_fact_lengths[counter] 226 | black_list_entity = set(q_black_lists[counter]) if q_black_lists is not None else None 227 | if mem_counter < self.max_num_text_mem_slots: 228 | for mem_entity_counter, mem_index in enumerate(xrange(start_index, start_index + num_facts)): 229 | if black_list_entity is not None and mem_entity_counter in black_list_entity: 230 | continue 231 | mem = text_kb_facts[mem_index] 232 | key = mem['key'] 233 | key_int = [entity_vocab[k] if k in entity_vocab else entity_vocab['UNK'] for k in key] 234 | val = mem['value'] 235 | val_int = entity_vocab[val] if val in entity_vocab else entity_vocab['UNK'] 236 | key_len = int(mem['key_length']) 237 | text_key_memories[q_counter][mem_counter][0:key_len] = key_int 238 | text_val_memories[q_counter][mem_counter] = val_int 239 | text_key_lengths[q_counter][mem_counter] = key_len 240 | mem_counter += 1 241 | if mem_counter == self.max_num_text_mem_slots: # will use the first max_num_mem_slots slots 242 | break 243 | 244 | return questions, question_lengths, answers, all_kb_memories, num_kb_memories, \ 245 | text_key_memories, text_key_lengths, text_val_memories, num_text_memories 246 | -------------------------------------------------------------------------------- /code/get_stats.py: -------------------------------------------------------------------------------- 1 | import json 2 | import util 3 | from collections import defaultdict 4 | 5 | 6 | def get_fb_stats(freebase_data_file): 7 | with open(freebase_data_file) as fb: 8 | fact_counter = 0 9 | relation_set = set() 10 | entity_set = set() 11 
| for line in fb: 12 | line = line.strip() 13 | line = line[1:-1] 14 | e1, r1, r2, e2 = [a.strip('"') for a in [x.strip() for x in line.split(',')]] 15 | r = r1 + '_' + r2 16 | fact_counter += 1 17 | relation_set.add(r) 18 | entity_set.add(e1) 19 | entity_set.add(e2) 20 | 21 | print("Total num of facts {}".format(fact_counter)) 22 | print("Num unique entities {}".format(len(entity_set))) 23 | print("Num unique relations {}".format(len(relation_set))) 24 | 25 | 26 | def get_questions_stats(train_data_file, dev_data_file): 27 | print('1. Getting the number of blanks') 28 | 29 | blank_str = '_blank_' 30 | num_blanks_map = defaultdict(int) 31 | word_freq_train = defaultdict(int) 32 | with open(train_data_file) as train_file: 33 | for counter, line in enumerate(util.verboserate(train_file)): 34 | line = line.strip() 35 | q_json = json.loads(line) 36 | q = q_json['sentence'] 37 | count = q.count(blank_str) 38 | num_blanks_map[count] += 1 39 | words = q.split(' ') 40 | for word in words: 41 | word = word.strip() 42 | word_freq_train[word] += 1 43 | a_list = q_json['answerSubset'] 44 | for a in a_list: 45 | word_freq_train[a] = word_freq_train[word] + 1 46 | 47 | print(num_blanks_map) 48 | 49 | print '2. Number of word types in the train set {}'.format(len(word_freq_train)) 50 | 51 | print '3. Checking overlap with the dev answers' 52 | dev_answers_present = set() 53 | dev_answers_oov = set() 54 | dev_answers = set() 55 | with open(dev_data_file) as dev_file: 56 | for line in dev_file: 57 | line = line.strip() 58 | dev_json = json.loads(line) 59 | a_list = dev_json['answerSubset'] 60 | for a in a_list: 61 | if a in word_freq_train: 62 | dev_answers_present.add(a) 63 | else: 64 | dev_answers_oov.add(a) 65 | dev_answers.add(a) 66 | 67 | print 'Number of unique dev answer strings {}'.format(len(dev_answers)) 68 | 69 | print 'Number of oov answer strings in dev set {}'.format(len(dev_answers_oov)) 70 | 71 | print 'Number of dev answer strings which have atleast 1 occurrences in train set {}'.format( 72 | len(dev_answers_present)) 73 | 74 | 75 | freebase_data_file = "/home/rajarshi/research/graph-parser/data/spades/freebase.spades.txt" 76 | train_data_file = "/home/rajarshi/research/graph-parser/data/spades/train.json" 77 | dev_data_file = "/home/rajarshi/research/graph-parser/data/spades/dev.json" 78 | # get_fb_stats() 79 | get_questions_stats(train_data_file, dev_data_file) 80 | -------------------------------------------------------------------------------- /code/qual_eval.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | from tqdm import tqdm 4 | import json 5 | 6 | 7 | class QualAnalysis(object): 8 | def __init__(self): 9 | 10 | self.kb_facts = self.read_kb_facts(kb_file) if use_kb else None 11 | self.text_kb_facts = self.read_text_kb_facts(text_kb_file) if use_text else None 12 | self.questions = self.read_questions(input_test_file) 13 | # print('Reading mid to word map') 14 | # self.mid_to_word_map = self.mid_to_word() 15 | 16 | def read_kb_facts(self, input_file): 17 | facts = [] 18 | print('Reading kb file at {}'.format(input_file)) 19 | with open(input_file) as fb: 20 | for line in tqdm(fb): 21 | line = line.strip() 22 | line = line[1:-1] 23 | e1, r1, r2, e2 = [a.strip('"') for a in [x.strip() for x in line.split(',')]] 24 | r = r1 + '_' + r2 25 | facts.append({'e1': e1, 'r': r, 'e2': e2}) 26 | return facts 27 | 28 | def read_text_kb_facts(self, input_file): 29 | facts = [] 30 | print('Reading text kb file at 
{}'.format(input_file)) 31 | with open(input_file) as fin: 32 | for counter, line in tqdm(enumerate(fin)): 33 | kb_instance = json.loads(line) 34 | facts.append(kb_instance) 35 | return facts 36 | 37 | def read_questions(self, input_file): 38 | questions = [] 39 | print('Reading file at {}'.format(input_file)) 40 | with open(input_file) as f_in: 41 | for counter, line in tqdm(enumerate(f_in)): 42 | question = json.loads(line) 43 | questions.append(question) 44 | return questions 45 | 46 | def get_relevant_memory(self, question_index, mem_index, use_kb=True): 47 | """ 48 | get the relevant memory either kb or text. Note one of use_kb and use_text can be true 49 | at a given time, if both are true this needs to be called twice with each value of use_kb (true, false) 50 | and the returned value needs to be handled appropriately. 51 | :param question_index: 52 | :param mem_index: 53 | :param use_kb: 54 | :return: 55 | """ 56 | question = self.questions[question_index] 57 | mem_index += 1 # convert from 0 index 58 | start_index_key = 'start_indices' if use_kb else 'text_kb_start_indices' 59 | length_key = 'lengths' if use_kb else 'text_kb_lengths' 60 | memory = self.kb_facts if use_kb else self.text_kb_facts 61 | start_indices = question[start_index_key] 62 | lengths = question[length_key] 63 | q_start_indices = np.asarray(start_indices) 64 | q_fact_lengths = np.asarray(lengths) 65 | sorted_index = np.argsort(q_fact_lengths) 66 | q_fact_lengths = q_fact_lengths[sorted_index] 67 | q_start_indices = q_start_indices[sorted_index] 68 | cum_num_mem_slots = 0 69 | counter = 0 70 | for fact_len in q_fact_lengths: 71 | if cum_num_mem_slots + fact_len >= mem_index: # the mem lies in the next partition 72 | # calculate the off set 73 | offset = mem_index - cum_num_mem_slots - 1 # -1 because converting to zero index 74 | return memory[q_start_indices[counter] + offset] 75 | else: 76 | cum_num_mem_slots += fact_len 77 | counter += 1 78 | 79 | def read_attn_wts_file(self, input_file, input_predicted_answer_file): 80 | 81 | f_out = open(output_dir+'/attn_memory.txt','a') 82 | f_out_correct = open(output_dir + '/attn_memory.txt.correct', 'a') 83 | print('Loading the attn wights...') 84 | attn_wts = np.load(input_file) 85 | print('Loading predicted answer file') 86 | num_questions = len(self.questions) 87 | answers = np.fromfile(input_predicted_answer_file) 88 | answers = answers.reshape(num_questions, -1) 89 | assert attn_wts.ndim == 2 90 | num_data, max_mem_slots = attn_wts.shape 91 | # get the index 92 | print('Sorting....') 93 | sorted_index = np.argsort(attn_wts, axis=1) 94 | sorted_wts = np.sort(attn_wts, axis=1) 95 | print('done...') 96 | # get the slice we are interested in 97 | start_index = max_mem_slots - topk 98 | sorted_index = sorted_index[:, start_index:] 99 | sorted_wts = sorted_wts[:, start_index:] 100 | for data_index in range(num_data): # refactor the double loop 101 | sentence = self.questions[data_index]['sentence'] 102 | split_sentence = sentence.split(' ') 103 | entities = self.questions[data_index]['entities'] 104 | for entity in entities: 105 | split_sentence[entity['index']] = entity['entity'] 106 | sentence_with_entities = ' '.join(split_sentence) 107 | is_correct = (answers[data_index][1] == answers[data_index][0]) 108 | f_out.write('Sentence: {}\n'.format(sentence)) 109 | f_out.write('Sentence with entities: {}\n'.format(sentence_with_entities)) 110 | f_out.write('Correct Answer: {}\n'.format(rev_entity_vocab[int(answers[data_index][1])])) 111 | f_out.write('Predicted Answer: 
{}\n'.format(rev_entity_vocab[int(answers[data_index][0])])) 112 | f_out.write('Memories\n') 113 | if is_correct: 114 | f_out_correct.write('Sentence: {}\n'.format(sentence)) 115 | f_out_correct.write('Sentence with entities: {}\n'.format(sentence_with_entities)) 116 | f_out_correct.write('Correct Answer: {}\n'.format(rev_entity_vocab[int(answers[data_index][1])])) 117 | f_out_correct.write('Predicted Answer: {}\n'.format(rev_entity_vocab[int(answers[data_index][0])])) 118 | f_out_correct.write('Memories\n') 119 | for index in reversed(range(topk)): 120 | mem_index = self.get_relevant_memory(data_index, sorted_index[data_index][index], use_kb=use_kb) 121 | f_out.write('Attn wt: {0:10.4f}\n'.format(sorted_wts[data_index][index])) 122 | f_out.write('Memory: {}\n'.format(mem_index)) 123 | if is_correct: 124 | f_out_correct.write('Attn wt: {0:10.4f}\n'.format(sorted_wts[data_index][index])) 125 | f_out_correct.write('Memory: {}\n'.format(mem_index)) 126 | if mem_index is not None: 127 | mem_index['value'] = self.mid_to_word_map[mem_index['value']] if mem_index['value'] in self.mid_to_word_map else mem_index['value'] 128 | key = [self.mid_to_word_map[word] if word in self.mid_to_word_map else word for word in mem_index['key']] 129 | mem_index['key'] = key 130 | mem_index['entity'] = self.mid_to_word_map[mem_index['entity']] if mem_index['entity'] in self.mid_to_word_map else mem_index['entity'] 131 | f_out_correct.write('Memory in words: {}\n'.format(mem_index)) 132 | f_out.write('Memory in words: {}\n'.format(mem_index)) 133 | 134 | f_out.write("=============\n") 135 | if is_correct: 136 | f_out_correct.write("=============\n") 137 | 138 | def get_siva_output(self, input_predicted_answer_file): 139 | 140 | num_questions = len(self.questions) 141 | outputs = np.fromfile(input_predicted_answer_file) # manzil had changed the output structure storing (sentence, prediction). Also he changed np.save to np.tofile so reading would change 142 | # outputs = np.load(input_predicted_answer_file) 143 | outputs = outputs.reshape(num_questions, -1) 144 | num_questions, sequence_length = outputs.shape 145 | predicted_answers = outputs[:,sequence_length-1] #last column 146 | # predicted_answers = outputs[:, 0] # last column 147 | correct_counter = 0 148 | for counter, question in enumerate(self.questions): 149 | print(question['sentence']+'\t'+ '[\"'+question['answerSubset'][0]+'\"]'+'\t'+'[\"'+rev_entity_vocab[predicted_answers[counter]]+'\"]') 150 | if question['answerSubset'][0] == rev_entity_vocab[predicted_answers[counter]]: 151 | correct_counter += 1 152 | print('Accuracy: {}'.format(correct_counter*1.0/num_questions)) 153 | # print(question['answerSubset']) 154 | # print(rev_entity_vocab[predicted_answers[counter]]) 155 | 156 | 157 | 158 | def __call__(self, *args, **kwargs): 159 | 160 | self.read_attn_wts_file(input_attn_file, input_predicted_answer_file) 161 | # self.get_siva_output(input_predicted_answer_file) 162 | print('Done') 163 | 164 | def mid_to_word(self): 165 | word_to_mid = {} 166 | mid_to_word = {} 167 | with open('/iesl/canvas/pat/data/freebase/freebase_names', 'r') as f: 168 | for line in tqdm(f): 169 | mid, word, _ = line.split('\t') 170 | word_to_mid[word] = 'm.' + mid[2:] 171 | mid_to_word['m.' 
+ mid[2:]] = word 172 | return mid_to_word 173 | 174 | 175 | 176 | if __name__ == '__main__': 177 | parser = argparse.ArgumentParser() 178 | parser.add_argument("--use_kb", default=1, type=int) 179 | parser.add_argument("--use_text", default=0, type=int) 180 | parser.add_argument("--kb_file", required=True) 181 | parser.add_argument("--text_kb_file", required=True) 182 | parser.add_argument("--attn_file", required=True) 183 | parser.add_argument("--answer_file", required=True) 184 | parser.add_argument("--input_test_file", required=True) 185 | parser.add_argument("--k", default=5, type=int) 186 | parser.add_argument("--output_dir", required=True) 187 | 188 | args = parser.parse_args() 189 | kb_file = args.kb_file 190 | text_kb_file = args.text_kb_file 191 | use_kb = (args.use_kb == 1) 192 | use_text = (args.use_text == 1) 193 | input_attn_file = args.attn_file 194 | input_predicted_answer_file = args.answer_file 195 | input_test_file = args.input_test_file 196 | topk = args.k 197 | output_dir = args.output_dir 198 | vocab_dir = "/home/rajarshi/research/joint-text-and-kb-inference-semantic-parsing/vocab" 199 | print('Reading entity vocab') 200 | entity_vocab = json.load(open(vocab_dir + '/entity_vocab.json')) 201 | rev_entity_vocab = {} 202 | for k,v in entity_vocab.iteritems(): 203 | rev_entity_vocab[v] = k 204 | qual_analysis = QualAnalysis() 205 | qual_analysis() -------------------------------------------------------------------------------- /code/scratch/check_nick_coverage.py: -------------------------------------------------------------------------------- 1 | import json 2 | vocab_file="/iesl/canvas/nmonath/data/wikipedia/20160305/en/enwiki-20160305-lstm-vocab.tsv" 3 | entity_to_freebase_mapping="/iesl/canvas/nmonath/data/freebase/20160513/en-freebase_wiki_cat_title_map.txt" 4 | 5 | 6 | #Read the entity_to_freebase_mapping file 7 | print('Read the entity_to_freebase_mapping file') 8 | name_to_mid = {} 9 | with open(entity_to_freebase_mapping) as input_file: 10 | for line in input_file: 11 | line = line.strip() 12 | name, mid = line.split('\t') 13 | name_to_mid[name] = mid 14 | 15 | #read the vocab file 16 | # print('read nick\'s vocab file') 17 | # vocab_set = set() 18 | # with open(vocab_file) as vocab_in: 19 | # for line in vocab_in: 20 | # line = line.strip() 21 | # _, name = line.split(' ') 22 | # if name in name_to_mid: 23 | # vocab_set.add(name_to_mid[name].replace('/', '.')) 24 | # else: 25 | # vocab_set.add(name) 26 | 27 | #read the embedding file and read only the words (1st column) 28 | embeddings_file_from_nick = "/iesl/canvas/nmonath/data/wikipedia/20160305/en/embeddings/aabt/context/target.tsv" 29 | vocab_set = set() 30 | with open(embeddings_file_from_nick) as embedding_file: 31 | for line in embedding_file: 32 | split = line.split('\t') 33 | name = split[0] 34 | if name in name_to_mid: 35 | vocab_set.add(name_to_mid[name].replace('/', '.')) 36 | else: 37 | name = name.replace('W_SLUG_', '') 38 | name = name.replace('_lang_EN', '') 39 | vocab_set.add(name.lower()) 40 | 41 | 42 | #read entity vocab and check coverage 43 | print('Reading entity vocab') 44 | entity_vocab_file = "/home/rajarshi/research/joint-text-and-kb-inference-semantic-parsing/vocab/entity_vocab.json" 45 | entity_vocab = {} 46 | with open(entity_vocab_file) as entity_vocab_in: 47 | entity_vocab = json.load(entity_vocab_in) 48 | 49 | counter = 0 50 | mid_counter = 0 51 | for k, _ in entity_vocab.iteritems(): 52 | k = k.lower() 53 | if k in vocab_set: 54 | if k.startswith('m.'): 55 | mid_counter += 1 56 | 
counter += 1 57 | 58 | print('Total overlap is {}'.format(counter)) 59 | print('Overlap of entities is {}'.format(mid_counter)) -------------------------------------------------------------------------------- /code/scratch/check_siva_clueweb_coverage.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | 4 | def get_coverage(input_file): 5 | entities_in_file = set() 6 | with open(input_file) as f_in: 7 | for line in f_in: 8 | line = line.strip() 9 | json_line = json.loads(line) 10 | if 'entities' not in json_line: 11 | continue 12 | entity_list = json_line['entities'] 13 | for entity in entity_list: 14 | if 'entity' not in entity: 15 | continue 16 | entities_in_file.add(entity['entity']) 17 | 18 | print('Total number of entities on file are {}'.format(len(entities_in_file))) 19 | hit = 0 20 | total = 0 21 | for entity in entities_in_file: 22 | if entity in entity_count_map: 23 | hit += 1 24 | total += 1 25 | 26 | print('Total {}, hit {}'.format(total, hit)) 27 | 28 | 29 | print('Gathering entities in clueweb file...') 30 | dir_name = "/iesl/local/rajarshi/clueweb_siva/" 31 | clueweb_files = ['spadesClueWeb09_1.1', 'spadesClueWeb09_1.2', 'spadesClueWeb09_1.3', 'spadesClueWeb09_1.wiki'] 32 | 33 | entity_count_map = defaultdict(int) 34 | for file_name in clueweb_files: 35 | file_path = dir_name + file_name 36 | print('Reading file {}'.format(file_path)) 37 | with open(file_path) as f_in: 38 | for line in f_in: 39 | line = line.strip() 40 | json_line = json.loads(line) 41 | if 'entities' not in json_line: 42 | continue 43 | entity_list = json_line['entities'] 44 | for entity in entity_list: 45 | if 'entity' not in entity: 46 | continue 47 | entity_count_map[entity['entity']] += 1 48 | 49 | print('Total Num entities {}'.format(len(entity_count_map))) 50 | 51 | project_dir = '/home/rajarshi/canvas/data/TextKBQA/' 52 | train_file_name = 'train_with_facts.json' 53 | dev_file_name = 'dev_with_facts.json' 54 | test_file_name = 'test.json' 55 | print('Checking coverage in train file') 56 | get_coverage(project_dir + train_file_name) 57 | 58 | print('Checking coverage in dev file') 59 | get_coverage(project_dir + dev_file_name) 60 | 61 | print('Checking coverage in test file') 62 | get_coverage(project_dir + test_file_name) 63 | -------------------------------------------------------------------------------- /code/scratch/concat_all_clueweb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | def write_to_file(sentence_list, fout): 5 | for sentence in sentence_list: 6 | fout.write(sentence+'\n') 7 | 8 | dir_name = "/iesl/local/rajarshi/clueweb_siva/" 9 | clueweb_files = ['spadesClueWeb09_1.1', 'spadesClueWeb09_1.2', 10 | 'spadesClueWeb09_1.3', 'spadesClueWeb09_1.wiki'] 11 | LIST_MAX = 10000 12 | counter = 0 13 | output_file = dir_name+'all_sentences' 14 | if os.path.exists(output_file): 15 | os.remove(output_file) 16 | 17 | fout = open(output_file, 'a') 18 | for file_name in clueweb_files: 19 | file_path = dir_name + file_name 20 | print('Reading file {}'.format(file_path)) 21 | sentence_list = [] 22 | with open(file_path) as f_in: 23 | for line in f_in: 24 | line = line.strip() 25 | json_line = json.loads(line) 26 | entities = json_line['entities'] 27 | indices = [entity['index'] for entity in entities] 28 | mids = [entity['entity'] for entity in entities] 29 | words = json_line['words'] 30 | s_words = [word['word'] for word in words] 31 | for 
c, index in enumerate(indices): 32 | s_words[index] = mids[c] 33 | sentence = ' '.join(s_words) 34 | sentence = sentence.strip() 35 | sentence_list.append(sentence) 36 | if len(sentence_list) > LIST_MAX: 37 | write_to_file(sentence_list, fout) 38 | sentence_list = [] 39 | counter = counter+1 40 | print('Wrote {} lines'.format(LIST_MAX*counter)) -------------------------------------------------------------------------------- /code/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from feed_data import Batcher 3 | from KBQA import KBQA, TextQA, TextKBQA 4 | import tensorflow as tf 5 | import argparse 6 | import time 7 | import numpy as np 8 | import cPickle as pickle 9 | from tqdm import tqdm 10 | import pdb 11 | 12 | 13 | class Trainer(object): 14 | def __init__(self): 15 | with tf.Session() as sess: 16 | print('Blake hack for acquiring gpu') 17 | 18 | # pretraining 19 | entity_lookup_table = None 20 | if load_pretrained_vectors: 21 | print('Loading pretrained word embeddings...') 22 | with open(pretrained_vector_path, 'rb') as f: 23 | entity_lookup_table = pickle.load(f) 24 | if verbose: 25 | print("Loaded pretrained vectors of size: ", entity_lookup_table.shape) 26 | print("Entity vocab size: ", entity_vocab_size) 27 | 28 | # data 29 | self.batcher = Batcher(train_file, kb_file, text_kb_file, batch_size, vocab_dir, 30 | min_num_mem_slots=min_facts, max_num_mem_slots=max_facts, use_kb_mem=use_kb, 31 | use_text_mem=use_text, max_num_text_mem_slots=max_text_facts, 32 | min_num_text_mem_slots=min_facts) 33 | 34 | self.dev_batcher = Batcher(dev_file, kb_file, text_kb_file, dev_batch_size, vocab_dir, 35 | min_num_mem_slots=min_facts, max_num_mem_slots=dev_max_facts, 36 | return_one_epoch=True, shuffle=False, use_kb_mem=use_kb, use_text_mem=use_text, 37 | max_num_text_mem_slots=dev_max_text_facts, min_num_text_mem_slots=min_facts) 38 | 39 | # define network 40 | if use_kb and use_text: 41 | self.model = TextKBQA(entity_vocab_size=entity_vocab_size, relation_vocab_size=relation_vocab_size, 42 | embedding_size=embedding_size, hops=hops, load_pretrained_model=load_model, 43 | load_pretrained_vectors=load_pretrained_vectors, join=combine_text_kb_answer, 44 | pretrained_entity_vectors=entity_lookup_table, verbose=verbose, 45 | separate_key_lstm=separate_key_lstm) 46 | elif use_kb: 47 | self.model = KBQA(entity_vocab_size=entity_vocab_size, relation_vocab_size=relation_vocab_size, 48 | embedding_size=embedding_size, hops=hops, load_pretrained_model=load_model, 49 | load_pretrained_vectors=load_pretrained_vectors, 50 | pretrained_entity_vectors=entity_lookup_table, verbose=verbose) 51 | elif use_text: 52 | self.model = TextQA(entity_vocab_size=entity_vocab_size, embedding_size=embedding_size, hops=hops, load_pretrained_model=load_model, 53 | load_pretrained_vectors=load_pretrained_vectors, 54 | pretrained_entity_vectors=entity_lookup_table, verbose=verbose, 55 | separate_key_lstm=separate_key_lstm) 56 | 57 | # optimizer 58 | self.optimizer = tf.train.AdamOptimizer(lr) 59 | 60 | self.max_dev_acc = -1.0 61 | 62 | def bp(self, cost): 63 | tvars = tf.trainable_variables() 64 | grads = tf.gradients(cost, tvars) 65 | grads, _ = tf.clip_by_global_norm(grads, grad_clip_norm) 66 | train_op = self.optimizer.apply_gradients(zip(grads, tvars)) 67 | return train_op 68 | 69 | def initialize(self): 70 | #### inputs #### 71 | self.question = tf.placeholder(tf.int32, [None, None], name="question") 72 | self.question_lengths = 
tf.placeholder(tf.int32, [None], name="question_lengths") 73 | self.answer = tf.placeholder(tf.int32, [None], name="answer") 74 | if use_kb and use_text: 75 | self.memory = tf.placeholder(tf.int32, [None, None, 3], name="memory") 76 | self.text_key_mem = tf.placeholder(tf.int32, [None, None, None], name="key_mem") 77 | self.text_key_len = tf.placeholder(tf.int32, [None, None], name="key_len") 78 | self.text_val_mem = tf.placeholder(tf.int32, [None, None], name="val_mem") 79 | # network output 80 | self.output = self.model(self.memory, self.text_key_mem, self.text_key_len, self.text_val_mem, 81 | self.question, self.question_lengths) 82 | elif use_kb: 83 | self.memory = tf.placeholder(tf.int32, [None, None, 3], name="memory") 84 | # network output 85 | self.output = self.model(self.memory, self.question, self.question_lengths) 86 | elif use_text: 87 | self.text_key_mem = tf.placeholder(tf.int32, [None, None, None], name="key_mem") 88 | self.text_key_len = tf.placeholder(tf.int32, [None, None], name="key_len") 89 | self.text_val_mem = tf.placeholder(tf.int32, [None, None], name="val_mem") 90 | # network output 91 | self.output = self.model(self.text_key_mem, self.text_key_len, self.text_val_mem, self.question, 92 | self.question_lengths) 93 | 94 | # predict 95 | self.probs = tf.nn.softmax(self.output) 96 | self.predict_op = tf.argmax(self.output, 1, name="predict_op") 97 | self.rank_op = tf.nn.top_k(self.output, 50) 98 | 99 | # loss 100 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(self.output, self.answer) 101 | self.loss = tf.reduce_mean(cross_entropy, name="loss_op") 102 | 103 | if use_kb and use_text: 104 | # Graph created now load/save op for it 105 | # load the parameters for the kb only model 106 | #var_list = [v for v in tf.trainable_variables() if v.name.startswith('BiRNN/')] 107 | #var_list += [self.model.entity_lookup_table, self.model.relation_lookup_table, self.model.W, self.model.b, 108 | # self.model.W1, self.model.b1, self.model.R[0]] 109 | #self.saver = tf.train.Saver(var_list=var_list) 110 | self.saver = tf.train.Saver() 111 | else: 112 | self.saver = tf.train.Saver() 113 | 114 | # Add to the Graph the Ops that calculate and apply gradients. 115 | self.train_op = self.bp(self.loss) 116 | 117 | # return the variable initializer Op. 
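# (tf.initialize_all_variables is the pre-1.0 TensorFlow API; TF 1.0 deprecated it in favor of
#  tf.global_variables_initializer, so this graph setup expects an older TF release.)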
118 | init_op = tf.initialize_all_variables() 119 | 120 | return init_op 121 | 122 | def dev_eval(self, sess): 123 | print('Evaluating on dev set...') 124 | dev_start_time = time.time() 125 | num_dev_data = 0 126 | dev_loss = 0.0 127 | dev_acc = 0.0 128 | 129 | attn_weight = None 130 | preds = [] 131 | SRR = 0.0 132 | for data in tqdm(self.dev_batcher.get_next_batch()): 133 | 134 | if use_kb and use_text: 135 | dev_batch_question, dev_batch_q_lengths, dev_batch_answer, dev_batch_memory, dev_batch_num_memories, \ 136 | dev_batch_text_key_mem, dev_batch_text_key_len, dev_batch_text_val_mem, dev_batch_num_text_mems = data 137 | feed_dict_dev = {self.question: dev_batch_question, 138 | self.question_lengths: dev_batch_q_lengths, 139 | self.answer: dev_batch_answer, 140 | self.memory: dev_batch_memory, 141 | self.text_key_mem: dev_batch_text_key_mem, 142 | self.text_key_len: dev_batch_text_key_len, 143 | self.text_val_mem: dev_batch_text_val_mem} 144 | elif use_kb: 145 | dev_batch_question, dev_batch_q_lengths, dev_batch_answer, dev_batch_memory, dev_batch_num_memories = data 146 | feed_dict_dev = {self.question: dev_batch_question, 147 | self.question_lengths: dev_batch_q_lengths, 148 | self.answer: dev_batch_answer, 149 | self.memory: dev_batch_memory} 150 | elif use_text: 151 | dev_batch_question, dev_batch_q_lengths, dev_batch_answer, dev_batch_text_key_mem, dev_batch_text_key_len, \ 152 | dev_batch_text_val_mem, dev_batch_num_text_mems = data 153 | feed_dict_dev = {self.question: dev_batch_question, 154 | self.question_lengths: dev_batch_q_lengths, 155 | self.answer: dev_batch_answer, 156 | self.text_key_mem: dev_batch_text_key_mem, 157 | self.text_key_len: dev_batch_text_key_len, 158 | self.text_val_mem: dev_batch_text_val_mem} 159 | 160 | dev_batch_loss_value, dev_prediction, batch_attn_weight, topk = sess.run( 161 | [self.loss, self.predict_op, self.model.attn_weights_all_hops, self.rank_op], 162 | feed_dict=feed_dict_dev) 163 | 164 | for j,v in enumerate(topk.indices): 165 | for i,w in enumerate(v): 166 | if w == dev_batch_answer[j]: 167 | SRR += 1.0/(i+1) 168 | 169 | dev_loss += dev_batch_loss_value 170 | num_dev_data += dev_batch_question.shape[0] 171 | dev_acc += np.sum(dev_prediction == dev_batch_answer) 172 | attn_weight = batch_attn_weight[0] if attn_weight is None \ 173 | else np.vstack((attn_weight, batch_attn_weight[0])) 174 | # store predictions 175 | dev_prediction = np.expand_dims(dev_prediction, axis=1) 176 | dev_batch_answer = np.expand_dims(dev_batch_answer, axis=1) 177 | if dev_prediction is not None: 178 | concat = np.concatenate((dev_prediction, dev_batch_answer), axis=1) 179 | preds.append(concat) 180 | 181 | print('MRR: ', SRR/num_dev_data) 182 | dev_acc = (1.0 * dev_acc / num_dev_data) 183 | dev_loss = (1.0 * dev_loss / num_dev_data) 184 | if print_attention_weights: 185 | f_out = open(output_dir + "/attn_wts.npy", 'w') 186 | np.save(f_out, attn_weight) 187 | print('Wrote attention weights...') 188 | 189 | self.dev_batcher.reset() 190 | if dev_acc >= 0.3 or mode == 'test': 191 | f_out = open(output_dir + "/out_txt." + str(dev_acc), 'w') 192 | print('Writing to {}'.format("out_txt." 
+ str(dev_acc))) 193 | 194 | preds = np.vstack(preds) 195 | preds.tofile(f_out) 196 | if mode == 'test': 197 | f_out1 = open(output_dir + "/out.txt", 'w') 198 | preds.tofile(f_out1) 199 | f_out1.close() 200 | 201 | f_out.close() 202 | print( 203 | 'It took {0:10.4f}s to evaluate on dev set of size: {3:10d} with dev loss: {1:10.4f} and dev acc: {2:10.4f}'.format( 204 | time.time() - dev_start_time, dev_loss, dev_acc, num_dev_data)) 205 | 206 | return dev_acc, dev_loss 207 | 208 | def fit(self): 209 | 210 | train_loss = 0.0 211 | batch_counter = 0 212 | train_acc = 0.0 213 | with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess: 214 | sess.run(self.initialize()) 215 | 216 | if load_model: 217 | print('Loading retrained model from {}'.format(model_path)) 218 | self.saver.restore(sess, model_path) 219 | 220 | if mode == 'test': 221 | self.dev_eval(sess) 222 | # print(sess.run(self.model.b)) 223 | # self.dev_eval(sess) 224 | if mode == 'train': 225 | self.start_time = time.time() 226 | print('Starting to train') 227 | for data in self.batcher.get_next_batch(): 228 | batch_counter += 1 229 | if use_kb and use_text: 230 | batch_question, batch_q_lengths, batch_answer, batch_memory, batch_num_memories, \ 231 | batch_text_key_mem, batch_text_key_len, batch_text_val_mem, batch_num_text_mems = data 232 | feed_dict = {self.question: batch_question, 233 | self.question_lengths: batch_q_lengths, 234 | self.answer: batch_answer, 235 | self.memory: batch_memory, 236 | self.text_key_mem: batch_text_key_mem, 237 | self.text_key_len: batch_text_key_len, 238 | self.text_val_mem: batch_text_val_mem} 239 | elif use_kb: 240 | batch_question, batch_q_lengths, batch_answer, batch_memory, batch_num_memories = data 241 | feed_dict = {self.question: batch_question, 242 | self.question_lengths: batch_q_lengths, 243 | self.answer: batch_answer, 244 | self.memory: batch_memory} 245 | elif use_text: 246 | batch_question, batch_q_lengths, batch_answer, batch_text_key_mem, batch_text_key_len, \ 247 | batch_text_val_mem, batch_num_text_mems = data 248 | feed_dict = {self.question: batch_question, 249 | self.question_lengths: batch_q_lengths, 250 | self.answer: batch_answer, 251 | self.text_key_mem: batch_text_key_mem, 252 | self.text_key_len: batch_text_key_len, 253 | self.text_val_mem: batch_text_val_mem} 254 | 255 | # train 256 | batch_loss_value, _, prediction = sess.run([self.loss, self.train_op, self.predict_op], 257 | feed_dict=feed_dict) 258 | batch_train_acc = (1.0 * np.sum(prediction == batch_answer) / (batch_question.shape[0])) 259 | 260 | train_loss = 0.98 * train_loss + 0.02 * batch_loss_value 261 | train_acc = 0.98 * train_acc + 0.02 * batch_train_acc 262 | print('\t at iter {0:10d} at time {1:10.4f}s train loss: {2:10.4f}, train_acc: {3:10.4f} '.format( 263 | batch_counter, 264 | time.time() - self.start_time, 265 | train_loss, train_acc)) 266 | if batch_counter != 0 and batch_counter % dev_eval_counter == 0: # predict on dev 267 | dev_acc, dev_loss = self.dev_eval(sess) 268 | print('\t at iter {0:10d} at time {1:10.4f}s dev loss: {2:10.4f} dev_acc: {3:10.4f} '.format( 269 | batch_counter, time.time() - self.start_time, dev_loss, dev_acc)) 270 | if dev_acc > self.max_dev_acc: 271 | self.max_dev_acc = dev_acc 272 | # save this model 273 | save_path = self.saver.save(sess, output_dir + "/max_dev_out.ckpt") 274 | if use_kb and use_text: 275 | save_path = self.saver.save(sess, output_dir + "/full_max_dev_out.ckpt") 276 | with open(output_dir + "/dev_accuracies.txt", mode='a') as out: 277 | 
out.write( 278 | 'Dev accuracy while writing max_dev_out.ckpt {0:10.4f}\n'.format(self.max_dev_acc)) 279 | print("Saved model") 280 | if batch_counter % save_counter == 0: 281 | save_path = self.saver.save(sess, output_dir + "/out.ckpt") 282 | print("Saved model") 283 | 284 | 285 | if __name__ == '__main__': 286 | parser = argparse.ArgumentParser() 287 | parser.add_argument("-t", "--train_file", required=True) 288 | parser.add_argument("--dev_file", required=True) 289 | parser.add_argument("-k", "--kb_file", required=True) 290 | parser.add_argument("--text_kb_file", required=True) 291 | parser.add_argument("-v", "--vocab_dir", required=True) 292 | parser.add_argument("-b", "--batch_size", default=32) 293 | parser.add_argument("--dev_batch_size", default=200) 294 | parser.add_argument("-M", "--max_facts", required=True) 295 | parser.add_argument("--max_text_facts", required=True) 296 | parser.add_argument("-m", "--min_facts", required=True) 297 | parser.add_argument("-i", "--hops", default=3) 298 | parser.add_argument("-d", "--embedding_dim", default=50) 299 | parser.add_argument("--entity_vocab_size", required=True) 300 | parser.add_argument("--relation_vocab_size", required=True) 301 | parser.add_argument("--learning_rate", required=True) 302 | parser.add_argument("--grad_clip_norm", required=True) 303 | parser.add_argument("--verbose", default=0) 304 | parser.add_argument("--dev_eval_counter", default=200) 305 | parser.add_argument("--save_counter", default=200) 306 | parser.add_argument("--dev_max_facts", default=15000) 307 | parser.add_argument("--dev_max_text_facts", default=15000) 308 | parser.add_argument("--output_dir", default='.') 309 | parser.add_argument("--load_model", default=0) 310 | parser.add_argument("--model_path", default='') 311 | parser.add_argument("--load_pretrained_vectors", default=0) 312 | parser.add_argument("--pretrained_vector_path", default='') 313 | parser.add_argument("--use_kb", default=1, type=int) 314 | parser.add_argument("--use_text", default=0, type=int) 315 | parser.add_argument("--print_attention_weights", default=0, type=int) 316 | parser.add_argument("--mode", default='train') 317 | parser.add_argument("--combine_text_kb_answer", default='concat2') 318 | parser.add_argument("--separate_key_lstm", default=0, type=int) 319 | 320 | args = parser.parse_args() 321 | entity_vocab_size = int(args.entity_vocab_size) 322 | relation_vocab_size = int(args.relation_vocab_size) 323 | train_file = args.train_file 324 | dev_file = args.dev_file 325 | kb_file = args.kb_file 326 | text_kb_file = args.text_kb_file 327 | vocab_dir = args.vocab_dir 328 | embedding_size = int(args.embedding_dim) 329 | batch_size = int(args.batch_size) 330 | dev_batch_size = int(args.dev_batch_size) 331 | min_facts = int(args.min_facts) 332 | max_facts = int(args.max_facts) 333 | max_text_facts = int(args.max_text_facts) 334 | lr = float(args.learning_rate) 335 | grad_clip_norm = int(args.grad_clip_norm) 336 | verbose = (int(args.verbose) == 1) 337 | hops = int(args.hops) 338 | dev_eval_counter = int(args.dev_eval_counter) 339 | save_counter = int(args.save_counter) 340 | dev_max_facts = int(args.dev_max_facts) 341 | dev_max_text_facts = int(args.dev_max_text_facts) 342 | output_dir = args.output_dir 343 | load_model = (int(args.load_model) == 1) 344 | model_path = args.model_path 345 | use_kb = (args.use_kb == 1) 346 | use_text = (args.use_text == 1) 347 | if load_model: 348 | assert len(model_path) != 0 or model_path is not None 349 | load_pretrained_vectors = 
(int(args.load_pretrained_vectors) == 1) 350 | pretrained_vector_path = args.pretrained_vector_path 351 | print_attention_weights = (args.print_attention_weights == 1) 352 | mode = args.mode 353 | combine_text_kb_answer = args.combine_text_kb_answer 354 | separate_key_lstm = (args.separate_key_lstm == 1) 355 | 356 | t = Trainer() 357 | t.fit() 358 | -------------------------------------------------------------------------------- /code/unit_tests.py: -------------------------------------------------------------------------------- 1 | from feed_data import Batcher 2 | import sys 3 | 4 | 5 | def test_batcher(): 6 | 7 | train_file = "/iesl/canvas/rajarshi/data/TextKBQA/small_train_with_facts.json" 8 | kb_file = "/iesl/canvas/rajarshi/data/TextKBQA/freebase.spades.txt" 9 | batch_size = 32 10 | vocab_dir = "/home/rajarshi/research/joint-text-and-kb-inference-semantic-parsing/vocab/" 11 | min_num_mem_slots = 100 12 | max_num_mem_slots = 500 13 | batcher = Batcher(train_file, kb_file, batch_size, vocab_dir, 14 | min_num_mem_slots=min_num_mem_slots, max_num_mem_slots=max_num_mem_slots, 15 | return_one_epoch=True, shuffle=False) 16 | batch_counter = 0 17 | for data in batcher.get_next_batch(): 18 | batch_counter += 1 19 | batch_question, batch_q_lengths, batch_answer, batch_memory, batch_num_memories = data 20 | 21 | print("####### Test1: Checking number of batches returned#########") 22 | assert batch_counter == 1 23 | print("Test passed!") 24 | 25 | batch_size = 19 26 | batcher.batch_size = batch_size 27 | batcher.reset() 28 | batch_counter = 0 29 | for data in batcher.get_next_batch(): 30 | batch_counter += 1 31 | batch_question, batch_q_lengths, batch_answer, batch_memory, batch_num_memories = data 32 | 33 | print("####### Test2: Checking number of batches returned with different batch size #########") 34 | print(batch_counter) 35 | assert batch_counter == 2 36 | print("Test passed!") 37 | 38 | batch_size = 20 39 | batcher.batch_size = batch_size 40 | batcher.reset() 41 | for data in batcher.get_next_batch(): 42 | batch_counter += 1 43 | batch_question, batch_q_lengths, batch_answer, batch_memory, batch_num_memories = data 44 | print(batch_question[0]) 45 | print(batch_answer[0]) 46 | sys.exit(1) 47 | 48 | 49 | 50 | if __name__ == '__main__': 51 | test_batcher() 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /code/util.py: -------------------------------------------------------------------------------- 1 | # bunch of util codes 2 | import time 3 | import sys 4 | import operator 5 | from collections import defaultdict 6 | import tensorflow as tf 7 | import numpy as np 8 | import json 9 | 10 | 11 | # taken from kelvin guu's traversing_knowledge_graphs repo, (since I really liked it). 12 | def verboserate(iterable, time_wait=5, report=None): 13 | """ 14 | Iterate verbosely. 15 | """ 16 | try: 17 | total = len(iterable) 18 | except TypeError: 19 | total = '?' 
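# (iterables with no len(), e.g. generators or file handles, simply report '?' as the total)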
20 | 21 | def default_report(steps, elapsed): 22 | print('{} of {} processed ({} s)'.format(steps, total, elapsed)) 23 | sys.stdout.flush() 24 | 25 | if report is None: 26 | report = default_report 27 | 28 | start = time.time() 29 | prev = start 30 | for steps, val in enumerate(iterable): 31 | current = time.time() 32 | since_prev = current - prev 33 | elapsed = current - start 34 | if since_prev > time_wait: 35 | report(steps, elapsed) 36 | prev = current 37 | yield val 38 | 39 | #util for sorting keys with decreasing freq of values 40 | sort_keys = (lambda x: sorted(x.items(), key=operator.itemgetter(1), reverse=True)) 41 | 42 | #util for getting the last relevant output from output of dynamic_rnn's 43 | def last_relevant(output, length): 44 | batch_size = tf.shape(output)[0] 45 | max_length = tf.shape(output)[1] 46 | out_size = tf.shape(output)[2] 47 | index = tf.range(0, batch_size) * max_length + (length - 1) 48 | flat = tf.reshape(output, [-1, out_size]) 49 | relevant = tf.gather(flat, index) 50 | return relevant 51 | 52 | 53 | def num_facts_statistics(input_file): 54 | """Check the distribution of number of facts""" 55 | bin_map = defaultdict(int) 56 | max_facts = -1 57 | with open(input_file) as f_in: 58 | for line in verboserate(f_in): 59 | q_json = json.loads(line) 60 | num_facts = q_json['num_facts'] 61 | if num_facts > max_facts: 62 | max_facts = num_facts 63 | if num_facts < 10: 64 | bin_map['0-10'] += 1 65 | elif num_facts < 100: 66 | bin_map['10-100'] += 1 67 | elif num_facts < 500: 68 | bin_map['100-500'] += 1 69 | elif num_facts < 1000: 70 | bin_map['500-1000'] += 1 71 | elif num_facts < 10000: 72 | bin_map['1000-10000'] += 1 73 | elif num_facts < 20000: 74 | bin_map['10000-20000'] += 1 75 | elif num_facts < 25000: 76 | bin_map['20000-25000'] += 1 77 | elif num_facts < 30000: 78 | bin_map['25000-30000'] += 1 79 | else: 80 | bin_map['> 30000'] += 1 81 | print('Max facts {0:10d}'.format(max_facts)) 82 | return bin_map 83 | 84 | def read_model_predictions(precition_file, entity_vocab_file, dev_file): 85 | 86 | #read the answers in a list 87 | question_list = [] 88 | with open(dev_file) as dev: 89 | for line in dev: 90 | line = line.strip() 91 | dev_q = json.loads(line) 92 | question = dev_q['sentence'] 93 | question_list.append(question) 94 | 95 | entity_vocab = {} 96 | with open(entity_vocab_file) as f_in: 97 | entity_vocab = json.load(f_in) 98 | rev_entity_vocab = {} 99 | for k,v in entity_vocab.iteritems(): 100 | rev_entity_vocab[v] = k 101 | 102 | data = np.load(precition_file) 103 | data = data.reshape(num_dev, -1) 104 | print data.shape 105 | seq_len = data.shape[1] 106 | num = data.shape[0] 107 | for i in range(num): 108 | predicted_answer = rev_entity_vocab[data[i][seq_len-2]] 109 | correct_answer = rev_entity_vocab[data[i][seq_len-1]] 110 | str = question_list[i]+' '+predicted_answer+' '+correct_answer 111 | if predicted_answer == correct_answer: 112 | print(str) 113 | 114 | if __name__ == "__main__": 115 | # input_file = "/iesl/canvas/rajarshi/data/TextKBQA/dev_with_facts.json" 116 | # print(num_facts_statistics(input_file)) 117 | # input_file = "/iesl/canvas/rajarshi/data/TextKBQA/train_with_facts.json" 118 | # print(num_facts_statistics(input_file)) 119 | prediction_file = "/home/rajarshi/research/joint-text-and-kb-inference-semantic-parsing/out/2017.01.14-15.52.14/out_txt.0.21875" 120 | entity_vocab_file = "/home/rajarshi/research/joint-text-and-kb-inference-semantic-parsing/vocab/entity_vocab.json" 121 | dev_file = 
"/iesl/canvas/rajarshi/data/TextKBQA/very_small_dev_with_facts.json" 122 | read_model_predictions(prediction_file, entity_vocab_file, dev_file) 123 | 124 | 125 | -------------------------------------------------------------------------------- /config.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | current_time=$(date "+%Y.%m.%d-%H.%M.%S") 3 | ROOT_DIR=$(pwd) 4 | vocab_dir="$ROOT_DIR/vocab/" 5 | kb_file="$ROOT_DIR/kb/small_demo_kb.txt" 6 | text_kb_file="$ROOT_DIR/text_kb/small_demo_text_kb.txt" 7 | train_file="$ROOT_DIR/data_formatted/small_train_with_kb_and_text_facts.json" 8 | dev_file="$ROOT_DIR/data_formatted/small_dev_with_kb_and_text_facts.json" 9 | combine_text_kb_answer='batch_norm' 10 | CANVAS_DIR="$ROOT_DIR/expt_outputs" 11 | OUTPUT_DIR=$CANVAS_DIR/demo_run/${current_time} 12 | load_model=0 13 | model_path='path/to/trained/model' # path to trained model 14 | load_pretrained_vectors=1 15 | pretrained_vector_path='/path/to/pretrained/vectors' 16 | use_kb=1 17 | use_text=1 18 | gpu_id=0 19 | dev_batch_size=32 20 | dev_eval_counter=500 21 | save_counter=1000 22 | batch_size=32 23 | entity_vocab_size=1817565 24 | relation_vocab_size=721 25 | max_facts=5000 26 | dev_max_facts=5000 27 | max_text_facts=2500 28 | dev_max_text_facts=5000 29 | embedding_dim=50 30 | min_facts=0 31 | learning_rate=1e-3 32 | grad_clip_norm=5 33 | verbose=1 34 | hops=3 35 | separate_key_lstm=0 36 | mode='train' #set this to train or test 37 | #mode='test' #set this to train or test 38 | create_expt_dir=1 #make it 0 if you dont want to creat an output directory and only print stuff 39 | 40 | if [ $create_expt_dir -eq 1 ]; then 41 | mkdir -p $OUTPUT_DIR 42 | else 43 | echo "WARNING!!! - create_expt_dir is not set. No output will be written." 44 | fi 45 | print_attention_weights=0 46 | -------------------------------------------------------------------------------- /config_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dir to store the int mappings  3 | ROOT_DIR=$(pwd) 4 | vocab_dir="/home/rajarshi/research/joint-text-and-kb-inference-semantic-parsing/vocab_spades" 5 | kb_file="/iesl/canvas/rajarshi/data/SPADES_NEW/freebase.spades.txt.new" 6 | text_kb_file="/iesl/canvas/rajarshi/data/SPADES_NEW/text_kb.spades.txt" 7 | train_file="/iesl/canvas/rajarshi/data/SPADES_NEW/train_with_kb_facts.json" 8 | dev_file="/iesl/canvas/rajarshi/data/SPADES/dev_with_kb_facts.json" 9 | test_file="/iesl/canvas/rajarshi/data/SPADES/test_with_kb_facts.json" 10 | #because of the design, keep one of them on at a time. 11 | make_vocab=0 #1 to create new vocabs; 0 means you want to reuse a previously created ones 12 | extract_relevant_kb=0 #to extract part of KB which occur in train set 13 | sort_freebase=0 #sort freebase wrt e1 14 | make_text_kb=0 #1 to create text kb from train file 15 | augment_kb_facts=0 #1 to augment files with kb facts 16 | augment_text_kb_facts=0 #1 to augment files with text kb facts -------------------------------------------------------------------------------- /data_formatted/README: -------------------------------------------------------------------------------- 1 | 2017.04.23-02.38.25 - Rajarshi 2 | 3 | The train/dev/test split in this directory have been used for our experiments. These files have been generated by augmenting the original files with all possible relevant kb and text kb facts. 
4 | -------------------------------------------------------------------------------- /data_formatted/small_dev_with_kb_and_text_facts.json: -------------------------------------------------------------------------------- 1 | {"text_kb_lengths": [435, 104], "text_kb_num_facts": 539, "sentence": "Apple was founded by Steve_Jobs and _blank_ .", "lengths": [332, 87], "text_kb_start_indices": [0, 435], "num_facts": 419, "start_indices": [0, 332], "entities": [{"index": 0, "score": 0.996726, "entity": "m.0k8z"}, {"index": 4, "score": 0.887073, "entity": "m.06y3r"}], "answerSubset": ["m.06y1l"], "words": [{"word": "Apple", "ner": "O", "pos": "NNP", "lemma": "Apple"}, {"word": "was", "ner": "O", "pos": "VBD", "lemma": "be"}, {"word": "founded", "ner": "O", "pos": "VBN", "lemma": "found"}, {"word": "by", "ner": "O", "pos": "IN", "lemma": "by"}, {"word": "Steve_Jobs", "ner": "O", "pos": "NNP", "lemma": "Steve_Jobs"}, {"word": "and", "ner": "O", "pos": "CC", "lemma": "and"}, {"word": "_blank_", "ner": "O", "pos": "NNP", "lemma": "_blank_"}, {"word": ".", "ner": "O", "pos": ".", "lemma": "."}], "answerString": ["Steve_Wozniak"]} 2 | -------------------------------------------------------------------------------- /data_formatted/small_train_with_kb_and_text_facts.json: -------------------------------------------------------------------------------- 1 | {"text_kb_lengths": [435, 104], "text_kb_num_facts": 539, "sentence": "Apple was founded by Steve_Jobs and _blank_ .", "lengths": [332, 87], "text_kb_start_indices": [0, 435], "num_facts": 419, "start_indices": [0, 332], "entities": [{"index": 0, "score": 0.996726, "entity": "m.0k8z"}, {"index": 4, "score": 0.887073, "entity": "m.06y3r"}], "answerSubset": ["m.06y1l"], "words": [{"word": "Apple", "ner": "O", "pos": "NNP", "lemma": "Apple"}, {"word": "was", "ner": "O", "pos": "VBD", "lemma": "be"}, {"word": "founded", "ner": "O", "pos": "VBN", "lemma": "found"}, {"word": "by", "ner": "O", "pos": "IN", "lemma": "by"}, {"word": "Steve_Jobs", "ner": "O", "pos": "NNP", "lemma": "Steve_Jobs"}, {"word": "and", "ner": "O", "pos": "CC", "lemma": "and"}, {"word": "_blank_", "ner": "O", "pos": "NNP", "lemma": "_blank_"}, {"word": ".", "ner": "O", "pos": ".", "lemma": "."}], "answerString": ["Steve_Wozniak"]} 2 | -------------------------------------------------------------------------------- /get_data.sh: -------------------------------------------------------------------------------- 1 | 2 | echo "Getting the freebase kb" 3 | wget -O kb/freebase.spades.txt http://iesl.cs.umass.edu/downloads/spades/freebase.spades.txt 4 | if [ $? -ne 0 ]; then 5 | echo "Failed to get the kb" 6 | echo "exiting..." 7 | exit 1 8 | fi 9 | echo "Getting the text kb" 10 | 11 | wget -O text_kb/text_kb.spades.txt http://iesl.cs.umass.edu/downloads/spades/text_kb.spades.txt 12 | if [ $? -ne 0 ]; then 13 | echo "Failed to get the kb" 14 | echo "exiting..." 15 | exit 1 16 | fi 17 | echo "Done..." 18 | 19 | echo "Getting the trained model" 20 | wget -O trained_model/max_dev_out.ckpt http://iesl.cs.umass.edu/downloads/spades/max_dev_out.ckpt 21 | if [ $? -ne 0 ]; then 22 | echo "Failed to get the model" 23 | echo "exiting..." 24 | exit 1 25 | fi 26 | echo "Done..." 
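# note: "wget -O trained_model/max_dev_out.ckpt" above writes into trained_model/ but does not
# create that directory; run "mkdir -p trained_model" first if it does not already exist.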
27 | -------------------------------------------------------------------------------- /kb/small_demo_kb.txt: -------------------------------------------------------------------------------- 1 | ['m.0k8z', 'business.acquisition.acquiring_company', 'business.acquisition.company_acquired', 'm.01rtbw'] 2 | ['m.0k8z', 'business.acquisition.acquiring_company', 'business.acquisition.company_acquired', 'm.03dzs02'] 3 | ['m.0k8z', 'business.business_operation.assets.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 4 | ['m.0k8z', 'business.business_operation.current_assets.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 5 | ['m.0k8z', 'business.business_operation.current_liabilities.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 6 | ['m.0k8z', 'business.business_operation.industry.1', 'business.business_operation.industry.2', 'm.019z7b'] 7 | ['m.0k8z', 'business.business_operation.industry.1', 'business.business_operation.industry.2', 'm.01mf0'] 8 | ['m.0k8z', 'business.business_operation.industry.1', 'business.business_operation.industry.2', 'm.01mfj'] 9 | ['m.0k8z', 'business.business_operation.industry.1', 'business.business_operation.industry.2', 'm.03qb3f1'] 10 | ['m.0k8z', 'business.business_operation.industry.1', 'business.business_operation.industry.2', 'm.07c1v'] 11 | ['m.0k8z', 'business.business_operation.liabilities.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 12 | ['m.0k8z', 'business.business_operation.net_profit.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 13 | ['m.0k8z', 'business.business_operation.operating_income.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 14 | ['m.0k8z', 'business.business_operation.retained_earnings.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 15 | ['m.0k8z', 'business.business_operation.revenue.inverse', 'measurement_unit.dated_money_value.currency', 'm.09nqf'] 16 | ['m.0k8z', 'business.company_brand_relationship.company', 'business.company_brand_relationship.brand', 'm.0j_5dr_'] 17 | ['m.0k8z', 'business.company_brand_relationship.company', 'business.company_brand_relationship.brand', 'm.0mcx2'] 18 | ['m.0k8z', 'business.company_brand_relationship.company', 'business.company_brand_relationship.brand', 'm.0zd6'] 19 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.027lnzs'] 20 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.03hpd5x'] 21 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.04yg_s'] 22 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.068tk'] 23 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.07t99m'] 24 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.0gz0jd'] 25 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.0kmc'] 26 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.0m5m8'] 27 | ['m.0k8z', 'business.company_product_line_relationship.company', 
'business.company_product_line_relationship.product_line', 'm.0mbxw'] 28 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.0mcx2'] 29 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.0wxp'] 30 | ['m.0k8z', 'business.company_product_line_relationship.company', 'business.company_product_line_relationship.product_line', 'm.0zd6'] 31 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.02p3xw8'] 32 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.02q23j4'] 33 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.02z6z4v'] 34 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.03hl5pv'] 35 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.04yg_s'] 36 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.04ygwp'] 37 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.05kd9b0'] 38 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.05px7xp'] 39 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.05px86k'] 40 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.066858v'] 41 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.08055kq'] 42 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.09rvlp4'] 43 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.09yc4yn'] 44 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0b_wnn'] 45 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0c0bg9c'] 46 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0cc8kq4'] 47 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0cnyyg3'] 48 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0dbpfb'] 49 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0f4_6'] 50 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0gg4gh4'] 51 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0ggbcny'] 52 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0j30q9x'] 53 | ['m.0k8z', 'business.company_product_relationship.company', 
'business.company_product_relationship.consumer_product', 'm.0jpgv'] 54 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0k706'] 55 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0n3s_98'] 56 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0nb7vg2'] 57 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0q4l2'] 58 | ['m.0k8z', 'business.company_product_relationship.company', 'business.company_product_relationship.consumer_product', 'm.0sjms'] 59 | ['m.0k8z', 'business.competitive_space_mediator.company', 'business.competitive_space_mediator.brand', 'm.027lnzs'] 60 | ['m.0k8z', 'business.competitive_space_mediator.company', 'business.competitive_space_mediator.space', 'm.0169zh'] 61 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.011xmf'] 62 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.014jjj'] 63 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0190r3'] 64 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0199cb'] 65 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.01bng'] 66 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.01fqmx'] 67 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.01g4wh'] 68 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.01k05j'] 69 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.01p5q2'] 70 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.01rvfd'] 71 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.022pvs0'] 72 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.022s0d'] 73 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0232w74'] 74 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_3qs'] 75 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_46d0'] 76 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_5njz'] 77 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.025yklv'] 78 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.026nytt'] 79 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.026nzp'] 80 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.026w6ww'] 81 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_6y_y'] 82 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_6z1d'] 83 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_7cgc'] 84 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_7cly'] 85 | ['m.0k8z', 'business.employment_tenure.company', 
'business.employment_tenure.person', 'm.027rth_'] 86 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0281sx7'] 87 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0284zk2'] 88 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_8sgs'] 89 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_8vcg'] 90 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02bccg'] 91 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02cw6w'] 92 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02f2m7'] 93 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_fdfv'] 94 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_fdgr'] 95 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_fdhc'] 96 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02h67bm'] 97 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02hqpz6'] 98 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02hskp_'] 99 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02kb3t4'] 100 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02q1xr4'] 101 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02qzm8b'] 102 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02r8bh'] 103 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_rd1q'] 104 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02tbkm_'] 105 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02tc2vc'] 106 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02_wh6w'] 107 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02x65_9'] 108 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02z496'] 109 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02zd5vc'] 110 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02zd60y'] 111 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.02zdd_4'] 112 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0313br'] 113 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.038kp1'] 114 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03bvhpd'] 115 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03c3vn'] 116 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03cspsd'] 117 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03ct__n'] 118 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03d3pmy'] 119 | ['m.0k8z', 'business.employment_tenure.company', 
'business.employment_tenure.person', 'm.03gvrk1'] 120 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03hx3qh'] 121 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03qs2_'] 122 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03wqgrq'] 123 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03wvpq3'] 124 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03wwkdv'] 125 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03wxdbd'] 126 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.03z7s5'] 127 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04140b'] 128 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.041c3x'] 129 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.041ymw'] 130 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.042p0_'] 131 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.047c3f'] 132 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04dfq4x'] 133 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04gn_w'] 134 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04nrh5z'] 135 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04p0w0'] 136 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04ph7x'] 137 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04w2kf'] 138 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04ycpj6'] 139 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04zvcs'] 140 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04zvf5'] 141 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.04zzp33'] 142 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.05cp05'] 143 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.05ffdn'] 144 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.05g571'] 145 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.05lm637'] 146 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.05plcb'] 147 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.05r65m'] 148 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.05sv7tn'] 149 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0641dfm'] 150 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0680n8'] 151 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.06kf9v'] 152 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.06lnrv'] 153 | ['m.0k8z', 'business.employment_tenure.company', 
'business.employment_tenure.person', 'm.06rc2b'] 154 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.06y1l'] 155 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.06y3r'] 156 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.070x33'] 157 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.07cny1'] 158 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.07f7g6h'] 159 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.07j833'] 160 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.07k6_l2'] 161 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.07r9x7'] 162 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.07rb0g'] 163 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.07xmqn'] 164 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.080jr5c'] 165 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.08qzjd'] 166 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.097l5b'] 167 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.097mlj'] 168 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.09cdjc'] 169 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.09vtqm'] 170 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0b2sv8'] 171 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0bglfhp'] 172 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0bhlbm1'] 173 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0brd1cs'] 174 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0c84nh'] 175 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0c9jhm'] 176 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0cn_31m'] 177 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0d21pp'] 178 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0d21v5'] 179 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0d21zn'] 180 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0dbm5q'] 181 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0dqj_4'] 182 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0dt0hb'] 183 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0f1rnm'] 184 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0fy2l4'] 185 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0gcrxq'] 186 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0gf91cv'] 187 | ['m.0k8z', 'business.employment_tenure.company', 
'business.employment_tenure.person', 'm.0gkwpt'] 188 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0gmsmf'] 189 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0gv8ss'] 190 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0gy1khd'] 191 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0hr5tjf'] 192 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0j5c023'] 193 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0k0v5lc'] 194 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0k1x9vh'] 195 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0n9lz5x'] 196 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0nc_zr4'] 197 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0pm6w'] 198 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0q0x'] 199 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.person', 'm.0r5w534'] 200 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.01jg6'] 201 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.01_n98'] 202 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.01rk91'] 203 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.01yc02'] 204 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.01z0qtz'] 205 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.021q0l'] 206 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02211by'] 207 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_2lvc'] 208 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_36yn'] 209 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_51wr'] 210 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.0252htr'] 211 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_5v9d'] 212 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_77c8'] 213 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_79wy'] 214 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_9b3v'] 215 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02h527d'] 216 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02h7drz'] 217 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02kvlgh'] 218 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02r_m2b'] 219 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02wsmf8'] 220 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02wszlj'] 221 | ['m.0k8z', 'business.employment_tenure.company', 
'business.employment_tenure.title', 'm.02_wv2r'] 222 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02yx74t'] 223 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02_z469'] 224 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.02z7mcb'] 225 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.03bqjfv'] 226 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.03chy_1'] 227 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04192r'] 228 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04dfrw4'] 229 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04dfrwh'] 230 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04fl0zc'] 231 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gcjnc'] 232 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gcl7x'] 233 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gclhh'] 234 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gclhp'] 235 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gcm2c'] 236 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gcm5p'] 237 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gcmg9'] 238 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04gcmrg'] 239 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04k7t2h'] 240 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04kp_lq'] 241 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04kplr_'] 242 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.04kp_mt'] 243 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.05tzh8_'] 244 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.05tzh9b'] 245 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.0bglg0c'] 246 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.0dq_5'] 247 | ['m.0k8z', 'business.employment_tenure.company', 'business.employment_tenure.title', 'm.0l3v_3n'] 248 | ['m.0k8z', 'business.issuer.issue.1', 'business.issuer.issue.2', 'm.07zmbv7'] 249 | ['m.0k8z', 'business.market_share.company', 'business.market_share.region', 'm.09c7w0'] 250 | ['m.0k8z', 'business.market_share.company', 'business.market_share.space', 'm.0169zh'] 251 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.0190r3'] 252 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.01rvfd'] 253 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.026nzp'] 254 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.030rb_3'] 255 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.03qs2_'] 256 | ['m.0k8z', 
'organization.leadership.organization', 'organization.leadership.person', 'm.03wqgrq'] 257 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.03wxdbd'] 258 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.05cp05'] 259 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.05r65m'] 260 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.06kf9v'] 261 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.06y3r'] 262 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.person', 'm.09thkp5'] 263 | ['m.0k8z', 'organization.leadership.organization', 'organization.leadership.role', 'm.0dq_5'] 264 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.01762z'] 265 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.0190r3'] 266 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.01gqf4'] 267 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.01rvfd'] 268 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.02_xvfj'] 269 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.03j_1t'] 270 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.04zvcs'] 271 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.06y3r'] 272 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.0bmx05'] 273 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.0cby0x'] 274 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.0cby3n'] 275 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.0d05fv'] 276 | ['m.0k8z', 'organization.organization_board_membership.organization', 'organization.organization_board_membership.member', 'm.0gdryj'] 277 | ['m.0k8z', 'organization.organization.founders.1', 'organization.organization.founders.2', 'm.02z496'] 278 | ['m.0k8z', 'organization.organization.founders.1', 'organization.organization.founders.2', 'm.06y1l'] 279 | ['m.0k8z', 'organization.organization.founders.1', 'organization.organization.founders.2', 'm.06y3r'] 280 | ['m.0k8z', 'organization.organization.headquarters.inverse', 'location.mailing_address.citytown', 'm.0r679'] 281 | ['m.0k8z', 'organization.organization.headquarters.inverse', 'location.mailing_address.postal_code', 'm.01zd5vw'] 282 | ['m.0k8z', 'organization.organization.headquarters.inverse', 'location.mailing_address.state_province_region', 'm.01n7q'] 283 | ['m.0k8z', 'organization.organization.legal_structure.1', 'organization.organization.legal_structure.2', 'm.079bdg'] 284 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 
'm.03m1_bn'] 285 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_bv'] 286 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_c0'] 287 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_c6'] 288 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_cd'] 289 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_cl'] 290 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_cs'] 291 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_cz'] 292 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_d4'] 293 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_db'] 294 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_dj'] 295 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_dq'] 296 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_dx'] 297 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_f2'] 298 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_f8'] 299 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_fg'] 300 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_fn'] 301 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_fv'] 302 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_g0'] 303 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_g6'] 304 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_gd'] 305 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_gl'] 306 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m1_gs'] 307 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29wx'] 308 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29x2'] 309 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29x8'] 310 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29xg'] 311 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29xn'] 312 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29xv'] 313 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29y0'] 314 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m29y6'] 315 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m3bc9'] 316 | ['m.0k8z', 'organization.organization.locations.1', 
'organization.organization.locations.2', 'm.03m3bch'] 317 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m3bcp'] 318 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m3bcw'] 319 | ['m.0k8z', 'organization.organization.locations.1', 'organization.organization.locations.2', 'm.03m3bd1'] 320 | ['m.0k8z', 'organization.organization.phone_number.1', 'organization.organization.phone_number.2', 'm.0mx_hlq'] 321 | ['m.0k8z', 'organization.organization.place_founded.1', 'organization.organization.place_founded.2', 'm.01n7q'] 322 | ['m.0k8z', 'organization.organization.place_founded.1', 'organization.organization.place_founded.2', 'm.0r679'] 323 | ['m.0k8z', 'organization.organization_relationship.parent', 'organization.organization_relationship.child', 'm.02hwrl'] 324 | ['m.0k8z', 'organization.organization_relationship.parent', 'organization.organization_relationship.child', 'm.03cjqss'] 325 | ['m.0k8z', 'organization.organization_relationship.parent', 'organization.organization_relationship.child', 'm.0411n0z'] 326 | ['m.0k8z', 'organization.organization_relationship.parent', 'organization.organization_relationship.child', 'm.09m8jd4'] 327 | ['m.0k8z', 'organization.organization_relationship.parent', 'organization.organization_relationship.child', 'm.0c81jt'] 328 | ['m.0k8z', 'organization.organization_relationship.parent', 'organization.organization_relationship.child', 'm.0cfhc5'] 329 | ['m.0k8z', 'organization.organization_relationship.parent', 'organization.organization_relationship.child', 'm.0hrdfsh'] 330 | ['m.0k8z', 'organization.organization_spin_off.parent_company', 'organization.organization_spin_off.child_company', 'm.01st38'] 331 | ['m.0k8z', 'venture_capital.venture_investment.company', 'venture_capital.venture_investment.investor', 'm.043ttv'] 332 | ['m.0k8z', 'venture_capital.venture_investment.company', 'venture_capital.venture_investment.investor', 'm.05cp05'] 333 | ['m.03cjr65', 'education.education.student', 'education.education.institution', 'm.0lvng'] 334 | ['m.06y3r', 'award.award_honor.award_winner', 'award.award_honor.award', 'm.04ss98'] 335 | ['m.06y3r', 'award.award_honor.award_winner', 'award.award_honor.award', 'm.0n2367_'] 336 | ['m.06y3r', 'award.award_honor.award_winner', 'award.award_honor.award', 'm.0rzb4km'] 337 | ['m.06y3r', 'award.award_honor.award_winner', 'award.award_honor.award_winner', 'm.017h4c'] 338 | ['m.06y3r', 'award.award_honor.award_winner', 'award.award_honor.award_winner', 'm.04jspq'] 339 | ['m.06y3r', 'award.award_honor.award_winner', 'award.award_honor.award_winner', 'm.06y1l'] 340 | ['m.06y3r', 'award.award_honor.award_winner', 'award.award_honor.ceremony', 'm.0ctd66'] 341 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.company', 'm.03mnk'] 342 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.company', 'm.05fxn'] 343 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.company', 'm.09b3v'] 344 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.company', 'm.0k8z'] 345 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.company', 'm.0kk9v'] 346 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.company', 'm.0xwj'] 347 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.title', 'm.01z0qtz'] 348 | ['m.06y3r', 'business.employment_tenure.person', 
'business.employment_tenure.title', 'm.03bqjfv'] 349 | ['m.06y3r', 'business.employment_tenure.person', 'business.employment_tenure.title', 'm.0dq_5'] 350 | ['m.06y3r', 'education.education.student', 'education.education.institution', 'm.05ds6x'] 351 | ['m.06y3r', 'education.education.student', 'education.education.institution', 'm.06bw5'] 352 | ['m.06y3r', 'education.education.student', 'education.education.institution', 'm.076913x'] 353 | ['m.06y3r', 'education.education.student', 'education.education.institution', 'm.0k03wc8'] 354 | ['m.06y3r', 'film.film.executive_produced_by.2', 'film.film.executive_produced_by.1', 'm.0dyb1'] 355 | ['m.06y3r', 'film.film.subjects.2', 'film.film.subjects.1', 'm.0246dk'] 356 | ['m.06y3r', 'film.film.subjects.2', 'film.film.subjects.1', 'm.0j7j4ls'] 357 | ['m.06y3r', 'film.film.subjects.2', 'film.film.subjects.1', 'm.0k1zcyn'] 358 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.038gjc'] 359 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.03m5drv'] 360 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.0f6f_8'] 361 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.0h11b1s'] 362 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.0m2g3b0'] 363 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.0m2g3r9'] 364 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.0m2g62x'] 365 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.film', 'm.0qpx78t'] 366 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.type_of_appearance', 'm.021y7s_'] 367 | ['m.06y3r', 'film.personal_film_appearance.person', 'film.personal_film_appearance.type_of_appearance', 'm.02nsjx2'] 368 | ['m.06y3r', 'media_common.quotation.author.2', 'media_common.quotation.author.1', 'm.02cwm'] 369 | ['m.06y3r', 'media_common.quotation.author.2', 'media_common.quotation.author.1', 'm.040fw97'] 370 | ['m.06y3r', 'media_common.quotation.author.2', 'media_common.quotation.author.1', 'm.040fw9m'] 371 | ['m.06y3r', 'media_common.quotation.author.2', 'media_common.quotation.author.1', 'm.09ryyn5'] 372 | ['m.06y3r', 'media_common.quotation.author.2', 'media_common.quotation.author.1', 'm.0ggbtb1'] 373 | ['m.06y3r', 'media_common.quotation.author.2', 'media_common.quotation.author.1', 'm.0jvfvr2'] 374 | ['m.06y3r', 'organization.leadership.person', 'organization.leadership.organization', 'm.0k8z'] 375 | ['m.06y3r', 'organization.leadership.person', 'organization.leadership.role', 'm.0dq_5'] 376 | ['m.06y3r', 'organization.organization_board_membership.member', 'organization.organization_board_membership.organization', 'm.043bhr'] 377 | ['m.06y3r', 'organization.organization_board_membership.member', 'organization.organization_board_membership.organization', 'm.09b3v'] 378 | ['m.06y3r', 'organization.organization_board_membership.member', 'organization.organization_board_membership.organization', 'm.0dqq7d'] 379 | ['m.06y3r', 'organization.organization_board_membership.member', 'organization.organization_board_membership.organization', 'm.0k8z'] 380 | ['m.06y3r', 'organization.organization_board_membership.member', 'organization.organization_board_membership.organization', 'm.0kk9v'] 381 | ['m.06y3r', 
'organization.organization.founders.2', 'organization.organization.founders.1', 'm.011wwj'] 382 | ['m.06y3r', 'organization.organization.founders.2', 'organization.organization.founders.1', 'm.02hwrl'] 383 | ['m.06y3r', 'organization.organization.founders.2', 'organization.organization.founders.1', 'm.05fxn'] 384 | ['m.06y3r', 'organization.organization.founders.2', 'organization.organization.founders.1', 'm.0k8z'] 385 | ['m.06y3r', 'organization.organization.founders.2', 'organization.organization.founders.1', 'm.0kk9v'] 386 | ['m.06y3r', 'people.deceased_person.cause_of_death.1', 'people.deceased_person.cause_of_death.2', 'm.03c4yw'] 387 | ['m.06y3r', 'people.deceased_person.place_of_burial.1', 'people.deceased_person.place_of_burial.2', 'm.0h967g4'] 388 | ['m.06y3r', 'people.deceased_person.place_of_death.1', 'people.deceased_person.place_of_death.2', 'm.0f04c'] 389 | ['m.06y3r', 'people.ethnicity.people.2', 'people.ethnicity.people.1', 'm.0150zs'] 390 | ['m.06y3r', 'people.ethnicity.people.2', 'people.ethnicity.people.1', 'm.01qhm_'] 391 | ['m.06y3r', 'people.ethnicity.people.2', 'people.ethnicity.people.1', 'm.02ctzb'] 392 | ['m.06y3r', 'people.ethnicity.people.2', 'people.ethnicity.people.1', 'm.02qv_h_'] 393 | ['m.06y3r', 'people.marriage.spouse', 'people.marriage.location_of_ceremony', 'm.05jvq5'] 394 | ['m.06y3r', 'people.marriage.spouse', 'people.marriage.spouse', 'm.025zhrp'] 395 | ['m.06y3r', 'people.marriage.spouse', 'people.marriage.spouse', 'm.02jv7j1'] 396 | ['m.06y3r', 'people.marriage.spouse', 'people.marriage.type_of_union', 'm.01g63y'] 397 | ['m.06y3r', 'people.marriage.spouse', 'people.marriage.type_of_union', 'm.04ztj'] 398 | ['m.06y3r', 'people.person.gender.1', 'people.person.gender.2', 'm.05zppz'] 399 | ['m.06y3r', 'people.person.nationality.1', 'people.person.nationality.2', 'm.09c7w0'] 400 | ['m.06y3r', 'people.person.parents.1', 'people.person.parents.2', 'm.02jvc1c'] 401 | ['m.06y3r', 'people.person.parents.1', 'people.person.parents.2', 'm.02jvcjf'] 402 | ['m.06y3r', 'people.person.parents.1', 'people.person.parents.2', 'm.02jvmvm'] 403 | ['m.06y3r', 'people.person.parents.1', 'people.person.parents.2', 'm.02jvn2b'] 404 | ['m.06y3r', 'people.person.parents.2', 'people.person.parents.1', 'm.0269sm0'] 405 | ['m.06y3r', 'people.person.parents.2', 'people.person.parents.1', 'm.02jvr_q'] 406 | ['m.06y3r', 'people.person.parents.2', 'people.person.parents.1', 'm.02jvs6w'] 407 | ['m.06y3r', 'people.person.parents.2', 'people.person.parents.1', 'm.02jvsf5'] 408 | ['m.06y3r', 'people.person.place_of_birth.1', 'people.person.place_of_birth.2', 'm.0d6lp'] 409 | ['m.06y3r', 'people.person.profession.1', 'people.person.profession.2', 'm.012t_z'] 410 | ['m.06y3r', 'people.person.profession.1', 'people.person.profession.2', 'm.01c979'] 411 | ['m.06y3r', 'people.person.profession.1', 'people.person.profession.2', 'm.03sbb'] 412 | ['m.06y3r', 'people.person.profession.1', 'people.person.profession.2', 'm.09x_r'] 413 | ['m.06y3r', 'people.person.religion.1', 'people.person.religion.2', 'm.04pk9'] 414 | ['m.06y3r', 'people.person.religion.1', 'people.person.religion.2', 'm.0899b'] 415 | ['m.06y3r', 'people.person.religion.1', 'people.person.religion.2', 'm.092bf5'] 416 | ['m.06y3r', 'people.person.religion.1', 'people.person.religion.2', 'm.0kpl'] 417 | ['m.06y3r', 'people.place_lived.person', 'people.place_lived.location', 'm.06pw6'] 418 | ['m.06y3r', 'people.sibling_relationship.sibling', 'people.sibling_relationship.sibling', 'm.02ds5z'] 419 | ['m.06y3r', 
'people.sibling_relationship.sibling', 'people.sibling_relationship.sibling', 'm.0nf3spy'] 420 | ['m.0h0vmp4', 'film.performance.character', 'film.performance.actor', 'm.0170s4'] 421 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | source $1 4 | mkdir -p $vocab_dir 5 | 6 | #system variables 7 | #PYTHON_EXEC="~/anaconda2/bin/python" 8 | #data processing.. 9 | 10 | python_path="/usr/bin/python" 11 | 12 | cmd="$python_path -u $ROOT_DIR/code/train.py --train_file $train_file --dev_file $dev_file \ 13 | --kb_file $kb_file --text_kb_file $text_kb_file --vocab_dir $vocab_dir --max_facts $max_facts --min_facts $min_facts \ 14 | --max_text_facts $max_text_facts --dev_max_facts $dev_max_facts --dev_max_text_facts $dev_max_text_facts 15 | --entity_vocab_size $entity_vocab_size --relation_vocab_size $relation_vocab_size \ 16 | --learning_rate $learning_rate --verbose $verbose --embedding_dim $embedding_dim --grad_clip_norm $grad_clip_norm \ 17 | --hops $hops --dev_batch_size $dev_batch_size --batch_size $batch_size --output_dir $OUTPUT_DIR \ 18 | --load_model $load_model --model_path $model_path --load_pretrained_vectors $load_pretrained_vectors \ 19 | --pretrained_vector_path $pretrained_vector_path --save_counter $save_counter --dev_eval_counter $dev_eval_counter 20 | --use_kb $use_kb --use_text $use_text --print_attention_weights $print_attention_weights --mode $mode \ 21 | --combine_text_kb_answer $combine_text_kb_answer --separate_key_lstm $separate_key_lstm" 22 | 23 | 24 | if [ $create_expt_dir -eq 1 ]; then 25 | set > $OUTPUT_DIR/config.txt 26 | echo "Executing $cmd" | tee $OUTPUT_DIR/log.txt.$current_time 27 | CUDA_VISIBLE_DEVICES=$gpu_id $cmd | tee -a $OUTPUT_DIR/log.txt.$current_time 28 | #print the configs 29 | else 30 | echo "Executing $cmd" 31 | CUDA_VISIBLE_DEVICES=$gpu_id $cmd 32 | fi 33 | 34 | if [ $print_attention_weights -eq 1 ]; then 35 | cmd="$python_path -u $ROOT_DIR/code/qual_eval.py --use_kb $use_kb --use_text $use_text --kb_file $kb_file 36 | --text_kb_file $text_kb_file --attn_file $OUTPUT_DIR/attn_wts.npy --input_test_file $dev_file 37 | --answer_file $OUTPUT_DIR/out.txt --output_dir $OUTPUT_DIR" 38 | echo "Executing $cmd" 39 | $cmd 40 | fi 41 | 42 | -------------------------------------------------------------------------------- /run_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source $1 3 | 4 | mkdir -p $vocab_dir 5 | 6 | #system variables 7 | PYTHON_EXEC="/share/apps/python/bin/python" 8 | #data processing.. 
9 | 10 | cmd="python $ROOT_DIR/code/data_utils.py --make_vocab $make_vocab --vocab_dir $vocab_dir --kb_file $kb_file\ 11 | --text_kb_file $text_kb_file --train_file $train_file --dev_file $dev_file --test_file $test_file \ 12 | --extract_relevant_kb $extract_relevant_kb --make_text_kb $make_text_kb \ 13 | --augment_text_kb_facts $augment_text_kb_facts --augment_kb_facts $augment_kb_facts --sort_freebase $sort_freebase" 14 | 15 | echo "Executing $cmd" 16 | $cmd -------------------------------------------------------------------------------- /test_saved_model_config.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | current_time=$(date "+%Y.%m.%d-%H.%M.%S") 3 | ROOT_DIR=$(pwd) 4 | vocab_dir="$ROOT_DIR/vocab/" 5 | kb_file="$ROOT_DIR/kb/freebase.spades.txt" 6 | text_kb_file="$ROOT_DIR/text_kb/text_kb.spades.txt" 7 | train_file="$ROOT_DIR/data_formatted/small_train_with_kb_and_text_facts.json" 8 | dev_file="$ROOT_DIR/data_formatted/dev_with_kb_and_text_facts.json" 9 | combine_text_kb_answer='batch_norm' 10 | CANVAS_DIR="$ROOT_DIR/expt_outputs" 11 | OUTPUT_DIR=$CANVAS_DIR/demo_run/${current_time} 12 | load_model=1 13 | model_path='trained_model/max_dev_out.ckpt' # path to trained model 14 | load_pretrained_vectors=0 15 | pretrained_vector_path='/path/to/pretrained/vectors' 16 | use_kb=1 17 | use_text=1 18 | gpu_id=0 19 | dev_batch_size=32 20 | dev_eval_counter=500 21 | save_counter=1000 22 | batch_size=32 23 | entity_vocab_size=1817565 24 | relation_vocab_size=721 25 | max_facts=5000 26 | dev_max_facts=5000 27 | max_text_facts=2500 28 | dev_max_text_facts=5000 29 | embedding_dim=50 30 | min_facts=0 31 | learning_rate=1e-3 32 | grad_clip_norm=5 33 | verbose=1 34 | hops=3 35 | separate_key_lstm=0 36 | mode='test' #set this to train or test 37 | create_expt_dir=1 #make it 0 if you dont want to creat an output directory and only print stuff 38 | 39 | if [ $create_expt_dir -eq 1 ]; then 40 | mkdir -p $OUTPUT_DIR 41 | else 42 | echo "WARNING!!! - create_expt_dir is not set. No output will be written." 
43 | fi 44 | print_attention_weights=0 45 | -------------------------------------------------------------------------------- /vocab/relation_vocab.json: -------------------------------------------------------------------------------- 1 | {"film.film_song_relationship.composition_film.film_song_relationship.film": 560, "business.competitive_space_mediator.space_business.competitive_space_mediator.company": 373, "people.american_indian_group.us_indian_reservations.2_people.american_indian_group.us_indian_reservations.1": 614, "business.holding.currency_business.holding.as_of_date": 692, "business.company_product_relationship.consumer_product_business.company_product_relationship.to_date": 658, "business.business_location.hours.inverse_business.open_times.hour_end": 557, "business.product_theme.products.1_business.product_theme.products.2": 472, "business.issuer.issue.1_business.issuer.issue.2": 251, "business.defunct_company.ceased_operations.1_business.defunct_company.ceased_operations.2": 377, "location.mailing_address.citytown_location.mailing_address.state_province_region": 63, "film.film.language.2_film.film.language.1": 28, "organization.organization.phone_number.1_organization.organization.phone_number.2": 224, "business.competitive_space_mediator.brand_business.competitive_space_mediator.space": 569, "organization.organization.founders.2_organization.organization.founders.1": 125, "venture_capital.venture_investment.investor_venture_capital.venture_investment.currency": 599, "business.business_operation.cash.inverse_measurement_unit.dated_money_value.currency": 229, "location.cn_autonomous_region.designated_minority.1_location.cn_autonomous_region.designated_minority.2": 629, "film.film.featured_film_locations.2_film.film.featured_film_locations.1": 152, "business.competitive_space.market_size.inverse_business.market_size.valid_date": 693, "business.product_category.products.2_business.product_category.products.1": 586, "people.marriage.type_of_union_people.marriage.location_of_ceremony": 249, "education.education.major_field_of_study_education.education.institution": 136, "people.profession.part_of_professional_field.1_people.profession.part_of_professional_field.2": 482, "business.market_share.company_business.market_share.region": 607, "people.deceased_person.date_of_death.1_people.deceased_person.date_of_death.2": 94, "organization.organization.place_founded.1_organization.organization.place_founded.2": 166, "organization.organization.headquarters.inverse_location.mailing_address.state_province_region": 14, "film.film_film_distributor_relationship.film_film.film_film_distributor_relationship.film_cut": 517, "business.product_line.brand.2_business.product_line.brand.1": 508, "film.film.written_by.2_film.film.written_by.1": 95, "measurement_unit.dated_money_value.currency_business.business_operation.current_assets.inverse": 200, "business.company_product_relationship.company_business.company_product_relationship.from_date": 441, "business.product_endorsement.product_business.product_endorsement.to": 677, "business.employment_tenure.person_business.employment_tenure.from": 197, "film.dubbing_performance.character_film.dubbing_performance.actor": 360, "business.stock_ticker_symbol.stock_exchange_business.stock_ticker_symbol.start_date": 659, "business.company_name_change.company_business.company_name_change.end_date": 391, "people.cause_of_death.includes_causes_of_death.1_people.cause_of_death.includes_causes_of_death.2": 369, 
"organization.organization.geographic_scope.2_organization.organization.geographic_scope.1": 129, "people.family.country.2_people.family.country.1": 282, "business.acquisition.company_acquired_business.acquisition.acquiring_company": 263, "finance.currency.countries_used.2_finance.currency.countries_used.1": 363, "business.open_times.weekday_end_business.open_times.time_zone": 660, "film.film_film_distributor_relationship.film_film.film_film_distributor_relationship.distributor": 61, "film.film.film_art_direction_by.2_film.film.film_art_direction_by.1": 195, "business.open_times.weekday_end_business.business_location.hours.inverse": 570, "business.issue.market_capitalization.inverse_measurement_unit.dated_money_value.currency": 206, "business.shopping_center.store.1_business.shopping_center.store.2": 231, "film.film.country.1_film.film.country.2": 20, "award.award_honor.award_winner_award.award_honor.year": 81, "film.film.soundtrack.1_film.film.soundtrack.2": 221, "film.film.country.2_film.film.country.1": 21, "business.business_operation.liabilities.inverse_measurement_unit.dated_money_value.valid_date": 284, "film.film.gross_revenue.inverse_measurement_unit.dated_money_value.valid_date": 500, "business.sponsorship.sponsored_by_business.sponsorship.to": 463, "film.film.produced_by.2_film.film.produced_by.1": 79, "fashion.clothing_size.person_fashion.clothing_size.article_of_clothing": 623, "business.market_share.region_business.market_share.date": 678, "film.film_film_distributor_relationship.film_distribution_medium_film.film_film_distributor_relationship.region": 488, "organization.organization.date_founded.1_organization.organization.date_founded.2": 135, "business.brand.parent_brand.1_business.brand.parent_brand.2": 492, "business.trade_union.professions_represented.2_business.trade_union.professions_represented.1": 566, "education.education.major_field_of_study_education.education.student": 147, "film.content_rating_system.jurisdiction.2_film.content_rating_system.jurisdiction.1": 523, "film.film_festival_sponsorship.sponsor_film.film_festival_sponsorship.from": 708, "people.profession.specializations.2_people.profession.specializations.1": 300, "award.award_honor.ceremony_award.award_honor.year": 591, "film.film.music.2_film.film.music.1": 118, "business.consumer_product.msrp.inverse_measurement_unit.dated_money_value.valid_date": 661, "film.performance.special_performance_type_film.performance.character": 309, "location.mailing_address.postal_code_business.business_location.address.inverse": 497, "education.education.student_education.education.minor": 453, "organization.organization_relationship.parent_organization.organization_relationship.as_of_date": 146, "organization.leadership.organization_organization.leadership.as_of_date": 281, "fashion.clothing_size.article_of_clothing_fashion.clothing_size.person": 624, "film.film.film_festivals.2_film.film.film_festivals.1": 233, "organization.leadership.role_organization.leadership.as_of_date": 630, "award.award_honor.honored_for_award.award_honor.year": 246, "film.content_rating.country.2_film.content_rating.country.1": 422, "film.performance.special_performance_type_film.performance.actor": 162, "film.film_regional_release_date.film_regional_debut_venue_film.film_regional_release_date.release_date": 253, "business.product_endorsement.product_business.product_endorsement.from": 642, "film.film.subjects.2_film.film.subjects.1": 154, "film.film.estimated_budget.inverse_measurement_unit.dated_money_value.source": 694, 
"people.ethnicity.people.2_people.ethnicity.people.1": 47, "business.brand_slogan.brand_business.brand_slogan.from_date": 592, "business.brand_slogan.brand_business.brand_slogan.slogan": 558, "organization.organization_board_membership.organization_organization.organization_board_membership.role": 349, "venture_capital.venture_investment.currency_venture_capital.venture_investment.date": 596, "people.place_lived.location_people.place_lived.end_date": 257, "venture_capital.venture_funded_company.total_venture_investment_raised.inverse_measurement_unit.dated_money_value.valid_date": 594, "film.film_film_distributor_relationship.film_cut_film.film_film_distributor_relationship.region": 578, "business.employment_tenure.title_business.employment_tenure.company": 43, "film.performance.actor_film.performance.character": 26, "film.film_cut.type_of_film_cut_film.film_cut.film": 285, "business.holding.issue_business.holding.currency": 695, "business.asset_ownership.owned_asset_business.asset_ownership.from": 533, "business.brand_colors.colors_business.brand_colors.brand": 515, "people.person.languages.2_people.person.languages.1": 160, "film.film_regional_release_date.film_release_distribution_medium_film.film_regional_release_date.film_release_distribution_medium": 535, "organization.organization_board_membership.role_organization.organization_board_membership.to": 442, "business.business_operation.operating_income.inverse_measurement_unit.dated_money_value.source": 663, "business.brand.parent_brand.2_business.brand.parent_brand.1": 493, "film.film.executive_produced_by.1_film.film.executive_produced_by.2": 144, "business.product_endorsement.endorsee_business.product_endorsement.to": 679, "business.open_times.time_zone_business.business_location.hours.inverse": 680, "people.deceased_person.place_of_burial.2_people.deceased_person.place_of_burial.1": 150, "business.business_location.address.inverse_location.mailing_address.citytown": 128, "film.film_film_distributor_relationship.distributor_film.film_film_distributor_relationship.film": 62, "organization.organization.place_founded.2_organization.organization.place_founded.1": 167, "education.education.institution_education.education.start_date": 241, "people.ethnicity.included_in_group.2_people.ethnicity.included_in_group.1": 316, "fashion.clothing_size.region_fashion.clothing_size.person": 631, "business.defunct_company.reason_for_ceasing_operations.2_business.defunct_company.reason_for_ceasing_operations.1": 438, "business.consumer_product.external_reviews.1_business.consumer_product.external_reviews.2": 643, "finance.exchange_rate.source_of_exchange_finance.exchange_rate.date_of_rate": 121, "business.business_location.address.inverse_location.mailing_address.postal_code": 495, "people.place_lived.person_people.place_lived.end_date": 359, "education.education.specialization_education.education.degree": 443, "film.personal_film_appearance.type_of_appearance_film.personal_film_appearance.film": 368, "film.film_film_company_relationship.film_company_film.film_film_company_relationship.role_service": 571, "business.acquisition.company_acquired_business.acquisition.date": 348, "people.marriage.location_of_ceremony_people.marriage.type_of_union": 250, "people.profession.corresponding_type.1_people.profession.corresponding_type.2": 696, "business.business_operation.net_profit.inverse_measurement_unit.dated_money_value.source": 697, "fashion.clothing_size.article_of_clothing_fashion.clothing_size.region": 616, 
"organization.organization_board_membership.role_organization.organization_board_membership.member": 330, "education.education.student_education.education.start_date": 304, "people.marriage.location_of_ceremony_people.marriage.from": 240, "people.place_lived.person_people.place_lived.location": 12, "people.person.nationality.1_people.person.nationality.2": 4, "organization.leadership.person_organization.leadership.organization": 133, "business.shopping_center.number_of_stores.inverse_measurement_unit.dated_integer.year": 664, "organization.leadership.person_organization.leadership.from": 334, "film.film_film_distributor_relationship.distributor_film.film_film_distributor_relationship.film_cut": 532, "business.competitive_space.market_size.inverse_business.market_size.currency": 644, "business.product_theme.products.2_business.product_theme.products.1": 473, "organization.organization_board_membership.member_organization.organization_board_membership.role": 331, "business.market_share.space_business.market_share.date": 698, "film.film_film_distributor_relationship.distributor_film.film_film_distributor_relationship.year": 362, "business.company_product_line_relationship.product_line_business.company_product_line_relationship.from_date": 699, "people.profession.corresponding_type.2_people.profession.corresponding_type.1": 700, "business.shopping_center.address.inverse_location.mailing_address.citytown": 260, "measurement_unit.dated_money_value.source_business.business_operation.revenue.inverse": 665, "location.cn_autonomous_county.designated_minority.2_location.cn_autonomous_county.designated_minority.1": 474, "people.marriage.type_of_union_people.marriage.from": 116, "film.film_film_company_relationship.film_film.film_film_company_relationship.film_cut": 701, "business.stock_ticker_symbol.stock_exchange_business.stock_ticker_symbol.stock_exchange": 681, "business.product_line.parent_product_line.2_business.product_line.parent_product_line.1": 579, "business.business_operation.industry.1_business.business_operation.industry.2": 89, "film.dubbing_performance.film_film.dubbing_performance.actor": 323, "business.product_endorsement.endorser_business.product_endorsement.product": 461, "venture_capital.venture_investment.currency_venture_capital.venture_investment.investor": 600, "film.film_regional_release_date.film_release_region_film.film_regional_release_date.film": 22, "people.person.date_of_birth.1_people.person.date_of_birth.2": 54, "business.acquisition.company_acquired_business.acquisition.divisions_formed": 564, "education.education.student_education.education.degree": 186, "venture_capital.venture_investment.currency_venture_capital.venture_investment.investment_round": 645, "business.product_endorsement.endorsee_business.product_endorsement.product": 547, "business.stock_ticker_symbol.stock_exchange_business.stock_ticker_symbol.end_date": 625, "film.film_festival.individual_festivals.1_film.film_festival.individual_festivals.2": 352, "film.film.edited_by.2_film.film.edited_by.1": 176, "film.film.cinematography.2_film.film.cinematography.1": 174, "film.film_festival.location.2_film.film_festival.location.1": 342, "organization.organization.legal_structure.2_organization.organization.legal_structure.1": 255, "people.human_measurement.body_part_people.human_measurement.person": 509, "film.film_song_relationship.film_film.film_song_relationship.composition": 561, "film.film.directed_by.2_film.film.directed_by.1": 59, 
"organization.organization.advisors.1_organization.organization.advisors.2": 326, "organization.organization_board_membership.member_organization.organization_board_membership.from": 272, "people.person.place_of_birth.1_people.person.place_of_birth.2": 2, "film.film_film_distributor_relationship.region_film.film_film_distributor_relationship.film": 320, "business.asset_ownership.owned_asset_business.asset_ownership.to": 562, "organization.organization_merger.organizations_merging_organization.organization_merger.organizations_merging": 366, "business.company_brand_relationship.brand_business.company_brand_relationship.from_date": 572, "people.human_measurement.person_people.human_measurement.body_part": 510, "film.film_featured_song.performed_by.1_film.film_featured_song.performed_by.2": 412, "business.business_operation.revenue.inverse_measurement_unit.dated_money_value.valid_date": 185, "measurement_unit.dated_money_value.currency_business.business_operation.market_capitalization.inverse": 702, "measurement_unit.dated_money_value.source_film.film.estimated_budget.inverse": 703, "location.cn_autonomous_prefecture.designated_minority.2_location.cn_autonomous_prefecture.designated_minority.1": 540, "film.film_regional_release_date.film_film.film_regional_release_date.film_release_region": 23, "media_common.quotation.author.1_media_common.quotation.author.2": 67, "business.shopping_center.address.inverse_location.mailing_address.country": 328, "education.education.specialization_education.education.specialization": 494, "people.person.profession.1_people.person.profession.2": 8, "film.film_song_relationship.performers_film.film_song_relationship.film": 545, "business.business_operation.industry.2_business.business_operation.industry.1": 90, "film.film_crew_gig.crewmember_film.film_crew_gig.film_crew_role": 178, "business.business_location.hours.inverse_business.open_times.weekday_start": 541, "organization.organization_board_membership.organization_organization.organization_board_membership.from": 216, "measurement_unit.dated_money_value.currency_venture_capital.venture_funded_company.total_venture_investment_raised.inverse": 548, "film.dubbing_performance.film_film.dubbing_performance.character": 340, "business.employer.number_of_employees.inverse_measurement_unit.dated_integer.year": 305, "film.film_film_distributor_relationship.film_film.film_film_distributor_relationship.region": 321, "people.person.place_of_birth.2_people.person.place_of_birth.1": 3, "film.film_film_distributor_relationship.distributor_film.film_film_distributor_relationship.region": 394, "organization.organization.organization_type.1_organization.organization.organization_type.2": 268, "venture_capital.venture_investment.company_venture_capital.venture_investment.investment_round": 414, "people.family.rise_to_prominence.2_people.family.rise_to_prominence.1": 704, "venture_capital.venture_investment.investment_amount_venture_capital.venture_investment.currency": 705, "education.education.institution_education.education.end_date": 138, "film.film_film_company_relationship.role_service_film.film_film_company_relationship.film": 501, "organization.organization.organization_type.2_organization.organization.organization_type.1": 267, "business.company_product_line_relationship.company_business.company_product_line_relationship.to_date": 706, "location.mailing_address.state_province_region_location.mailing_address.country": 436, "film.performance.special_performance_type_film.performance.special_performance_type": 608, 
"film.film_crew_gig.film_film.film_crew_gig.crewmember": 52, "film.film.gross_revenue.inverse_measurement_unit.dated_money_value.source": 646, "film.film.featured_film_locations.1_film.film.featured_film_locations.2": 153, "film.film_film_distributor_relationship.film_film.film_film_distributor_relationship.year": 365, "film.film_film_distributor_relationship.film_cut_film.film_film_distributor_relationship.year": 534, "organization.organization.locations.2_organization.organization.locations.1": 112, "award.award_honor.award_award.award_honor.honored_for": 142, "film.film_song_relationship.film_film.film_song_relationship.performers": 546, "freebase.user_profile.person.2_freebase.user_profile.person.1": 447, "people.family.founder.1_people.family.founder.2": 484, "location.mailing_address.postal_code_location.mailing_address.country": 269, "organization.organization_partnership.members.1_organization.organization_partnership.members.2": 410, "business.competitive_space_mediator.brand_business.competitive_space_mediator.company": 525, "organization.organization.sectors.2_organization.organization.sectors.1": 190, "film.film_festival.focus.2_film.film_festival.focus.1": 511, "film.film_film_distributor_relationship.region_film.film_film_distributor_relationship.year": 409, "education.education.institution_education.education.student": 6, "DUMMY_MEM": 720, "organization.organization.headquarters.inverse_location.mailing_address.country": 101, "business.company_brand_relationship.brand_business.company_brand_relationship.to_date": 617, "measurement_unit.dated_money_value.currency_business.business_operation.net_profit.inverse": 183, "people.deceased_person.cause_of_death.2_people.deceased_person.cause_of_death.1": 84, "film.dubbing_performance.film_film.dubbing_performance.language": 380, "film.film_film_distributor_relationship.film_cut_film.film_film_distributor_relationship.film_distribution_medium": 595, "organization.email_contact.category_organization.organization.email.inverse": 647, "people.person.gender.1_people.person.gender.2": 39, "education.education.minor_education.education.specialization": 516, "finance.exchange_rate.target_of_exchange_finance.exchange_rate.source_of_exchange": 536, "film.content_rating_system.jurisdiction.1_film.content_rating_system.jurisdiction.2": 526, "education.education.degree_education.education.specialization": 444, "film.content_rating.film_rating_system.2_film.content_rating.film_rating_system.1": 573, "film.performance.film_film.performance.actor": 10, "business.business_operation.operating_income.inverse_measurement_unit.dated_money_value.currency": 106, "business.product_endorsement.endorser_business.product_endorsement.endorsee": 476, "business.employer.number_of_employees.inverse_measurement_unit.dated_integer.source": 632, "film.film_regional_release_date.film_release_region_film.film_regional_release_date.film_regional_debut_venue": 123, "business.stock_ticker_symbol.issue_business.stock_ticker_symbol.stock_exchange": 73, "finance.exchange_rate.source_of_exchange_finance.exchange_rate.target_of_exchange": 537, "film.film.genre.2_film.film.genre.1": 30, "education.education.minor_education.education.minor": 529, "organization.organization.phone_number.2_organization.organization.phone_number.1": 223, "organization.organization.locations.1_organization.organization.locations.2": 113, "organization.organization_relationship.parent_organization.organization_relationship.to": 401, 
"business.company_brand_relationship.company_business.company_brand_relationship.from_date": 469, "people.deceased_person.cause_of_death.1_people.deceased_person.cause_of_death.2": 85, "award.award_honor.honored_for_award.award_honor.honored_for": 325, "business.consumer_product.external_reviews.2_business.consumer_product.external_reviews.1": 648, "film.film.costume_design_by.2_film.film.costume_design_by.1": 214, "film.film_film_company_relationship.role_service_film.film_film_company_relationship.film_company": 574, "film.film.production_companies.2_film.film.production_companies.1": 108, "location.cn_autonomous_county.designated_minority.1_location.cn_autonomous_county.designated_minority.2": 475, "education.education.minor_education.education.degree": 470, "business.acquisition.divisions_formed_business.acquisition.acquiring_company": 496, "people.place_lived.location_people.place_lived.start_date": 245, "measurement_unit.dated_integer.source_measurement_unit.dated_integer.year": 565, "film.film.locations.1_film.film.locations.2": 549, "business.business_operation.assets.inverse_measurement_unit.dated_money_value.source": 682, "education.education.institution_education.education.major_field_of_study": 137, "business.employment_tenure.company_business.employment_tenure.from": 103, "film.film.locations.2_film.film.locations.1": 550, "film.film.directed_by.1_film.film.directed_by.2": 60, "organization.organization_merger.organizations_merging_organization.organization_merger.date": 399, "film.film.story_by.2_film.film.story_by.1": 156, "film.film.film_casting_director.2_film.film.film_casting_director.1": 228, "business.market_share.space_business.market_share.region": 662, "film.film.rating.2_film.film.rating.1": 217, "business.business_operation.operating_income.inverse_measurement_unit.dated_money_value.valid_date": 210, "location.mailing_address.citytown_business.business_location.address.inverse": 127, "location.mailing_address.state_province_region_business.business_location.address.inverse": 114, "business.company_product_relationship.company_business.company_product_relationship.to_date": 605, "award.award_honor.award_winner_award.award_honor.award_winner": 55, "film.film.edited_by.1_film.film.edited_by.2": 177, "venture_capital.venture_investment.company_venture_capital.venture_investment.date": 404, "UNK": 1, "film.film.featured_song.1_film.film.featured_song.2": 387, "organization.organization_relationship.child_organization.organization_relationship.to": 416, "business.sponsorship.sponsored_by_business.sponsorship.from": 434, "venture_capital.venture_investment.investment_round_venture_capital.venture_investment.investor": 396, "award.award_honor.award_winner_award.award_honor.ceremony": 99, "film.film.film_format.1_film.film.film_format.2": 318, "business.market_size.region_business.market_size.currency": 683, "measurement_unit.dated_money_value.source_business.business_operation.operating_income.inverse": 666, "organization.leadership.person_organization.leadership.role": 92, "film.film_festival.individual_festivals.2_film.film_festival.individual_festivals.1": 353, "award.award_honor.ceremony_award.award_honor.award_winner": 100, "film.film_regional_release_date.film_release_distribution_medium_film.film_regional_release_date.release_date": 139, "film.performance.actor_film.performance.special_performance_type": 163, "film.film_festival_sponsorship.festival_film.film_festival_sponsorship.from": 684, "film.film.film_format.2_film.film.film_format.1": 319, 
"organization.organization.sectors.1_organization.organization.sectors.2": 189, "education.education.major_field_of_study_education.education.degree": 295, "business.product_theme.product_lines.2_business.product_theme.product_lines.1": 542, "measurement_unit.dated_money_value.currency_business.issue.market_capitalization.inverse": 207, "education.education.institution_education.education.minor": 407, "people.sibling_relationship.sibling_people.sibling_relationship.sibling": 58, "measurement_unit.dated_money_value.currency_film.film.estimated_budget.inverse": 131, "people.profession.part_of_professional_field.2_people.profession.part_of_professional_field.1": 483, "organization.organization_relationship.child_organization.organization_relationship.from": 372, "film.film.film_set_decoration_by.2_film.film.film_set_decoration_by.1": 237, "people.person.religion.2_people.person.religion.1": 37, "business.business_operation.current_liabilities.inverse_measurement_unit.dated_money_value.valid_date": 293, "measurement_unit.dated_money_value.currency_business.business_operation.cash.inverse": 230, "film.content_rating.country.1_film.content_rating.country.2": 423, "film.film_song_relationship.performers_film.film_song_relationship.performers": 609, "business.acquisition.divisions_formed_business.acquisition.company_acquired": 563, "business.open_times.time_zone_business.open_times.hour_end": 649, "education.education.minor_education.education.start_date": 512, "media_common.quotation.author.2_media_common.quotation.author.1": 68, "business.acquisition.divisions_formed_business.acquisition.date": 580, "business.open_times.time_zone_business.open_times.weekday_start": 667, "people.marriage.spouse_people.marriage.type_of_union": 69, "organization.leadership.role_organization.leadership.from": 244, "people.marriage.location_of_ceremony_people.marriage.to": 299, "measurement_unit.dated_money_value.currency_business.business_operation.current_liabilities.inverse": 202, "people.deceased_person.place_of_cremation.2_people.deceased_person.place_of_cremation.1": 403, "film.film_regional_release_date.film_release_distribution_medium_film.film_regional_release_date.film_release_region": 311, "education.education.degree_education.education.minor": 471, "business.product_line.parent_product_line.1_business.product_line.parent_product_line.2": 581, "film.film.film_casting_director.1_film.film.film_casting_director.2": 227, "film.film_song_relationship.composition_film.film_song_relationship.performers": 506, "film.film_film_distributor_relationship.distributor_film.film_film_distributor_relationship.film_distribution_medium": 392, "film.film_series.films_in_series.2_film.film_series.films_in_series.1": 313, "business.shopping_center.number_of_anchors.inverse_measurement_unit.dated_integer.year": 685, "film.film.film_collections.2_film.film.film_collections.1": 464, "fashion.clothing_size.region_fashion.clothing_size.article_of_clothing": 618, "measurement_unit.dated_money_value.currency_business.business_operation.liabilities.inverse": 191, "business.business_operation.major_customer.1_business.business_operation.major_customer.2": 477, "education.education.specialization_education.education.major_field_of_study": 389, "education.education.major_field_of_study_education.education.minor": 425, "business.industry.parent_industry.2_business.industry.parent_industry.1": 428, "business.business_location.hours.inverse_business.open_times.time_zone": 686, "people.marriage.spouse_people.marriage.spouse": 49, 
"business.market_size.currency_business.market_size.region": 687, "film.film.produced_by.1_film.film.produced_by.2": 80, "business.asset_ownership.owner_business.asset_ownership.from": 491, "film.performance.character_film.performance.actor": 27, "organization.organization_board_membership.member_organization.organization_board_membership.to": 346, "film.film.executive_produced_by.2_film.film.executive_produced_by.1": 145, "location.mailing_address.postal_code_location.mailing_address.state_province_region": 50, "business.sponsorship.sponsored_recipient_business.sponsorship.sponsored_by": 277, "film.film.estimated_budget.inverse_measurement_unit.dated_money_value.currency": 132, "organization.organization_relationship.parent_organization.organization_relationship.child": 24, "organization.organization_spin_off.parent_company_organization.organization_spin_off.date": 481, "film.film.genre.1_film.film.genre.2": 31, "organization.organization_board_membership.organization_organization.organization_board_membership.member": 77, "award.award_honor.award_award.award_honor.ceremony": 336, "business.business_location.address.inverse_location.mailing_address.country": 615, "film.film_cut.film_film.film_cut.type_of_film_cut": 286, "venture_capital.venture_investment.company_venture_capital.venture_investment.investor": 198, "business.consumer_product.msrp.inverse_measurement_unit.dated_money_value.currency": 273, "film.film_regional_release_date.film_film.film_regional_release_date.film_regional_debut_venue": 172, "organization.organization.headquarters.inverse_location.mailing_address.postal_code": 140, "business.product_line.brand.1_business.product_line.brand.2": 513, "organization.organization_board_membership.organization_organization.organization_board_membership.to": 291, "business.market_share.space_business.market_share.company": 619, "location.mailing_address.citytown_organization.organization.headquarters.inverse": 18, "business.consumer_product.brand.2_business.consumer_product.brand.1": 265, "film.dubbing_performance.character_film.dubbing_performance.film": 341, "business.competitive_space.market_size.inverse_business.market_size.region": 650, "venture_capital.venture_investment.investor_venture_capital.venture_investment.company": 199, "organization.organization_partnership.members.2_organization.organization_partnership.members.1": 411, "finance.stock_exchange.primary_regions.1_finance.stock_exchange.primary_regions.2": 518, "business.product_theme.product_lines.1_business.product_theme.product_lines.2": 543, "award.award_honor.honored_for_award.award_honor.award": 143, "measurement_unit.dated_money_value.currency_measurement_unit.dated_money_value.source": 455, "film.film_cut.type_of_film_cut_film.film_cut.film_release_region": 467, "award.award_honor.honored_for_award.award_honor.award_winner": 86, "people.appointment.appointed_role_people.appointment.appointed_by": 356, "education.education.degree_education.education.end_date": 383, "film.film_regional_release_date.film_release_distribution_medium_film.film_regional_release_date.film": 83, "location.mailing_address.citytown_location.mailing_address.country": 298, "people.family.members.1_people.family.members.2": 208, "location.mailing_address.postal_code_business.shopping_center.address.inverse": 551, "film.film_cut.film_release_region_film.film_cut.type_of_film_cut": 468, "business.trade_union.professions_represented.1_business.trade_union.professions_represented.2": 567, 
"organization.leadership.organization_organization.leadership.to": 338, "people.appointment.appointed_by_people.appointment.appointed_role": 357, "people.family_name.people_with_this_family_name.1_people.family_name.people_with_this_family_name.2": 503, "people.deceased_person.place_of_burial.1_people.deceased_person.place_of_burial.2": 151, "business.currency_sub_unit.currency.2_business.currency_sub_unit.currency.1": 688, "education.education.student_education.education.institution": 7, "organization.organization.committees.2_organization.organization.committees.1": 460, "organization.leadership.role_organization.leadership.organization": 97, "people.ethnicity.geographic_distribution.2_people.ethnicity.geographic_distribution.1": 275, "measurement_unit.dated_money_value.source_business.business_operation.assets.inverse": 689, "business.product_endorsement.endorsee_business.product_endorsement.from": 610, "education.education.degree_education.education.major_field_of_study": 296, "business.business_location.phone_number.1_business.business_location.phone_number.2": 527, "business.brand_colors.brand_business.brand_colors.colors": 519, "organization.organization.committees.1_organization.organization.committees.2": 459, "film.film.filming.inverse_measurement_unit.time_interval.end": 633, "business.issuer.issue.2_business.issuer.issue.1": 252, "business.company_product_line_relationship.company_business.company_product_line_relationship.product_line": 449, "film.film.film_festivals.1_film.film.film_festivals.2": 234, "location.cn_autonomous_region.designated_minority.2_location.cn_autonomous_region.designated_minority.1": 634, "business.asset_ownership.owned_asset_business.asset_ownership.owner": 374, "business.company_product_line_relationship.product_line_business.company_product_line_relationship.company": 450, "business.market_size.currency_business.market_size.valid_date": 668, "people.person.parents.2_people.person.parents.1": 41, "people.marriage.type_of_union_people.marriage.spouse": 70, "film.film_film_distributor_relationship.film_cut_film.film_film_distributor_relationship.distributor": 531, "film.film_series.films_in_series.1_film.film_series.films_in_series.2": 314, "film.film_crew_gig.film_crew_role_film.film_crew_gig.crewmember": 179, "business.business_operation.major_customer.2_business.business_operation.major_customer.1": 478, "organization.organization_board_membership.member_organization.organization_board_membership.organization": 78, "business.stock_ticker_symbol.issue_business.stock_ticker_symbol.issue": 690, "film.personal_film_appearance.person_film.personal_film_appearance.film": 75, "people.person.religion.1_people.person.religion.2": 38, "business.business_location.opening_date.1_business.business_location.opening_date.2": 651, "business.business_operation.cash.inverse_measurement_unit.dated_money_value.valid_date": 308, "people.ethnicity.languages_spoken.1_people.ethnicity.languages_spoken.2": 170, "people.person.profession.2_people.person.profession.1": 9, "location.mailing_address.citytown_location.mailing_address.postal_code": 65, "film.film_cut.film_release_region_film.film_cut.film": 164, "people.marriage.spouse_people.marriage.to": 120, "education.education.specialization_education.education.minor": 520, "business.product_line.products.1_business.product_line.products.2": 385, "business.employment_tenure.title_business.employment_tenure.from": 262, "business.company_product_relationship.company_business.company_product_relationship.consumer_product": 219, 
"business.product_line.products.2_business.product_line.products.1": 386, "business.business_operation.net_profit.inverse_measurement_unit.dated_money_value.valid_date": 242, "film.film.written_by.1_film.film.written_by.2": 96, "business.holding.holder_business.holding.as_of_date": 669, "business.open_times.weekday_start_business.open_times.time_zone": 674, "people.family.founder.2_people.family.founder.1": 485, "business.competitive_space_mediator.company_business.competitive_space_mediator.brand": 528, "organization.leadership.role_organization.leadership.person": 93, "award.award_honor.award_award.award_honor.year": 382, "business.shopping_center.owner.2_business.shopping_center.owner.1": 302, "film.film.soundtrack.2_film.film.soundtrack.1": 222, "location.mailing_address.state_province_region_location.mailing_address.citytown": 64, "location.mailing_address.postal_code_location.mailing_address.citytown": 66, "business.product_endorsement.product_business.product_endorsement.endorser": 462, "business.acquisition.acquiring_company_business.acquisition.company_acquired": 264, "organization.leadership.person_organization.leadership.to": 431, "people.marriage.spouse_people.marriage.location_of_ceremony": 168, "people.deceased_person.place_of_death.2_people.deceased_person.place_of_death.1": 16, "film.film_featured_song.performed_by.2_film.film_featured_song.performed_by.1": 413, "business.oil_field.year_discovered.1_business.oil_field.year_discovered.2": 709, "business.open_times.weekday_start_business.business_location.hours.inverse": 544, "film.dubbing_performance.actor_film.dubbing_performance.language": 258, "measurement_unit.dated_money_value.source_film.film.gross_revenue.inverse": 652, "venture_capital.venture_investment.investor_venture_capital.venture_investment.date": 351, "business.shopping_center.address.inverse_location.mailing_address.state_province_region": 332, "film.film.film_art_direction_by.1_film.film.film_art_direction_by.2": 196, "people.ethnicity.people.1_people.ethnicity.people.2": 48, "film.film_regional_release_date.film_regional_debut_venue_film.film_regional_release_date.film": 173, "film.personal_film_appearance.film_film.personal_film_appearance.person": 76, "business.competitive_space_mediator.brand_business.competitive_space_mediator.brand": 626, "measurement_unit.dated_money_value.currency_business.business_operation.retained_earnings.inverse": 193, "film.film_crew_gig.crewmember_film.film_crew_gig.film": 53, "business.open_times.time_zone_business.open_times.hour_start": 670, "business.company_product_line_relationship.company_business.company_product_line_relationship.from_date": 593, "business.employment_tenure.title_business.employment_tenure.person": 56, "people.person.gender.2_people.person.gender.1": 40, "film.performance.actor_film.performance.film": 11, "education.education.student_education.education.specialization": 426, "award.award_honor.ceremony_award.award_honor.award": 337, "film.film_crew_gig.film_crew_role_film.film_crew_gig.film": 45, "finance.exchange_rate.target_of_exchange_finance.exchange_rate.date_of_rate": 122, "people.american_indian_group.us_indian_reservations.1_people.american_indian_group.us_indian_reservations.2": 620, "film.film.language.1_film.film.language.2": 29, "organization.organization_relationship.parent_organization.organization_relationship.from": 358, "measurement_unit.dated_money_value.currency_business.business_operation.revenue.inverse": 104, 
"measurement_unit.dated_money_value.source_business.business_operation.net_profit.inverse": 710, "business.business_operation.revenue.inverse_measurement_unit.dated_money_value.source": 671, "business.stock_ticker_symbol.stock_exchange_business.stock_ticker_symbol.issue": 74, "education.education.major_field_of_study_education.education.specialization": 390, "people.family.country.1_people.family.country.2": 283, "people.place_lived.location_people.place_lived.person": 13, "location.mailing_address.country_business.shopping_center.address.inverse": 329, "film.dubbing_performance.language_film.dubbing_performance.film": 381, "business.company_brand_relationship.brand_business.company_brand_relationship.company": 288, "film.film.film_collections.1_film.film.film_collections.2": 465, "film.performance.film_film.performance.character": 33, "film.film_film_distributor_relationship.film_film.film_film_distributor_relationship.film_distribution_medium": 307, "film.film_cut.film_film.film_cut.film_release_region": 165, "award.award_honor.ceremony_award.award_honor.honored_for": 204, "measurement_unit.dated_money_value.currency_business.business_operation.assets.inverse": 110, "business.business_operation.current_liabilities.inverse_measurement_unit.dated_money_value.currency": 203, "film.film_regional_release_date.film_regional_debut_venue_film.film_regional_release_date.film_release_distribution_medium": 445, "business.competitive_space.related_industries.2_business.competitive_space.related_industries.1": 601, "film.film_film_company_relationship.film_cut_film.film_film_company_relationship.film": 711, "venture_capital.venture_funded_company.total_venture_investment_raised.inverse_measurement_unit.dated_money_value.currency": 552, "business.industry.parent_industry.1_business.industry.parent_industry.2": 429, "location.mailing_address.postal_code_organization.organization.headquarters.inverse": 141, "business.brand_colors.colors_business.brand_colors.to_date": 707, "film.film_regional_release_date.film_release_region_film.film_regional_release_date.release_date": 32, "film.film_regional_release_date.film_release_distribution_medium_film.film_regional_release_date.film_regional_debut_venue": 446, "education.education.degree_education.education.institution": 181, "business.competitive_space.related_industries.1_business.competitive_space.related_industries.2": 602, "business.business_operation.assets.inverse_measurement_unit.dated_money_value.currency": 111, "business.brand_colors.colors_business.brand_colors.from_date": 712, "film.film_regional_release_date.film_release_region_film.film_regional_release_date.film_release_distribution_medium": 312, "education.education.minor_education.education.end_date": 479, "film.film.story_by.1_film.film.story_by.2": 157, "people.profession.specializations.1_people.profession.specializations.2": 301, "business.consumer_product.official_webpage.2_business.consumer_product.official_webpage.1": 635, "business.product_category.product_lines.1_business.product_category.product_lines.2": 636, "business.sponsorship.sponsored_recipient_business.sponsorship.to": 530, "people.person.parents.1_people.person.parents.2": 42, "measurement_unit.dated_money_value.source_measurement_unit.dated_money_value.currency": 456, "organization.organization_relationship.child_organization.organization_relationship.parent": 25, "business.open_times.time_zone_business.open_times.weekday_end": 672, "business.trade_union.industry.1_business.trade_union.industry.2": 673, 
"people.appointment.appointed_by_people.appointment.appointee": 344, "fashion.clothing_size.person_fashion.clothing_size.region": 637, "measurement_unit.dated_money_value.currency_business.consumer_product.msrp.inverse": 274, "business.employment_tenure.person_business.employment_tenure.to": 239, "organization.organization_merger.became_organization_organization.organization_merger.date": 440, "education.education.minor_education.education.student": 454, "organization.organization.legal_structure.1_organization.organization.legal_structure.2": 256, "measurement_unit.dated_integer.source_business.employer.number_of_employees.inverse": 638, "business.business_operation.current_assets.inverse_measurement_unit.dated_money_value.valid_date": 294, "business.business_location.phone_number.2_business.business_location.phone_number.1": 524, "film.film_film_distributor_relationship.film_distribution_medium_film.film_film_distributor_relationship.film_cut": 597, "film.film_song_relationship.performers_film.film_song_relationship.composition": 507, "organization.organization_spin_off.child_company_organization.organization_spin_off.date": 486, "film.film_regional_release_date.film_film.film_regional_release_date.film_release_distribution_medium": 82, "business.company_brand_relationship.company_business.company_brand_relationship.to_date": 582, "education.education.minor_education.education.institution": 408, "location.mailing_address.country_location.mailing_address.state_province_region": 437, "people.cause_of_death.includes_causes_of_death.2_people.cause_of_death.includes_causes_of_death.1": 370, "film.performance.special_performance_type_film.performance.film": 247, "film.film_festival_sponsorship.festival_film.film_festival_sponsorship.sponsor": 583, "business.market_share.company_business.market_share.date": 627, "education.education.institution_education.education.degree": 182, "organization.leadership.organization_organization.leadership.from": 211, "location.mailing_address.country_location.mailing_address.citytown": 297, "award.award_honor.award_winner_award.award_honor.award": 71, "PAD": 0, "business.business_location.hours.inverse_business.open_times.weekday_end": 575, "education.education.major_field_of_study_education.education.major_field_of_study": 322, "education.education.specialization_education.education.end_date": 466, "film.film.film_set_decoration_by.1_film.film.film_set_decoration_by.2": 238, "venture_capital.venture_investment.investor_venture_capital.venture_investment.investment_round": 397, "film.film_regional_release_date.film_release_region_film.film_regional_release_date.film_release_region": 180, "business.employment_tenure.company_business.employment_tenure.person": 35, "business.business_location.address.inverse_location.mailing_address.state_province_region": 115, "finance.currency.countries_used.1_finance.currency.countries_used.2": 364, "business.holding.currency_business.holding.issue": 713, "film.dubbing_performance.language_film.dubbing_performance.character": 225, "film.film_festival.focus.1_film.film_festival.focus.2": 514, "business.product_endorsement.endorser_business.product_endorsement.from": 611, "organization.organization_spin_off.child_company_organization.organization_spin_off.parent_company": 432, "people.appointment.appointee_people.appointment.appointed_by": 345, "people.appointment.appointed_role_people.appointment.declared_on": 538, "business.market_share.company_business.market_share.space": 621, 
"film.personal_film_appearance.type_of_appearance_film.personal_film_appearance.person": 158, "location.cn_autonomous_prefecture.designated_minority.1_location.cn_autonomous_prefecture.designated_minority.2": 539, "award.award_honor.honored_for_award.award_honor.ceremony": 205, "film.film_regional_release_date.film_film.film_regional_release_date.release_date": 88, "business.defunct_company.reason_for_ceasing_operations.1_business.defunct_company.reason_for_ceasing_operations.2": 439, "business.sponsorship.sponsored_recipient_business.sponsorship.from": 487, "business.product_category.product_lines.2_business.product_category.product_lines.1": 639, "people.ethnicity.population.inverse_measurement_unit.dated_integer.year": 553, "organization.leadership.role_organization.leadership.to": 335, "film.film_film_distributor_relationship.film_cut_film.film_film_distributor_relationship.film": 521, "business.holding.currency_business.holding.holder": 714, "measurement_unit.dated_money_value.source_measurement_unit.dated_money_value.valid_date": 498, "film.performance.character_film.performance.film": 34, "business.product_endorsement.product_business.product_endorsement.endorsee": 554, "business.brand_colors.colors_business.brand_colors.colors": 589, "business.asset_ownership.owner_business.asset_ownership.owned_asset": 375, "business.business_operation.retained_earnings.inverse_measurement_unit.dated_money_value.valid_date": 287, "people.appointment.appointed_role_people.appointment.appointee": 419, "business.employment_tenure.person_business.employment_tenure.title": 57, "business.product_category.products.1_business.product_category.products.2": 587, "venture_capital.venture_investment.currency_venture_capital.venture_investment.company": 603, "venture_capital.venture_investment.company_venture_capital.venture_investment.currency": 604, "location.mailing_address.state_province_region_organization.organization.headquarters.inverse": 15, "film.dubbing_performance.actor_film.dubbing_performance.character": 361, "film.film_film_distributor_relationship.film_distribution_medium_film.film_film_distributor_relationship.year": 347, "business.product_endorsement.endorsee_business.product_endorsement.endorser": 480, "business.competitive_space_mediator.space_business.competitive_space_mediator.brand": 576, "film.film_film_company_relationship.film_film.film_film_company_relationship.film_company": 457, "location.mailing_address.state_province_region_location.mailing_address.postal_code": 51, "business.employment_tenure.title_business.employment_tenure.to": 289, "business.market_size.region_business.competitive_space.market_size.inverse": 653, "measurement_unit.dated_money_value.currency_business.business_operation.operating_income.inverse": 107, "business.employment_tenure.person_business.employment_tenure.company": 36, "business.business_operation.current_assets.inverse_measurement_unit.dated_money_value.currency": 201, "people.family_name.people_with_this_family_name.2_people.family_name.people_with_this_family_name.1": 504, "business.company_product_relationship.consumer_product_business.company_product_relationship.company": 220, "film.film.sequel.1_film.film.sequel.2": 279, "organization.organization_board_membership.role_organization.organization_board_membership.from": 398, "business.shopping_center.store.2_business.shopping_center.store.1": 232, "organization.leadership.organization_organization.leadership.person": 134, "business.asset_ownership.owner_business.asset_ownership.to": 568, 
"people.deceased_person.place_of_cremation.1_people.deceased_person.place_of_cremation.2": 402, "education.education.specialization_education.education.start_date": 490, "business.product_endorsement.endorser_business.product_endorsement.to": 606, "education.education.specialization_education.education.institution": 355, "freebase.user_profile.person.1_freebase.user_profile.person.2": 448, "business.acquisition.acquiring_company_business.acquisition.date": 292, "film.film.filming.inverse_measurement_unit.time_interval.start": 628, "business.market_size.currency_business.competitive_space.market_size.inverse": 654, "business.market_size.region_business.market_size.valid_date": 655, "location.mailing_address.citytown_business.shopping_center.address.inverse": 261, "business.brand_slogan.slogan_business.brand_slogan.brand": 559, "film.film_film_distributor_relationship.region_film.film_film_distributor_relationship.distributor": 395, "film.performance.film_film.performance.special_performance_type": 248, "film.performance.character_film.performance.special_performance_type": 310, "business.holding.holder_business.holding.currency": 715, "business.business_operation.retained_earnings.inverse_measurement_unit.dated_money_value.currency": 194, "business.market_share.region_business.market_share.space": 675, "finance.currency.countries_formerly_used.1_finance.currency.countries_formerly_used.2": 405, "film.film.rating.1_film.film.rating.2": 218, "business.employment_tenure.company_business.employment_tenure.to": 149, "business.business_operation.net_profit.inverse_measurement_unit.dated_money_value.currency": 184, "business.business_location.hours.inverse_business.open_times.hour_start": 555, "business.employment_tenure.company_business.employment_tenure.title": 44, "award.award_honor.award_award.award_honor.award_winner": 72, "film.dubbing_performance.actor_film.dubbing_performance.film": 324, "business.business_operation.assets.inverse_measurement_unit.dated_money_value.valid_date": 243, "education.education.minor_education.education.major_field_of_study": 427, "organization.organization_merger.became_organization_organization.organization_merger.organizations_merging": 378, "people.marriage.type_of_union_people.marriage.to": 117, "business.company_brand_relationship.company_business.company_brand_relationship.brand": 290, "finance.currency.countries_formerly_used.2_finance.currency.countries_formerly_used.1": 406, "education.education.student_education.education.major_field_of_study": 148, "business.product_line.introduced.1_business.product_line.introduced.2": 640, "business.currency_sub_unit.currency.1_business.currency_sub_unit.currency.2": 691, "business.holding.issue_business.holding.holder": 451, "people.person.nationality.2_people.person.nationality.1": 5, "film.film.estimated_budget.inverse_measurement_unit.dated_money_value.valid_date": 505, "venture_capital.venture_investment.currency_venture_capital.venture_investment.investment_amount": 716, "location.mailing_address.state_province_region_business.shopping_center.address.inverse": 333, "film.film.subjects.1_film.film.subjects.2": 155, "film.personal_film_appearance.film_film.personal_film_appearance.type_of_appearance": 367, "people.place_lived.person_people.place_lived.start_date": 339, "education.education.major_field_of_study_education.education.start_date": 315, "business.shopping_center.address.inverse_location.mailing_address.postal_code": 556, 
"people.deceased_person.date_of_cremation.1_people.deceased_person.date_of_cremation.2": 588, "film.film_film_distributor_relationship.region_film.film_film_distributor_relationship.region": 612, "venture_capital.venture_investment.investment_round_venture_capital.venture_investment.company": 415, "film.film_film_distributor_relationship.film_distribution_medium_film.film_film_distributor_relationship.distributor": 393, "film.dubbing_performance.language_film.dubbing_performance.actor": 259, "film.film_crew_gig.film_film.film_crew_gig.film_crew_role": 46, "film.dubbing_performance.character_film.dubbing_performance.language": 226, "business.market_share.region_business.market_share.company": 613, "film.film_festival.location.1_film.film_festival.location.2": 343, "film.content_rating.film_rating_system.1_film.content_rating.film_rating_system.2": 577, "film.film.film_production_design_by.2_film.film.film_production_design_by.1": 236, "people.family.rise_to_prominence.1_people.family.rise_to_prominence.2": 717, "people.appointment.appointed_by_people.appointment.declared_on": 371, "education.education.institution_education.education.specialization": 354, "business.trade_union.industry.2_business.trade_union.industry.1": 676, "business.acquisition.acquiring_company_business.acquisition.divisions_formed": 499, "people.marriage.location_of_ceremony_people.marriage.spouse": 169, "award.award_honor.award_winner_award.award_honor.honored_for": 87, "organization.leadership.organization_organization.leadership.role": 98, "organization.organization.email.inverse_organization.email_contact.category": 656, "business.consumer_product.brand.1_business.consumer_product.brand.2": 266, "education.education.specialization_education.education.student": 424, "business.company_product_relationship.consumer_product_business.company_product_relationship.from_date": 590, "business.company_name_change.company_business.company_name_change.start_date": 384, "education.education.major_field_of_study_education.education.end_date": 254, "film.film.film_production_design_by.1_film.film.film_production_design_by.2": 235, "film.film.costume_design_by.1_film.film.costume_design_by.2": 215, "venture_capital.venture_investment.investment_round_venture_capital.venture_investment.currency": 657, "organization.organization_board_membership.role_organization.organization_board_membership.organization": 350, "film.film.initial_release_date.1_film.film.initial_release_date.2": 188, "organization.organization_relationship.child_organization.organization_relationship.as_of_date": 213, "people.family.members.2_people.family.members.1": 209, "film.film_film_company_relationship.film_film.film_film_company_relationship.role_service": 502, "business.holding.holder_business.holding.issue": 452, "people.deceased_person.place_of_death.1_people.deceased_person.place_of_death.2": 17, "film.film.production_companies.1_film.film.production_companies.2": 109, "measurement_unit.dated_money_value.currency_film.film.gross_revenue.inverse": 417, "film.film_film_company_relationship.film_company_film.film_film_company_relationship.film": 458, "film.film_festival_sponsorship.sponsor_film.film_festival_sponsorship.festival": 584, "business.competitive_space_mediator.company_business.competitive_space_mediator.space": 376, "business.business_operation.liabilities.inverse_measurement_unit.dated_money_value.currency": 192, "people.person.languages.1_people.person.languages.2": 161, 
"film.film_regional_release_date.film_regional_debut_venue_film.film_regional_release_date.film_release_region": 124, "people.ethnicity.languages_spoken.2_people.ethnicity.languages_spoken.1": 171, "location.mailing_address.country_business.business_location.address.inverse": 622, "people.appointment.appointee_people.appointment.appointed_role": 420, "location.mailing_address.country_organization.organization.headquarters.inverse": 102, "people.ethnicity.geographic_distribution.1_people.ethnicity.geographic_distribution.2": 276, "business.business_operation.market_capitalization.inverse_measurement_unit.dated_money_value.currency": 718, "education.education.degree_education.education.start_date": 430, "people.marriage.spouse_people.marriage.from": 91, "film.film_film_distributor_relationship.film_distribution_medium_film.film_film_distributor_relationship.film": 306, "organization.organization.headquarters.inverse_location.mailing_address.citytown": 19, "business.shopping_center.owner.1_business.shopping_center.owner.2": 303, "finance.stock_exchange.primary_regions.2_finance.stock_exchange.primary_regions.1": 522, "organization.organization.advisors.2_organization.organization.advisors.1": 327, "film.film.sequel.2_film.film.sequel.1": 280, "organization.leadership.person_organization.leadership.as_of_date": 400, "people.human_measurement.person_people.human_measurement.date": 719, "education.education.student_education.education.end_date": 212, "film.film_film_distributor_relationship.region_film.film_film_distributor_relationship.film_distribution_medium": 489, "film.film.music.1_film.film.music.2": 119, "location.mailing_address.country_location.mailing_address.postal_code": 270, "organization.organization_spin_off.parent_company_organization.organization_spin_off.child_company": 433, "film.film.featured_song.2_film.film.featured_song.1": 388, "film.film_film_distributor_relationship.region_film.film_film_distributor_relationship.film_cut": 585, "education.education.degree_education.education.student": 187, "people.ethnicity.included_in_group.1_people.ethnicity.included_in_group.2": 317, "business.consumer_product.official_webpage.1_business.consumer_product.official_webpage.2": 641, "organization.organization.founders.1_organization.organization.founders.2": 126, "business.business_operation.revenue.inverse_measurement_unit.dated_money_value.currency": 105, "film.film.cinematography.1_film.film.cinematography.2": 175, "measurement_unit.dated_money_value.currency_measurement_unit.dated_money_value.valid_date": 271, "people.appointment.appointee_people.appointment.declared_on": 435, "organization.organization.geographic_scope.1_organization.organization.geographic_scope.2": 130, "film.personal_film_appearance.person_film.personal_film_appearance.type_of_appearance": 159, "organization.organization_merger.organizations_merging_organization.organization_merger.became_organization": 379, "film.film.gross_revenue.inverse_measurement_unit.dated_money_value.currency": 418, "people.deceased_person.date_of_burial.1_people.deceased_person.date_of_burial.2": 421, "business.sponsorship.sponsored_by_business.sponsorship.sponsored_recipient": 278, "film.film_regional_release_date.film_regional_debut_venue_film.film_regional_release_date.film_regional_debut_venue": 598} --------------------------------------------------------------------------------