├── .gitignore ├── .DS_Store ├── data └── .gitignore ├── exp └── .gitignore ├── exp_onto └── .gitignore ├── imgs ├── main.png └── model.pdf ├── run_onto.sh ├── run_gcn_onto.sh ├── baseline.sh ├── run.sh ├── run_label_gcn.sh ├── run_model_change.sh ├── run_regu.sh ├── inconsistency.sublime-project ├── LICENSE ├── README.md ├── resources └── constant.py ├── analysis.py ├── eval_metric.py ├── scorer.py ├── config_parser.py ├── label_corr.py ├── attention.py ├── model_utils.py ├── models.py ├── data_utils.py ├── baseline.py ├── main.py └── inconsistency.sublime-workspace /.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | *.pyc 3 | *.npy 4 | *.json 5 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xwhan/Extremely-Fine-Grained-Entity-Typing/HEAD/.DS_Store -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /exp/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /exp_onto/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /imgs/main.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xwhan/Extremely-Fine-Grained-Entity-Typing/HEAD/imgs/main.png -------------------------------------------------------------------------------- /imgs/model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xwhan/Extremely-Fine-Grained-Entity-Typing/HEAD/imgs/model.pdf -------------------------------------------------------------------------------- /run_onto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=$1 python main.py $2 -lstm_type single -enhanced_mention -goal onto 4 | -------------------------------------------------------------------------------- /run_gcn_onto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=$1 python main.py $2 -lstm_type single -enhanced_mention -goal onto -gcn 4 | -------------------------------------------------------------------------------- /baseline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=$1 python main.py $2 -lstm_type single -enhanced_mention -data_setup joint -add_crowd -multitask 4 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=$1 python main.py $2 -lstm_type single -enhanced_mention -data_setup joint -add_crowd -multitask -incon_w $3 -add_regu 4 | -------------------------------------------------------------------------------- /run_label_gcn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=1 python main.py $1 -lstm_type single -enhanced_mention -data_setup joint -add_crowd -multitask -gcn -model_debug 
-------------------------------------------------------------------------------- /run_model_change.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=$1 python main.py $2 -lstm_type single -enhanced_mention -data_setup joint -add_crowd -multitask -model_debug 4 | -------------------------------------------------------------------------------- /run_regu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=$1 python main.py $2 -lstm_type single -enhanced_mention -data_setup joint -add_crowd -multitask -incon_w $3 -add_regu -regu_steps $4 4 | -------------------------------------------------------------------------------- /inconsistency.sublime-project: -------------------------------------------------------------------------------- 1 | { 2 | "build_systems": 3 | [ 4 | { 5 | "file_regex": "^[ ]*File \"(...*?)\", line ([0-9]*)", 6 | "name": "Anaconda Python Builder", 7 | "selector": "source.python", 8 | "shell_cmd": "\"/Users/wenhan/anaconda2/bin/python\" -u \"$file\"" 9 | } 10 | ], 11 | "folders": 12 | [ 13 | { 14 | "path": "." 
15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Wenhan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Code for our NAACL 2019 paper: 2 | 3 | ## Imposing Label-Relational Inductive Bias for Extremely Fine-Grained Entity Typing 4 | 5 | Paper link: [http://arxiv.org/abs/1903.02591](http://arxiv.org/abs/1903.02591) 6 | 7 | Model Overview: 8 |

9 | 10 | ### Requirements 11 | * ``PyTorch 0.4.1`` 12 | * ``tensorboardX`` 13 | * ``tqdm`` 14 | * ``gluonnlp`` 15 | 16 | ### Running the code 17 | First prepare the dataset and embeddings 18 | * download data from [http://nlp.cs.washington.edu/entity_type/data/ultrafine_acl18.tar.gz](http://nlp.cs.washington.edu/entity_type/data/ultrafine_acl18.tar.gz), unzip it and put it under ``data/`` 19 | 20 | 21 | #### 1. Ultra-Fine experiments (10331 free-text labels and millions of training examples) 22 | 23 | ##### Train the best model on Ultra-Fine 24 | ``` 25 | CUDA_VISIBLE_DEVICES=1 python main.py $RUN_ID$ -lstm_type single -model_debug -enhanced_mention -data_setup joint -add_crowd -multitask -gcn 26 | ``` 27 | 28 | ##### You can then test your saved model 29 | ``` 30 | CUDA_VISIBLE_DEVICES=1 python main.py $RUN_ID$ -lstm_type single -model_debug -enhanced_mention -data_setup joint -add_crowd -multitask -gcn -load -mode test -eval_data crowd/test.json 31 | ``` 32 | 33 | 34 | ##### Ablation experiments 35 | **a) w/o gcn** 36 | 37 | ``` 38 | CUDA_VISIBLE_DEVICES=1 python main.py $RUN_ID$ -lstm_type single -model_debug -enhanced_mention -data_setup joint -add_crowd -multitask 39 | ``` 40 | 41 | **b) w/o enhanced mention-context interaction** 42 | 43 | ``` 44 | CUDA_VISIBLE_DEVICES=1 python main.py $RUN_ID$ -lstm_type single -gcn -enhanced_mention -data_setup joint -add_crowd -multitask 45 | ``` 46 | 47 | 48 | #### 2. 
def load_vocab_dict(vocab_file_name, vocab_max_size=None, start_vocab_count=None):
    """Read a one-entry-per-line vocabulary file into an ``entry -> id`` dict.

    Each line is stripped of surrounding whitespace.  Ids follow file order,
    starting at ``start_vocab_count`` when it is given (and non-zero) and at
    0 otherwise.  When ``vocab_max_size`` is given (and non-zero) only the
    first ``vocab_max_size`` lines are kept.  If an entry occurs more than
    once, the id of its last occurrence wins.
    """
    with open(vocab_file_name) as handle:
        entries = [raw.strip() for raw in handle]

    if vocab_max_size:
        entries = entries[:vocab_max_size]

    # A falsy start (None or 0) means plain zero-based numbering.
    first_id = start_vocab_count if start_vocab_count else 0
    return {entry: ident for ident, entry in enumerate(entries, first_id)}
# Label-string -> integer-id tables, keyed by task name ("goal").
ANS2ID_DICT = {"open": ANSWER_VOCAB, "wiki": WIKI_VOCAB, "kb": KB_VOCAB, "onto": ONTO_ANS_VOCAB}

# Inverse tables: integer id -> label string.
open_id2ans = {v: k for k, v in ANSWER_VOCAB.items()}
wiki_id2ans = {v: k for k, v in WIKI_VOCAB.items()}
kb_id2ans = {v: k for k, v in KB_VOCAB.items()}
g_id2ans = {v: k for k, v in ONTO_ANS_VOCAB.items()}

ID2ANS_DICT = {"open": open_id2ans, "wiki": wiki_id2ans, "kb": kb_id2ans, "onto": g_id2ans}
label_string = namedtuple("label_types", ["head", "wiki", "kb"])
LABEL = label_string("HEAD", "WIKI", "KB")

# Character -> index table; characters missing from the vocabulary map to 0
# because CHAR_DICT is a defaultdict(int).
CHAR_DICT = defaultdict(int)
# NOTE(review): the first (index 0) entry is an empty string here; confirm it
# was not meant to be an explicit unknown-character token.
char_vocab = [u""]
with open(FILE_ROOT + "/ontology/char_vocab.english.txt") as f:
    char_vocab.extend(c.strip() for c in f.readlines())
CHAR_DICT.update({c: i for i, c in enumerate(char_vocab)})

import json

pronouns_set = set(['he', 'I', 'they', 'him', 'it',
                    'himself', 'we', 'she', 'her', 'me', 'you', 'me', 'us', 'them', 'you', 'themselves', 'itself'])

# Mentions are lower-cased before the membership test, so the comparison set
# must be lower-cased too; otherwise the capitalised 'I' can never match.
_PRONOUNS_LOWER = set(p.lower() for p in pronouns_set)


def _split_pronoun_mentions(json_lines_path):
    """Return ``(pronoun_indices, other_indices)`` for a JSON-lines file.

    Every line must be a JSON object with a ``"mention_span"`` string.  The
    0-based line index goes to the first list when the whitespace-normalised,
    lower-cased mention is a pronoun, and to the second list otherwise.
    """
    pronoun_idx = []
    other_idx = []
    with open(json_lines_path) as f:
        for index, sent in enumerate(f):
            mention = json.loads(sent.strip())["mention_span"].split()
            if ' '.join(mention).strip().lower() in _PRONOUNS_LOWER:
                pronoun_idx.append(index)
            else:
                other_idx.append(index)
    return pronoun_idx, other_idx


# Example indices of the crowd dev set, split by whether the mention is a pronoun.
pronoun_index_dev, else_index_dev = _split_pronoun_mentions(FILE_ROOT + 'crowd/dev.json')

# Only the pronoun indices are kept for the crowd test set.
pronoun_index_test, _ = _split_pronoun_mentions(FILE_ROOT + 'crowd/test.json')

if __name__ == '__main__':
    print(len(pronoun_index_dev))
/analysis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | sys.path.insert(0, './resources/') 4 | import constant 5 | from model_utils import metric_dicts 6 | import numpy as np 7 | from eval_metric import mrr 8 | import random 9 | 10 | with open(constant.FILE_ROOT + 'crowd/dev.json') as f: 11 | line_elems = [json.loads(sent.strip()) for sent in f.readlines()] 12 | left_seq = [line_elem['left_context_token'] for line_elem in line_elems] 13 | mention_seq = [line_elem["mention_span"].split() for line_elem in line_elems] 14 | right_seq = [line_elem['right_context_token'] for line_elem in line_elems] 15 | seqs = [i+j+k for i,j,k in list(zip(left_seq, mention_seq, right_seq))] 16 | y_str_list = [line_elem['y_str'] for line_elem in line_elems] 17 | 18 | 19 | 20 | # index = random.sample(range(len(seqs)), 1)[0] 21 | # print(' '.join(seqs[index])) 22 | # print(' '.join(mention_seq[index])) 23 | # print(y_str_list[index]) 24 | 25 | gold_and_pred = json.load(open('best_predictions.json')) 26 | # gold_and_pred = json.load(open('nogcn_predictions.json')) 27 | # probs = np.load('nointer_probs.npy') 28 | # y = np.load('nointer_y.npy') 29 | 30 | general_types = set(['person', 'group', 'organization', 'location', 'entity', 'time', 'object', 'place', 'event']) 31 | 32 | # # error analysis 33 | # ps = [] 34 | # rs = [] 35 | # for true, pred in gold_and_pred: 36 | # if pred: 37 | # trueset = set(true) - general_types 38 | # predset = set(pred) - general_types 39 | # if len(trueset) == 0: 40 | # continue 41 | # if len(predset) == 0: 42 | # ps.append(0) 43 | # rs.append(0) 44 | # continue 45 | 46 | # p = len(predset.intersection(trueset)) / float(len(predset)) 47 | # r = len(predset.intersection(trueset)) / float(len(trueset)) 48 | # ps.append(p) 49 | # rs.append(r) 50 | # else: 51 | # print('empty') 52 | 53 | # print(np.mean(ps)) 54 | # print(rs) 55 | 56 | # find the samples with pronouns 57 | pronouns_results = [] 
def f1(p, r):
    """Return the harmonic mean of precision ``p`` and recall ``r``.

    Returns 0 when recall is 0, which also avoids a 0/0 division.
    """
    if r == 0.:
        return 0.
    return 2 * p * r / float(p + r)


def strict(true_and_prediction):
    """Exact-match accuracy over ``(true_labels, predicted_labels)`` pairs.

    An example counts as correct only when the two label sets are equal.
    Returns ``(precision, recall, f1)``; precision and recall are the same
    number.  An empty input yields ``(0., 0., 0.)`` instead of dividing by
    zero.
    """
    num_entities = len(true_and_prediction)
    if num_entities == 0:
        return 0., 0., 0.
    correct_num = 0.
    for true_labels, predicted_labels in true_and_prediction:
        correct_num += set(true_labels) == set(predicted_labels)
    precision = recall = correct_num / num_entities
    return precision, recall, f1(precision, recall)


def macro(true_and_prediction):
    """Example-averaged precision / recall / F1.

    Precision is averaged over examples that have at least one prediction,
    recall over examples that have at least one gold label.

    Returns ``(num_examples, pred_example_count, avg_elem_per_pred,
    precision, recall, f1)``.  When no example has predictions (or none has
    gold labels) the affected averages are reported as 0 rather than raising.
    """
    num_examples = len(true_and_prediction)
    p = 0.
    r = 0.
    pred_example_count = 0.   # examples with a non-empty prediction
    pred_label_count = 0.     # total number of predicted labels
    gold_label_count = 0.     # examples with a non-empty gold set
    for true_labels, predicted_labels in true_and_prediction:
        overlap = len(set(predicted_labels).intersection(set(true_labels)))
        if predicted_labels:
            pred_example_count += 1
            pred_label_count += len(predicted_labels)
            p += overlap / float(len(predicted_labels))
        if len(true_labels):
            gold_label_count += 1
            r += overlap / float(len(true_labels))

    # Each average is defined only when its denominator is non-zero; the
    # original left these names unbound (or divided by zero) otherwise.
    precision = p / pred_example_count if pred_example_count > 0 else 0.
    recall = r / gold_label_count if gold_label_count > 0 else 0.
    avg_elem_per_pred = pred_label_count / pred_example_count if pred_example_count > 0 else 0.
    return num_examples, pred_example_count, avg_elem_per_pred, precision, recall, f1(precision, recall)


def micro(true_and_prediction):
    """Label-pooled precision / recall / F1.

    Only examples with at least one prediction contribute to the pooled
    counts.  Returns the same 6-tuple layout as :func:`macro`.  All metrics
    are 0 when nothing was predicted; recall is 0 when the contributing
    examples carry no gold labels.
    """
    num_examples = len(true_and_prediction)
    num_predicted_labels = 0.
    num_true_labels = 0.
    num_correct_labels = 0.
    pred_example_count = 0.
    for true_labels, predicted_labels in true_and_prediction:
        if predicted_labels:
            pred_example_count += 1
            num_predicted_labels += len(predicted_labels)
            num_true_labels += len(true_labels)
            num_correct_labels += len(set(predicted_labels).intersection(set(true_labels)))
    if pred_example_count == 0:
        return num_examples, 0, 0, 0, 0, 0
    precision = num_correct_labels / num_predicted_labels
    # Guard the case where every predicted example has an empty gold set.
    recall = num_correct_labels / num_true_labels if num_true_labels > 0 else 0.
    avg_elem_per_pred = num_predicted_labels / pred_example_count
    return num_examples, pred_example_count, avg_elem_per_pred, precision, recall, f1(precision, recall)
def stratify(all_labels, types, num_coarse=9, num_fine=130):
    """
    Divide label into three categories.

    ``types`` is the ordered type vocabulary: its first ``num_coarse``
    entries are the coarse types and the entries up to (not including)
    index ``num_fine`` are the fine types.  Every other label, including
    labels absent from ``types``, is treated as "finer".

    Returns a tuple ``(coarse, fine, finer)`` of lists.  Each list keeps the
    order (and duplicates) of ``all_labels``.  A label present in both the
    coarse and the fine slice is reported as coarse only.
    """
    # Sets make each membership test O(1) instead of scanning a list slice.
    coarse_types = set(types[:num_coarse])
    fine_types = set(types[num_coarse:num_fine])

    coarse, fine, finer = [], [], []
    for label in all_labels:
        if label in coarse_types:
            coarse.append(label)
        elif label in fine_types:
            fine.append(label)
        else:
            finer.append(label)
    return coarse, fine, finer
def load_augmented_input(fname):
    """Index a JSON-lines file by annotation id.

    Every line of ``fname`` is parsed as a JSON object.  Its ``"annot_id"``
    value is removed from the object and used as the key under which the
    remaining fields are stored.  Later lines overwrite earlier ones that
    share the same id.
    """
    with open(fname) as source:
        records = [json.loads(row.strip()) for row in source]

    by_annotation = {}
    for record in records:
        annotation_id = record.pop("annot_id")
        by_annotation[annotation_id] = record
    return by_annotation
{0:.4f}".format(mrr_value)) 83 | # compute precision, recall, f1 84 | compute_prf1(gold_pred_str_fname) 85 | print('printing performance for coarse, fine, finer labels in order') 86 | compute_granul_prf1(gold_pred_str_fname, type_fname) 87 | -------------------------------------------------------------------------------- /config_parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | def read_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("model_id", help="Identifier for model") 7 | # Data 8 | parser.add_argument("-train_data", help="Train data", default="ontonotes/augmented_train.json") 9 | parser.add_argument("-dev_data", help="Dev data", default="ontonotes/g_dev.json") 10 | parser.add_argument("-eval_data", help="Test data", default="ontonotes/g_test.json") 11 | # parser.add_argument("-num_epoch", help="The number of epoch", default=5000, type=int) 12 | parser.add_argument("-num_epoch", help="The number of epoch", default=5000, type=int) 13 | parser.add_argument("-batch_size", help="The batch size", default=1000, type=int) 14 | parser.add_argument("-eval_batch_size", help="The batch size", default=1998, type=int) 15 | parser.add_argument("-goal", help="Limiting vocab to smaller vocabs (either ontonote or figer)", default="open", 16 | choices=["open", "onto", "wiki", 'kb']) 17 | parser.add_argument("-seed", help="Pytorch random Seed", default=1888, type=int) 18 | parser.add_argument("-gpu", help="Using gpu or cpu", default=False, action="store_true") 19 | 20 | parser.add_argument("-embed_source", default='glove', type=str) 21 | parser.add_argument("-max_batch", default=50000, type=int) 22 | 23 | # learning 24 | parser.add_argument("-mode", help="Whether to train or test", default="train", choices=["train", "test", "visual"]) 25 | parser.add_argument("-learning_rate", help="start learning rate", default=0.001, type=float) 26 | 
parser.add_argument("-mention_dropout", help="drop out rate for mention", default=0.5, type=float) 27 | parser.add_argument("-input_dropout", help="drop out rate for sentence", default=0.2, type=float) 28 | parser.add_argument("-incon_w", default=0.2, type=float) 29 | parser.add_argument("-use_lr_schedule", action='store_true') 30 | parser.add_argument("-use_sparse_adam", action='store_true') 31 | 32 | # Data ablation study 33 | parser.add_argument("-add_crowd", help="Add indomain data as train", default=False, action='store_true') 34 | parser.add_argument("-data_setup", help="Whether to use joint data set-up", default="single", 35 | choices=["single", "joint"]) 36 | parser.add_argument("-only_crowd", help="Only using indomain data as train", default=False, action='store_true') 37 | parser.add_argument("-remove_el", help="Remove supervision from entity linking", default=False, action='store_true') 38 | parser.add_argument("-remove_open", help="Remove supervision from headwords", default=False, action='store_true') 39 | 40 | # Model 41 | parser.add_argument("-multitask", help="Using a multitask loss term.", default=False, action='store_true') 42 | parser.add_argument("-enhanced_mention", help="Use attention and cnn for mention representation", default=False, action='store_true') 43 | parser.add_argument("-lstm_type", default="two", choices=["two", "single"]) 44 | parser.add_argument("-dim_hidden", help="The number of hidden dimension.", default=100, type=int) 45 | parser.add_argument("-rnn_dim", help="The number of RNN dimension.", default=100, type=int) 46 | # Save / log related 47 | # parser.add_argument("-save_period", help="How often to save", default=5000, type=int) 48 | parser.add_argument("-eval_period", help="How often to run dev", default=1000, type=int) 49 | parser.add_argument("-log_period", help="How often to save", default=1000, type=int) 50 | 51 | parser.add_argument("-load", help="Load existing model.", action='store_true') 52 | 
def get_logger(args):
    """Configure the root logger and dump all hyperparameters to it.

    Records at INFO level and above go both to the console and to
    ``./logs/<args.model_id>.txt`` (truncated on each call).  The ``logs``
    directory is created when it does not exist, so a fresh checkout does
    not fail on the first run.

    ``args`` must expose ``model_id`` and support ``vars()`` (e.g. an
    ``argparse.Namespace``); every attribute is logged as ``name: value``.

    Returns the configured root logger.
    """
    import os  # local import: this module's header only imports argparse and logging

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s %(levelname)s: - %(message)s', datefmt='%m-%d %H:%M:%S')

    # FileHandler does not create missing parent directories.
    if not os.path.isdir('./logs'):
        os.makedirs('./logs')
    fh = logging.FileHandler('./logs/{}.txt'.format(args.model_id), mode='w+')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)

    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)

    logger.addHandler(ch)
    logger.addHandler(fh)

    logger.info("------HYPERPARAMETERS-------")
    for k, v in vars(args).items():
        logger.info(k + ': ' + str(v))
    logger.info("----------------------------")

    return logger
emb_source='wiki-news-300d-1M-subword', goal='open'): 15 | # def build_concurr_matrix(emb_name='glove', emb_source='glove.840B.300d', goal='open'): 16 | data_path = 'data/release/' 17 | # build the yid concurr matrix 18 | if goal == 'onto': 19 | label2id = constant.ANS2ID_DICT["onto"] 20 | id2label = constant.g_id2ans 21 | else: 22 | label2id = constant.ANS2ID_DICT["open"] 23 | id2label = constant.open_id2ans 24 | 25 | if goal != 'onto': 26 | print('Building label word embedding for open') 27 | words = [] 28 | for label in label2id.keys(): 29 | words += label.split('_') 30 | word_counter = gluonnlp.data.count_tokens(words) 31 | word_vocab = gluonnlp.Vocab(word_counter) 32 | embed = gluonnlp.embedding.create(emb_name, source=emb_source) 33 | word_vocab.set_embedding(embed) 34 | label_vectors = [] 35 | for id_ in range(len(label2id.keys())): 36 | label = id2label[id_] 37 | label_words = label.split('_') 38 | label_vectors.append(word_vocab.embedding[label_words].asnumpy().sum(0)) 39 | affinity = cosine_similarity(label_vectors) 40 | else: 41 | print("BOW features for ontonotes") 42 | words = [] 43 | for label in label2id.keys(): 44 | label = label.replace('/', ' ') 45 | labels = label.strip().split() 46 | words += labels 47 | word_counter = gluonnlp.data.count_tokens(words) 48 | word_vocab = gluonnlp.Vocab(word_counter) 49 | embed = gluonnlp.embedding.create(emb_name, source=emb_source) 50 | word_vocab.set_embedding(embed) 51 | 52 | label_list = [] 53 | label_vectors = [] 54 | for id_ in range(len(label2id.keys())): 55 | label = id2label[id_] 56 | label = label.replace('/', ' ') 57 | labels = label.strip().split() 58 | label_list.append(labels) 59 | label_vectors.append(word_vocab.embedding[labels].asnumpy().sum(0)) 60 | label_vectors = np.array(label_vectors) 61 | affinity = cosine_similarity(label_vectors) 62 | 63 | matrix = np.zeros((len(label2id.keys()), len(label2id.keys()))) 64 | if goal == 'onto': 65 | train_file_list = ['ontonotes/augmented_train.json'] 66 | 
else: 67 | train_file_list = ['distant_supervision/headword_train.json', 'distant_supervision/el_train.json', 'crowd/train_m.json'] 68 | 69 | type_count = defaultdict(int) 70 | for f_id, file in enumerate(train_file_list): 71 | file = data_path + file 72 | with open(file) as f: 73 | for sent in tqdm(f.readlines()): 74 | line_elem = json.loads(sent.strip()) 75 | y_strs = line_elem['y_str'] 76 | # y_ids = list(set([label2id[x] for x in y_strs if x in label2id])) 77 | for x in y_strs: 78 | type_count[x] += 1 79 | 80 | 81 | y_ids = [label2id[x] for x in y_strs if x in label2id] 82 | if len(y_ids) > 1: 83 | for (x, y) in itertools.combinations(y_ids, 2): 84 | # if x == y: 85 | # print(y_strs) 86 | # assert False 87 | matrix[x,y] = matrix[x,y] + 1 88 | 89 | 90 | # print(type_count['child']) 91 | # print(type_count['daughter']) 92 | # print(np.mean(list(type_count.values()))) 93 | 94 | # add self-connection 95 | matrix += np.identity(matrix.shape[0]) 96 | 97 | # print(len(concurr_labels)) 98 | # print(np.count_nonzero(matrix)/np.prod(matrix.shape)) 99 | target = np.tanh(np.log(matrix + 1e-8)) 100 | mask = (matrix == 0).astype(float) 101 | mask_inverse = (matrix > 0).astype(float) 102 | 103 | return matrix, affinity, target, mask, mask_inverse 104 | 105 | if __name__ == '__main__': 106 | co_occurence, _, _, _, _ = build_concurr_matrix() 107 | co_occurence = co_occurence - np.identity(co_occurence.shape[0]) 108 | print(np.max(co_occurence)) 109 | 110 | # id2label = constant.open_id2ans 111 | # label2id = constant.ANS2ID_DICT["open"] 112 | # person_id = label2id['person'] 113 | 114 | # # print(co_occurence[:10,:10]) 115 | 116 | # person_id_row = co_occurence[person_id, :] 117 | # label_freq = {} 118 | # inconsistent_pairs = [] 119 | # for index, value in enumerate(person_id_row): 120 | # if value != 0: 121 | # label_freq[id2label[index]] = value 122 | # else: 123 | # inconsistent_pairs.append(['person', id2label[index]]) 124 | # # print(label_freq) 125 | # 
def attention(query, key, value, mask=None, dropout=None):
    """Compute scaled dot-product attention.

    The attention logits are ``query @ key^T`` over the last two dimensions,
    divided by the square root of the size of ``query``'s last dimension.
    Positions where ``mask == 0`` are filled with ``-1e9`` before the softmax
    over the last dimension.  ``dropout``, when given, is applied to the
    attention weights.

    Returns ``(weights @ value, weights)``.
    """
    scale = math.sqrt(query.size(-1))
    logits = torch.matmul(query, key.transpose(-2, -1)) / scale

    if mask is not None:
        logits = logits.masked_fill(mask == 0, -1e9)

    weights = F.softmax(logits, dim=-1)
    if dropout is not None:
        weights = dropout(weights)

    context = torch.matmul(weights, value)
    return context, weights


def clones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
    copies = []
    for _ in range(N):
        copies.append(copy.deepcopy(module))
    return nn.ModuleList(copies)
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a batch-first sequence.

    The table is computed once for ``max_len`` positions and stored as the
    non-trainable buffer ``pe`` of shape ``(1, max_len, d_model)``.  Even
    feature columns hold ``sin(pos * w_k)`` and odd columns ``cos(pos * w_k)``
    with ``w_k = exp(-2k * ln(10000) / d_model)``.

    Args:
        d_model: feature size of the inputs (odd sizes are supported).
        dropout: dropout probability applied after adding the encodings.
        max_len: number of positions to precompute.
    """

    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even columns, so
        # the frequency vector must be trimmed or the assignment raises a
        # shape-mismatch error.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:, :seq_len])`` for ``x`` of shape (batch, seq_len, d_model)."""
        # Buffers never require grad, so no autograd wrapper is needed here.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)
class SublayerConnection(nn.Module):
    """
    Residual wrapper around an arbitrary sublayer.

    Computes ``x + dropout(sublayer(norm(x)))``: the layer norm is applied to
    the sublayer's *input* (pre-norm), not to the residual sum.
    """

    def __init__(self, size, dropout):
        super().__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        """Apply ``sublayer`` (a callable returning a tensor shaped like ``x``) with a residual connection."""
        normed = self.norm(x)
        transformed = self.dropout(sublayer(normed))
        return x + transformed
if __name__ == '__main__':
    # Smoke test: push an all-zero batch through a 1-layer, 2-head encoder.
    batch_size, seq_len, embed_dim = 1000, 50, 350
    encoder = SimpleEncoder(embed_dim, 2, 1)
    inputs = torch.zeros(batch_size, seq_len, embed_dim)
    lens = [10] * batch_size
    # SimpleEncoder.forward takes (x, mask, lens); the previous call passed only
    # (inputs, lens) and raised a TypeError.  Build the (batch, seq_len) padding
    # mask from the lengths: True marks real tokens, False marks padding.
    mask = torch.arange(seq_len).unsqueeze(0) < torch.tensor(lens).unsqueeze(1)
    outputs = encoder(inputs, mask, lens)
    assert outputs.size() == inputs.size()
def metric_dicts(true_prediction):
    """Compute micro/macro metrics and exact-match accuracy for (gold, prediction) pairs.

    Args:
        true_prediction: a sized sequence of ``(gold_labels, predicted_labels)``
            pairs; each side is an iterable of labels.

    Returns:
        ``(result, output_str)`` where ``result`` maps ``precision``,
        ``recall``, ``f1``, ``ma_precision``, ``ma_recall``, ``ma_f1`` and
        ``accu`` to their values, and ``output_str`` is the same information
        formatted as a single log line.
    """
    count, pred_count, avg_pred_count, p, r, f1 = eval_metric.micro(true_prediction)
    _, _, _, ma_p, ma_r, ma_f1 = eval_metric.macro(true_prediction)
    output_str = "Eval: {0} {1} {2:.3f} P:{3:.3f} R:{4:.3f} F1:{5:.3f} Ma_P:{6:.3f} Ma_R:{7:.3f} Ma_F1:{8:.3f}".format(
        count, pred_count, avg_pred_count, p, r, f1, ma_p, ma_r, ma_f1)

    # Exact-match accuracy: gold and predicted label *sets* must coincide.
    # An empty input yields 0.0 instead of raising ZeroDivisionError.
    num_examples = len(true_prediction)
    num_exact = sum(set(y) == set(yp) for y, yp in true_prediction)
    accuracy = num_exact * 1.0 / num_examples if num_examples else 0.0

    output_str += '\t Dev accuracy: {0:.1f}%'.format(accuracy * 100)
    result = {"precision": p, "recall": r, 'f1': f1, "ma_precision": ma_p, "ma_recall": ma_r, "ma_f1": ma_f1, "accu": accuracy}
    return result, output_str
def sort_batch_by_length(tensor: torch.autograd.Variable, sequence_lengths: torch.autograd.Variable):
    """
    Sort a batch-first tensor by decreasing sequence length (adapted from allennlp).

    Parameters
    ----------
    tensor : batch-first tensor to reorder along dimension 0.
    sequence_lengths : 1-D tensor with one length per batch row.

    Returns
    -------
    sorted_tensor : ``tensor`` with rows reordered by decreasing length.
    sorted_sequence_lengths : ``sequence_lengths`` in decreasing order.
    restoration_indices : long tensor such that
        ``sorted_tensor.index_select(0, restoration_indices) == tensor``.

    Raises
    ------
    ValueError : if either argument is not a torch Variable/Tensor.
    """
    for candidate in (tensor, sequence_lengths):
        if not isinstance(candidate, Variable):
            raise ValueError("Both the tensor and sequence lengths must be torch.autograd.Variables.")

    sorted_sequence_lengths, permutation_index = sequence_lengths.sort(0, descending=True)
    sorted_tensor = tensor.index_select(0, permutation_index)

    # The inverse of a permutation is its argsort: entry i is the position
    # that original row i occupies in the sorted batch.
    restoration_indices = torch.argsort(permutation_index, dim=0)
    return sorted_tensor, sorted_sequence_lengths, restoration_indices
class SimpleDecoder(nn.Module):
    """Single bias-free linear layer producing ``answer_num`` logits per input row."""

    def __init__(self, output_dim, answer_num):
        super().__init__()
        self.linear = nn.Linear(output_dim, answer_num, bias=False)
        self.answer_num = answer_num

    def forward(self, inputs, output_type):
        """Return ``linear(inputs)``; ``output_type`` is accepted but not used by this decoder."""
        return self.linear(inputs)
class GCNMultiDecoder(nn.Module):
    """Multi-task decoder (ultra-fine setting) with one propagation step over label vectors.

    ``linear.weight`` holds one vector per "open" label.  For the ``wiki`` and
    ``kb`` output types the label vectors are first averaged over a label
    graph (co-occurrence matrix plus a learned multiple of an affinity
    matrix), passed through ``transform``, and the leading rows are used as
    the classification weights.
    """

    def __init__(self, output_dim):
        super(GCNMultiDecoder, self).__init__()
        self.output_dim = output_dim
        self.linear = nn.Linear(output_dim, constant.ANSWER_NUM_DICT['open'], bias=False)
        self.transform = nn.Linear(output_dim, output_dim, bias=False)

        label_matrix, affinity, _, _, _ = build_concurr_matrix()
        # Stored as plain attributes pinned to CUDA (not buffers), so they are
        # neither saved in the state dict nor moved by ``.to()`` / ``.cpu()``.
        self.label_matrix = torch.from_numpy(label_matrix).to(torch.device('cuda')).float()

        # Affine rescale (a + 1) / 2 — presumably maps a [-1, 1] similarity
        # into [0, 1]; TODO confirm against build_concurr_matrix.
        self.affinity = (torch.from_numpy(affinity).to(torch.device('cuda')).float() + 1) / 2
        # Learned scalar weighting of the affinity graph relative to co-occurrence.
        self.weight = nn.Parameter(torch.rand(1))

    def _propagated_label_vectors(self, num_labels):
        """Return the first ``num_labels`` graph-propagated label vectors."""
        connection_matrix = self.label_matrix + self.weight * self.affinity
        # Row-normalised neighbourhood average of the label vectors.
        aggregated = connection_matrix.mm(self.linear.weight) / connection_matrix.sum(1, keepdim=True)
        return self.transform(aggregated)[:num_labels, :]

    def forward(self, inputs, output_type):
        """Return logits for ``output_type`` in {"open", "wiki", "kb"}.

        Raises:
            ValueError: if ``output_type`` is not one of the supported values.
        """
        if output_type == "open":
            # NOTE(review): the "open" head uses the raw label vectors and
            # bypasses the graph propagation, unlike "wiki"/"kb" — confirm
            # this is intended.  The propagation is skipped here because its
            # result was never used on this path.
            return self.linear(inputs)
        if output_type in ('wiki', 'kb'):
            label_vectors = self._propagated_label_vectors(constant.ANSWER_NUM_DICT[output_type])
            return F.linear(inputs, label_vectors, self.linear.bias)
        raise ValueError('Decoder error: output type not one of the valid')
class SelfAttentiveSum(nn.Module):
    """
    Attention pooling: collapse a (batch, seq_len, output_dim) sequence into a
    single (batch, output_dim) vector using learned per-position weights.

    Each position is scored by ``key_output(relu(key_maker(x)))``; when
    ``hidden_dim == 1`` the ReLU output itself is the score and ``key_output``
    is not applied.
    """

    def __init__(self, output_dim, hidden_dim):
        super(SelfAttentiveSum, self).__init__()
        self.key_maker = nn.Linear(output_dim, hidden_dim, bias=False)
        self.key_rel = nn.ReLU()
        self.hidden_dim = hidden_dim
        self.key_output = nn.Linear(hidden_dim, 1, bias=False)
        self.key_softmax = nn.Softmax(dim=-1)

    def normalize(self, raw_scores, lens):
        """Softmax over the last dimension with padded positions masked out.

        Args:
            raw_scores: (batch, seq_len) scores; left unmodified.
            lens: one length per batch row (tensor or sequence of numbers);
                fractional values are truncated.  Positions at index
                ``>= length`` are set to ``-1e8`` before the softmax.

        Returns:
            (batch, seq_len) attention weights.
        """
        max_len = raw_scores.size(1)
        lengths = torch.as_tensor(lens, device=raw_scores.device).reshape(-1, 1).long()
        positions = torch.arange(max_len, device=raw_scores.device).unsqueeze(0)
        # Out-of-place masking keeps autograd's bookkeeping intact and avoids
        # a Python loop over the batch.
        masked_scores = raw_scores.masked_fill(positions >= lengths, -1e8)
        return F.softmax(masked_scores, dim=-1)

    def forward(self, input_embed, input_lens=None):
        """Pool ``input_embed`` over the sequence dimension.

        Args:
            input_embed: (batch, seq_len, output_dim) tensor.
            input_lens: optional per-row valid lengths; when given, padded
                positions receive zero attention weight.

        Returns:
            ``(weighted_values, weighted_keys)`` with shapes
            (batch, output_dim) and (batch, seq_len, 1).
        """
        batch_size = input_embed.size()[0]
        # reshape (not view) so non-contiguous inputs such as slices work too.
        input_embed_squeezed = input_embed.reshape(-1, input_embed.size()[2])
        k_d = self.key_maker(input_embed_squeezed)
        k_d = self.key_rel(k_d)  # (batch * seq_len, hidden_dim)
        if self.hidden_dim == 1:
            k = k_d.reshape(batch_size, -1)
        else:
            k = self.key_output(k_d).reshape(batch_size, -1)  # (batch, seq_len)

        # Normalise over the sequence, ignoring padding when lengths are known.
        if input_lens is not None:
            weighted_keys = self.normalize(k, input_lens).view(batch_size, -1, 1)
        else:
            weighted_keys = self.key_softmax(k).view(batch_size, -1, 1)
        weighted_values = torch.sum(weighted_keys * input_embed, 1)  # (batch, output_dim)
        return weighted_values, weighted_keys
def cosine_similarity(x1, x2=None, eps=1e-8):
    """Pairwise cosine similarity between the rows of two 2-D tensors.

    Args:
        x1: (n, d) tensor.
        x2: (m, d) tensor; defaults to ``x1`` (self-similarity).
        eps: lower bound on the product of norms, guarding against division
            by zero for all-zero rows.

    Returns:
        (n, m) tensor whose entry (i, j) is ``cos(x1[i], x2[j])``.
    """
    if x2 is None:
        x2 = x1

    norm1 = x1.norm(p=2, dim=1, keepdim=True)
    if x2 is x1:
        # Same tensor on both sides: reuse the norms already computed.
        norm2 = norm1
    else:
        norm2 = x2.norm(p=2, dim=1, keepdim=True)

    dot_products = torch.mm(x1, x2.t())
    norm_products = (norm1 * norm2.t()).clamp(min=eps)
    return dot_products / norm_products
mask_inverse = build_concurr_matrix(goal=args.goal) 67 | corr_matrix -= np.identity(corr_matrix.shape[0]) 68 | self.corr_matrix = torch.from_numpy(corr_matrix).to(torch.device('cuda')).float() 69 | self.incon_mask = torch.from_numpy(mask).to(torch.device('cuda')).float() 70 | self.con_mask = torch.from_numpy(mask_inverse).to(torch.device('cuda')).float() 71 | 72 | self.b = nn.Parameter(torch.rand(corr_matrix.shape[0], 1)) 73 | self.b_ = nn.Parameter(torch.rand(corr_matrix.shape[0], 1)) 74 | 75 | # Defining LSTM here. 76 | self.attentive_sum = SelfAttentiveSum(args.rnn_dim * 2, 100) 77 | if self.lstm_type == "two": 78 | self.left_lstm = nn.LSTM(self.embed_dim, 100, bidirectional=True, batch_first=True) 79 | self.right_lstm = nn.LSTM(self.embed_dim, 100, bidirectional=True, batch_first=True) 80 | elif self.lstm_type == 'single': 81 | self.lstm = nn.LSTM(self.embed_dim + 50, args.rnn_dim, bidirectional=True, 82 | batch_first=True) 83 | self.token_mask = nn.Linear(4, 50) 84 | 85 | if args.self_attn: 86 | self.embed_proj = nn.Linear(self.embed_dim + 50, 2*args.rnn_dim) 87 | self.encoder = SimpleEncoder(2*args.rnn_dim, head=4, layer=1, dropout=0.2) 88 | 89 | self.loss_func = nn.BCEWithLogitsLoss() 90 | self.sigmoid_fn = nn.Sigmoid() 91 | self.goal = args.goal 92 | self.multitask = args.multitask 93 | 94 | if args.data_setup == 'joint' and args.multitask and args.gcn: 95 | print("Multi-task learning with gcn on labels") 96 | self.decoder = GCNMultiDecoder(self.output_dim) 97 | elif args.data_setup == 'joint' and args.multitask: 98 | print("Multi-task learning") 99 | self.decoder = MultiSimpleDecoder(self.output_dim) 100 | elif args.data_setup == 'joint' and not args.multitask and args.gcn: 101 | print("Joint training with GCN simple decoder") 102 | self.decoder = GCNSimpleDecoder(self.output_dim, answer_num, "open" 103 | ) 104 | elif args.goal == 'onto' and args.gcn: 105 | print("Ontonotes with gcn decoder") 106 | self.decoder = GCNSimpleDecoder(self.output_dim, 
def sorted_rnn(self, sequences, sequence_lengths, rnn):
    """Run ``rnn`` over a padded batch-first batch using packed sequences.

    The batch is sorted by decreasing length (as packing requires), packed,
    fed through ``rnn`` with a default initial state, unpacked, and finally
    restored to the original row order.

    Args:
        sequences: batch-first padded input tensor.
        sequence_lengths: per-row lengths used for sorting and packing.
        rnn: recurrent module accepting a packed sequence and an initial state.

    Returns:
        Batch-first padded outputs of ``rnn`` in the original batch order.
    """
    ordered_inputs, ordered_lengths, restore_order = sort_batch_by_length(sequences, sequence_lengths)
    packed_inputs = pack_padded_sequence(ordered_inputs,
                                         ordered_lengths.data.tolist(),
                                         batch_first=True)
    packed_outputs, _ = rnn(packed_inputs, None)
    padded_outputs, _ = pad_packed_sequence(packed_outputs, batch_first=True)
    return padded_outputs.index_select(0, restore_order)
| fine_mask = torch.squeeze(torch.nonzero(torch.min(fine_target_sum.data, comparison_tensor)), dim=1) 148 | fine_logit_masked = logits[:,gen_cutoff:fine_cutoff][fine_mask, :] 149 | fine_target_masked = fine_targets.index_select(0, fine_mask) 150 | fine_loss = self.loss_func(fine_logit_masked, fine_target_masked) 151 | loss += fine_loss 152 | 153 | if not data_type == 'kb': 154 | if final_cutoff: 155 | finer_targets = targets[:, fine_cutoff:final_cutoff] 156 | logit_masked = logits[:, fine_cutoff:final_cutoff] 157 | else: 158 | logit_masked = logits[:, fine_cutoff:] 159 | finer_targets = targets[:, fine_cutoff:] 160 | if torch.sum(torch.sum(finer_targets, 1).data) >0: 161 | finer_mask = torch.squeeze(torch.nonzero(torch.min(torch.sum(finer_targets, 1).data, comparison_tensor)), dim=1) 162 | finer_target_masked = finer_targets.index_select(0, finer_mask) 163 | logit_masked = logit_masked[finer_mask, :] 164 | layer_loss = self.loss_func(logit_masked, finer_target_masked) 165 | loss += layer_loss 166 | 167 | if self.args.add_regu: 168 | if self.batch_num > self.args.regu_steps: 169 | 170 | # inconsistency loss 1: never concurr, then -1, otherwise log 171 | # label_matrix = cosine_similarity(self.decoder.linear.weight, self.decoder.linear.weight) 172 | # target = -1 * self.incon_mask + self.con_mask * torch.log(self.corr_matrix + 1e-8) 173 | # auxiliary_loss = ((target - label_matrix) ** 2).mean() 174 | # loss += self.args.incon_w * auxiliary_loss 175 | 176 | 177 | # glove like loss 178 | less_max_mask = (self.corr_matrix < 100).float() 179 | greater_max_mask = (self.corr_matrix >= 100).float() 180 | weight_matrix = less_max_mask * ((self.corr_matrix / 100.0) ** 0.75) + greater_max_mask 181 | auxiliary_loss = weight_matrix * (torch.mm(self.decoder.linear.weight, self.decoder.linear.weight.t()) + self.b + self.b_.t() - torch.log(self.corr_matrix + 1e-8)) ** 2 182 | auxiliary_loss = auxiliary_loss.mean() 183 | 184 | # # inconsistency loss 2: only consider these 
def normalize(self, raw_scores, lengths):
    """Softmax over the last dimension of 3-D scores with padding masked out.

    Args:
        raw_scores: (batch, rows, seq_len) tensor; left unmodified.
        lengths: valid length of each batch element — a tensor, a sequence of
            numbers, or a single number (which is what
            ``tensor.squeeze().tolist()`` yields for a batch of one).
            Fractional values are truncated.  Positions at index
            ``>= length`` are set to ``-1e30`` before the softmax.

    Returns:
        Tensor shaped like ``raw_scores`` whose last dimension sums to 1.
    """
    max_len = raw_scores.size(2)
    valid = torch.as_tensor(lengths, device=raw_scores.device).reshape(-1, 1, 1).long()
    positions = torch.arange(max_len, device=raw_scores.device).view(1, 1, -1)
    # Out-of-place masking: no writes through ``.data`` and no per-row
    # Python loop.
    masked_scores = raw_scores.masked_fill(positions >= valid, -1e30)
    return F.softmax(masked_scores, dim=-1)
context_rep_ = self.sorted_rnn(self.input_dropout(token_embed), feed_dict['token_seq_length'], self.lstm) 226 | if self.args.goal == 'onto' or self.args.model_id == 'baseline': 227 | context_rep, _ = self.attentive_sum(context_rep_) 228 | else: 229 | context_rep, _ = self.attentive_sum(context_rep_, feed_dict["token_seq_length"]) 230 | 231 | # Mention Representation 232 | if self.enhanced_mention: 233 | if self.args.goal == 'onto' or self.args.model_id == 'baseline': 234 | mention_embed, _ = self.head_attentive_sum(feed_dict['mention_embed']) 235 | else: 236 | mention_embed, _ = self.head_attentive_sum(feed_dict['mention_embed'], feed_dict['mention_len']) 237 | span_cnn_embed = self.cnn(feed_dict['span_chars']) 238 | mention_embed = torch.cat((span_cnn_embed, mention_embed), 1) 239 | else: 240 | mention_embed = torch.sum(feed_dict['mention_embed'], dim=1) 241 | mention_embed = self.mention_dropout(mention_embed) 242 | # model change 243 | if self.args.model_debug: 244 | mention_embed_proj = self.mention_proj(mention_embed).tanh() 245 | affinity = self.attn(mention_embed_proj.unsqueeze(1)).bmm(F.dropout(context_rep_.transpose(2,1), 0.1, self.training)) # b*1*50 246 | m_over_c = self.normalize(affinity, feed_dict['token_seq_length'].squeeze().tolist()) 247 | m_retrieve_c = torch.bmm(m_over_c, context_rep_) # b*1*200 248 | fusioned = self.fusion(m_retrieve_c.squeeze(1), mention_embed_proj) 249 | output = F.dropout(torch.cat([fusioned, context_rep], dim=1), 0.2, self.training) # seems to be a good choice for ultra-fine 250 | else: 251 | output = F.dropout(torch.cat((context_rep, mention_embed), 1), 0.3, self.training) 252 | # output = torch.cat((context_rep, mention_embed), 1) 253 | 254 | logits = self.decoder(output, data_type) 255 | loss = self.define_loss(logits, feed_dict['y'], data_type) 256 | 257 | return loss, logits 258 | -------------------------------------------------------------------------------- /data_utils.py: 
def to_torch(feed_dict, device='cuda'):
    """Convert a batch dict of numpy arrays into torch tensors on ``device``.

    Conversion rules, checked in this order for each key ``k``:
      * ``'embed' in k`` or ``k == 'token_bio'``: float tensor on ``device``;
      * ``'elmo' in k``, ``k == 'context'`` or ``k == 'mention'``: passed
        through unchanged;
      * ``k == 'token_seq_mask'``: byte (uint8) tensor on ``device``;
      * anything else (labels, lengths, indices, ``span_chars``, ...): tensor
        on ``device`` keeping the numpy dtype.

    Args:
        feed_dict: batch dictionary.  NOTE: the ``'annot_id'`` entry, if
            present, is popped from this dict in place (kept for backward
            compatibility with existing callers).
        device: target device; defaults to ``'cuda'`` as before.

    Returns:
        ``(torch_feed_dict, annot_ids)``; ``annot_ids`` is ``None`` when the
        batch carries no ``'annot_id'`` entry (previously this case raised
        ``UnboundLocalError``).
    """
    annot_ids = feed_dict.pop('annot_id', None)

    torch_feed_dict = {}
    for k, v in feed_dict.items():
        if 'embed' in k:
            torch_feed_dict[k] = torch.from_numpy(v).to(device).float()
        elif 'elmo' in k:
            torch_feed_dict[k] = v
        elif k == 'token_bio':
            torch_feed_dict[k] = torch.from_numpy(v).to(device).float()
        elif k == 'token_seq_mask':
            torch_feed_dict[k] = torch.from_numpy(v).byte().to(device)
        elif k == 'context' or k == 'mention':
            torch_feed_dict[k] = v
        else:
            torch_feed_dict[k] = torch.from_numpy(v).to(device)
    return torch_feed_dict, annot_ids
def pad_slice(seq, seq_length, cut_left=False, pad_token=""):
    """Return ``seq`` truncated or padded to ``seq_length`` items.

    * Shorter than ``seq_length``: pad tokens are appended on the right
      (regardless of ``cut_left``).
    * Otherwise, with ``cut_left=False``: keep the first ``seq_length`` items.
    * Otherwise, with ``cut_left=True``: drop every ``pad_token`` already in
      ``seq``, keep the last ``seq_length`` remaining items, and left-pad if
      fewer remain.
    """
    missing = seq_length - len(seq)
    if missing > 0:
        return seq + ([pad_token] * missing)

    if not cut_left:
        return seq[:seq_length]

    kept = [tok for tok in seq if tok != pad_token]
    deficit = seq_length - len(kept)
    if deficit <= 0:
        return kept[-seq_length:]
    return [pad_token] * deficit + kept
for _ in mention_seq + right_seq + left_seq: 104 | words += [tok.lower() for tok in _] 105 | counter = gluonnlp.data.count_tokens(words) 106 | vocab = gluonnlp.Vocab(counter) 107 | with open('data/release/idx_to_token', 'w') as g: 108 | g.write('\n'.join(vocab.idx_to_token)) 109 | with open('data/release/token_to_idx.json', 'w') as g: 110 | json.dump(vocab.token_to_idx, g) 111 | 112 | def load_vocab(): 113 | with open('data/release/idx_to_token') as f: 114 | idx_to_token = [word.strip() for word in f.readlines()] 115 | with open('data/release/token_to_idx.json') as g: 116 | token_to_idx = json.load(g) 117 | return idx_to_token, token_to_idx 118 | 119 | 120 | 121 | def get_example(generator, glove_dict, batch_size, answer_num, 122 | eval_data=False, lstm_type="two", simple_mention=True): 123 | embed_dim = 300 124 | cur_stream = [None] * batch_size 125 | no_more_data = False 126 | 127 | while True: 128 | bsz = batch_size 129 | seq_length = 25 130 | for i in range(batch_size): 131 | try: 132 | cur_stream[i] = list(next(generator)) 133 | except StopIteration: 134 | no_more_data = True 135 | bsz = i 136 | break 137 | if lstm_type == "two": 138 | left_embed = np.zeros([bsz, seq_length, embed_dim], np.float32) 139 | right_embed = np.zeros([bsz, seq_length, embed_dim], np.float32) 140 | left_seq_length = np.zeros([bsz], np.int32) 141 | right_seq_length = np.zeros([bsz], np.int32) 142 | else: 143 | max_seq_length = min(50, max([len(elem[1]) + len(elem[2]) + len(elem[3]) for elem in cur_stream if elem])) 144 | token_embed = np.zeros([bsz, max_seq_length, embed_dim], np.float32) 145 | token_seq_mask = np.ones([bsz, max_seq_length]) 146 | token_seq_length = np.zeros([bsz], np.float32) 147 | token_bio = np.zeros([bsz, max_seq_length, 4], np.float32) 148 | token_bio_mask = np.zeros([bsz, max_seq_length], np.float32) 149 | mention_len = np.zeros([bsz], np.float32) 150 | mention_start_ind = np.zeros([bsz, 1], np.int64) 151 | mention_end_ind = np.zeros([bsz, 1], np.int64) 152 | 153 
| max_mention_length = min(20, max([len(elem[3]) for elem in cur_stream if elem])) 154 | max_span_chars = min(25, max(max([len(elem[5]) for elem in cur_stream if elem]), 5)) 155 | annot_ids = np.zeros([bsz], np.object) 156 | span_chars = np.zeros([bsz, max_span_chars], np.int64) 157 | mention_embed = np.zeros([bsz, max_mention_length, embed_dim], np.float32) 158 | targets = np.zeros([bsz, answer_num], np.float32) 159 | 160 | context = [] 161 | mention = [] 162 | 163 | for i in range(bsz): 164 | left_seq = cur_stream[i][1] 165 | if len(left_seq) > seq_length: 166 | left_seq = left_seq[-seq_length:] 167 | mention_seq = cur_stream[i][3] 168 | annot_ids[i] = cur_stream[i][0] 169 | right_seq = cur_stream[i][2] 170 | 171 | mention.append(' '.join(mention_seq)) 172 | context.append(' '.join(left_seq + mention_seq + right_seq)) 173 | 174 | # SEPARATE LSTM SETTING for left / right 175 | if lstm_type == "two": 176 | left_seq_length[i] = max(1, min(len(cur_stream[i][1]), seq_length)) 177 | right_seq_length[i] = max(1, min(len(cur_stream[i][2]), seq_length)) 178 | start_j = max(0, seq_length - len(left_seq)) 179 | for j, left_word in enumerate(left_seq): 180 | if j < seq_length: 181 | left_embed[i, start_j + j, :300] = get_word_vec(left_word, glove_dict) 182 | for j, right_word in enumerate(cur_stream[i][2]): 183 | if j < seq_length: 184 | right_embed[i, j, :300] = get_word_vec(right_word, glove_dict) 185 | # SINGLE LSTM 186 | else: 187 | token_seq = left_seq + mention_seq + right_seq 188 | mention_start_ind[i] = min(seq_length, len(left_seq)) 189 | mention_end_ind[i] = min(49, len(left_seq) + len(mention_seq) - 1) 190 | for j, word in enumerate(token_seq): 191 | if j < max_seq_length: 192 | token_embed[i, j, :300] = get_word_vec(word, glove_dict) 193 | for j, _ in enumerate(left_seq): 194 | token_bio[i, min(j, 49), 0] = 1.0 # token bio: 0(left) start(1) inside(2) 3(after) 195 | token_bio_mask[i, min(j, 49)] = 0.0 196 | for j, _ in enumerate(right_seq): 197 | token_bio[i, 
min(j + len(mention_seq) + len(left_seq), 49), 3] = 1.0 198 | token_bio_mask[i, min(j + len(mention_seq) + len(left_seq), 49)] = 0.0 199 | for j, _ in enumerate(mention_seq): 200 | if j == 0 and len(mention_seq) == 1: 201 | token_bio[i, min(j + len(left_seq), 49), 1] = 1.0 202 | else: 203 | token_bio[i, min(j + len(left_seq), 49), 2] = 1.0 204 | token_bio_mask[i, min(j + len(left_seq), 49)] = 1.0 205 | 206 | token_seq_length[i] = min(50, len(token_seq)) 207 | 208 | 209 | if token_seq_length[i] < 50: 210 | token_seq_mask[i, int(token_seq_length[i]):] = 0 211 | 212 | mention_len[i] = min(len(mention_seq), max_mention_length) 213 | 214 | for j, mention_word in enumerate(mention_seq): 215 | if j < max_mention_length: 216 | if simple_mention: 217 | mention_embed[i, j, :300] = [k / len(cur_stream[i][3]) for k in 218 | get_word_vec(mention_word, glove_dict)] 219 | else: 220 | mention_embed[i, j, :300] = get_word_vec(mention_word, glove_dict) 221 | span_chars[i, :] = pad_slice(cur_stream[i][5], max_span_chars, pad_token=0) 222 | for answer_ind in cur_stream[i][4]: 223 | targets[i, answer_ind] = 1.0 224 | 225 | feed_dict = {"annot_id": annot_ids, 226 | "mention_embed": mention_embed, 227 | "span_chars": span_chars, 228 | "y": targets} 229 | 230 | if lstm_type == "two": 231 | feed_dict["right_embed"] = np.flip(right_embed, 1).copy() 232 | feed_dict["left_embed"] = left_embed 233 | feed_dict["right_seq_length"] = right_seq_length 234 | feed_dict["left_seq_length"] = left_seq_length 235 | else: 236 | feed_dict["token_bio"] = token_bio 237 | feed_dict["token_embed"] = token_embed 238 | feed_dict["token_seq_length"] = token_seq_length 239 | feed_dict["token_seq_mask"] = token_seq_mask 240 | feed_dict["mention_start_ind"] = mention_start_ind 241 | feed_dict["mention_end_ind"] = mention_end_ind 242 | feed_dict["token_bio_mask"] = token_bio_mask 243 | feed_dict["mention_len"] = mention_len 244 | 245 | 246 | # for analysis 247 | feed_dict['context'] = context 248 | 
class TypeDataset(object):
    """Reads entity-typing shards (JSON lines) and serves them as batches."""

    def __init__(self, filepattern, vocab, goal, lstm_type):
        """Collect the shard files and the label space for ``goal``.

        Args:
            filepattern: Glob pattern of the dataset files.
            vocab: ``(char_vocab, word_vectors)`` pair.
            goal: Key into ``constant.ANSWER_NUM_DICT`` / ``constant.ANS2ID_DICT``.
            lstm_type: Forwarded to ``get_example`` by ``get_batch``.
        """
        self._all_shards = glob.glob(filepattern)
        self.goal = goal
        self.lstm_type = lstm_type
        self.answer_num = constant.ANSWER_NUM_DICT[goal]
        random.shuffle(self._all_shards)
        self.char_vocab, self.glove_dict = vocab
        self.word2id = constant.ANS2ID_DICT[goal]
        print("Answer num %d" % (self.answer_num))
        print('Found %d shards at %s' % (len(self._all_shards), filepattern))
        logging.info('Found %d shards at %s' % (len(self._all_shards), filepattern))

    def _load_shard(self, shard_name, eval_data):
        """Read one JSON-lines file and convert it to example tuples.

        Args:
            shard_name: File path.
            eval_data: When false, examples whose mention has 11 or more
                whitespace-separated words are dropped.

        Returns:
            An iterator of ``(annot_id, left_tokens, right_tokens,
            mention_tokens, label_ids, mention_char_ids)`` tuples.  Labels
            missing from ``self.word2id`` are silently skipped.
        """
        with open(shard_name) as f:
            # Blank lines (e.g. a trailing newline) are not valid JSON.
            line_elems = [json.loads(raw) for raw in f if raw.strip()]
        if not eval_data:
            line_elems = [elem for elem in line_elems
                          if len(elem['mention_span'].split()) < 11]
        annot_ids = [elem["annot_id"] for elem in line_elems]
        mention_span = [[self.char_vocab[ch] for ch in elem["mention_span"]]
                        for elem in line_elems]
        mention_seq = [elem["mention_span"].split() for elem in line_elems]
        left_seq = [elem['left_context_token'] for elem in line_elems]
        right_seq = [elem['right_context_token'] for elem in line_elems]
        y_ids = [[self.word2id[label] for label in elem['y_str'] if label in self.word2id]
                 for elem in line_elems]
        return zip(annot_ids, left_seq, right_seq, mention_seq, y_ids, mention_span)

    def _get_sentence(self, epoch, forever, eval_data, shuffle=False):
        """Yield examples shard by shard for ``epoch`` passes (or endlessly).

        Args:
            epoch: Number of passes over all shards when ``forever`` is false.
            forever: Keep cycling over the shards without an upper bound.
            eval_data: Forwarded to ``_load_shard``.
            shuffle: Shuffle the examples of each shard with ``np.random``.
        """
        if not self._all_shards:
            # Without shards an endless pass would spin without yielding.
            return
        passes_done = 0
        while forever or passes_done < epoch:
            for shard in self._all_shards:
                ids = list(self._load_shard(shard, eval_data))
                if shuffle:
                    np.random.shuffle(ids)
                for current_ids in ids:
                    yield current_ids
            passes_done += 1

    def get_batch(self, batch_size=128, epoch=5, forever=False, eval_data=False, simple_mention=True, shuffle=False):
        """Return a generator of feed dicts built by ``get_example``."""
        sentences = self._get_sentence(epoch, forever=forever, eval_data=eval_data, shuffle=shuffle)
        return get_example(sentences, self.glove_dict,
                           batch_size=batch_size, answer_num=self.answer_num, eval_data=eval_data,
                           simple_mention=simple_mention, lstm_type=self.lstm_type)


if __name__ == '__main__':
    build_vocab()
class TensorboardWriter:
    """
    Wraps a pair of ``SummaryWriter`` instances but is a no-op if they're ``None``.
    Allows Tensorboard logging without always checking for Nones first.
    """

    # The annotations are strings so they are not evaluated at definition
    # time; both writers are optional.
    def __init__(self, train_log: "SummaryWriter" = None,
                 validation_log: "SummaryWriter" = None) -> None:
        self._train_log = train_log
        self._validation_log = validation_log

    def add_train_scalar(self, name: str, value: float, global_step: int) -> None:
        """Record ``value`` under ``name`` on the training writer, if any."""
        if self._train_log is not None:
            self._train_log.add_scalar(name, value, global_step)

    def add_validation_scalar(self, name: str, value: float, global_step: int) -> None:
        """Record ``value`` under ``name`` on the validation writer, if any."""
        if self._validation_log is not None:
            self._validation_log.add_scalar(name, value, global_step)


def get_data_gen(dataname, mode, args, vocab_set, goal):
    """Build the batch generator for one dataset file.

    Args:
        dataname: Path (or glob) relative to ``constant.FILE_ROOT``.
        mode: ``'train'``, ``'dev'`` (cycles forever) or anything else for a
            single evaluation pass.
        args: Parsed options; reads ``lstm_type``, ``batch_size``,
            ``num_epoch``, ``eval_batch_size`` and ``enhanced_mention``.
        vocab_set: ``(char_vocab, word_vectors)`` pair.
        goal: Label-space name passed to ``TypeDataset``.
    """
    dataset = data_utils.TypeDataset(constant.FILE_ROOT + dataname, lstm_type=args.lstm_type,
                                     goal=goal, vocab=vocab_set)
    simple_mention = not args.enhanced_mention
    if mode == 'train':
        return dataset.get_batch(args.batch_size, args.num_epoch, forever=False, eval_data=False,
                                 simple_mention=simple_mention)
    # 'dev' is sampled repeatedly during training, so it never runs dry;
    # every other mode makes exactly one pass.
    return dataset.get_batch(args.eval_batch_size, 1, forever=(mode == 'dev'), eval_data=True,
                             simple_mention=simple_mention)
def get_joint_datasets(args):
    """Build the generators for the joint (open / wiki / crowd) setup.

    Returns:
        ``(train_gen_list, valid_gen_list, crowd_dev_gen)`` where the two
        lists hold ``(type_name, generator)`` pairs.  The lists stay empty
        unless ``args.mode == 'train'``.
    """
    vocab = data_utils.get_vocab()
    train_gen_list = []
    valid_gen_list = []
    if args.mode == 'train':
        distant = not args.only_crowd
        if not args.remove_open and distant:
            train_gen_list.append(
                ("open", get_data_gen('distant_supervision/headword_train.json', 'train', args, vocab, "open")))
            valid_gen_list.append(
                ("open", get_data_gen('distant_supervision/headword_dev.json', 'dev', args, vocab, "open")))
        if not args.remove_el and distant:
            wiki_goal = "wiki" if args.multitask else "open"
            valid_gen_list.append(
                ("wiki", get_data_gen('distant_supervision/el_dev.json', 'dev', args, vocab, wiki_goal)))
            train_gen_list.append(
                ("wiki", get_data_gen('distant_supervision/el_train.json', 'train', args, vocab, wiki_goal)))
        if args.add_crowd or args.only_crowd:
            train_gen_list.append(
                ("open", get_data_gen('crowd/train_m.json', 'train', args, vocab, "open")))
    crowd_dev_gen = get_data_gen('crowd/dev.json', 'dev', args, vocab, "open")
    return train_gen_list, valid_gen_list, crowd_dev_gen


def get_datasets(data_lists, args):
    """Build one generator per ``(dataname, mode, goal)`` triple, sharing one vocab."""
    vocab_set = data_utils.get_vocab()
    return [get_data_gen(dataname, mode, args, vocab_set, goal)
            for dataname, mode, goal in data_lists]


def _train(args):
    """Train the baseline model until one training generator is exhausted.

    NOTE(review): the loop nesting was recovered from a listing without
    indentation; the periodic evaluation / checkpoint steps are assumed to
    run once per outer step, after all data sources were visited - confirm.
    """
    if args.data_setup == 'joint':
        train_gen_list, val_gen_list, crowd_dev_gen = get_joint_datasets(args)
        eval_gen, eval_type = crowd_dev_gen, "open"
    else:
        data_gens = get_datasets([(args.train_data, 'train', args.goal),
                                  (args.dev_data, 'dev', args.goal)], args)
        train_gen_list = [(args.goal, data_gens[0])]
        val_gen_list = [(args.goal, data_gens[1])]
        # There is no crowd dev set outside the joint setup (the old code hit
        # a NameError here); evaluate on the configured dev data instead.
        eval_gen, eval_type = data_gens[1], args.goal
    train_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "train"))
    validation_log = SummaryWriter(os.path.join(constant.EXP_ROOT, args.model_id, "log", "validation"))
    tensorboard = TensorboardWriter(train_log, validation_log)

    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    total_loss = 0
    batch_num = 0
    init_time = time.time()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    if args.load:
        load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model, optimizer)

    for idx, m in enumerate(model.modules()):
        logging.info(str(idx) + '->' + str(m))

    final_fname = '{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id)
    best_eval_ma_f1 = 0
    # Defined up front so the checkpoint test below cannot hit an unbound
    # name when save_period fires before the first evaluation.
    crowd_eval_ma_f1 = 0
    while True:
        batch_num += 1  # single batch composed of all train signal passed by.
        for (type_name, data_gen) in train_gen_list:
            try:
                batch, _ = to_torch(next(data_gen))
            except StopIteration:
                # Running out of training data is the only way this loop
                # ends, so the final checkpoint is written here.
                logging.info(type_name + " finished at " + str(batch_num))
                torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()},
                           final_fname)
                return
            optimizer.zero_grad()
            loss, output_logits = model(batch, type_name)
            loss.backward()
            # ``.item()`` replaces ``loss.data.cpu()[0]``, which fails on the
            # 0-dim loss tensors of current PyTorch.
            total_loss += loss.item()
            optimizer.step()

        if batch_num % args.eval_period == 0 and batch_num > 0:
            print('---- eval at step {0:d} ---'.format(batch_num))
            eval_batch, _ = to_torch(next(eval_gen))
            crowd_eval_loss, crowd_eval_ma_f1 = evaluate_batch(
                batch_num, eval_batch, model, tensorboard, eval_type, eval_type)

        if batch_num % args.save_period == 0 and batch_num > 0 and crowd_eval_ma_f1 > best_eval_ma_f1:
            best_eval_ma_f1 = crowd_eval_ma_f1
            save_fname = '{0:s}/{1:s}_best.pt'.format(constant.EXP_ROOT, args.model_id)
            torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, save_fname)
            print(
                'Total {0:.2f} minutes have passed, saving at {1:s} '.format((time.time() - init_time) / 60, save_fname))


def evaluate_batch(batch_num, eval_batch, model, tensorboard, val_type_name, goal):
    """Score one evaluation batch, log the metrics and return ``(loss, ma_f1)``.

    The model is switched to eval mode for the forward pass and back to
    train mode before returning.
    """
    model.eval()
    with torch.no_grad():
        loss, output_logits = model(eval_batch, val_type_name)
    output_index = get_output_index(output_logits)
    eval_loss = loss.item()
    eval_loss_str = 'Eval loss: {0:.7f} at step {1:d}'.format(eval_loss, batch_num)
    gold_pred = get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), goal)
    # Exact-match accuracy over the predicted label sets.
    eval_accu = sum([set(y) == set(yp) for y, yp in gold_pred]) * 1.0 / len(gold_pred)
    tensorboard.add_validation_scalar('eval_acc_' + val_type_name, eval_accu, batch_num)
    tensorboard.add_validation_scalar('eval_loss_' + val_type_name, eval_loss, batch_num)
    eval_str, ma_f1, f1 = get_eval_string(gold_pred)
    print(val_type_name + ":" + eval_loss_str)
    print(gold_pred[:3])
    print(val_type_name + ":" + eval_str)
    logging.info(val_type_name + ":" + eval_loss_str)
    logging.info(val_type_name + ":" + eval_str)
    model.train()
    tensorboard.add_validation_scalar('ma_f1' + val_type_name, ma_f1, batch_num)
    tensorboard.add_validation_scalar('f1' + val_type_name, f1, batch_num)
    return eval_loss, ma_f1


def load_model(reload_model_name, save_dir, model_id, model, optimizer=None):
    """Restore ``model`` (and ``optimizer`` when given) from a checkpoint.

    The file is ``<save_dir>/<reload_model_name>.pt`` when a reload name is
    given and ``<save_dir>/<model_id>.pt`` otherwise.  Without an optimizer
    the parameter shapes and their total count are printed instead.
    """
    checkpoint_name = reload_model_name if reload_model_name else model_id
    model_file_name = '{0:s}/{1:s}.pt'.format(save_dir, checkpoint_name)
    checkpoint = torch.load(model_file_name)
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer:
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        total_params = 0
        for k, elem in checkpoint['state_dict'].items():
            print(k, elem.size())
            total_params += elem.numel()
        param_str = ('Number of total parameters..{0:d}'.format(total_params))
        logging.info(param_str)
        print(param_str)
    logging.info("Loading old file from {0:s}".format(model_file_name))
    print('Loading model from ... {0:s}'.format(model_file_name))
def _test(args):
    """Sweep decision thresholds on one evaluation batch and save the scores.

    Loads ``<EXP_ROOT>/<model_id>_best.pt``, runs the first batch of
    ``args.eval_data`` through the model once and evaluates thresholds
    0.000, 0.005, ..., 0.995.  The collected pairs are stored with
    ``np.save`` as ``<EXP_ROOT>/baseline_pr_dev``.

    Raises:
        ValueError: If ``args.load`` is not set.
    """
    if not args.load:
        raise ValueError("test mode requires the load option to be set")
    test_fname = args.eval_data
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()

    saved_path = constant.EXP_ROOT
    model.load_state_dict(torch.load(saved_path + '/' + args.model_id + '_best.pt')["state_dict"])
    # Built once: every get_datasets call reloads the word vectors.
    data_gens = get_datasets([(test_fname, 'test', args.goal)], args)
    print('Processing... ' + test_fname)
    eval_batch, annot_ids = to_torch(next(data_gens[0]))
    with torch.no_grad():
        loss, output_logits = model(eval_batch, args.goal)
    # The gold labels do not depend on the threshold.
    y = eval_batch['y'].data.cpu().clone().numpy()

    p_and_r = []
    for thresh in tqdm(np.arange(0, 1, 0.005)):
        print('thresh {}'.format(thresh))
        output_index = get_output_index(output_logits, thresh)
        gold_pred = get_gold_pred_str(output_index, y, args.goal)
        # NOTE(review): evaluate_batch unpacks the same helper as
        # (eval_str, ma_f1, f1); confirm that the two values stored here
        # really are precision and recall.
        eval_str, p, r = get_eval_string(gold_pred)
        p_and_r.append([p, r])
        print(eval_str)

    np.save(saved_path + '/baseline_pr_dev', p_and_r)


if __name__ == '__main__':
    config = config_parser.parser.parse_args()
    torch.cuda.manual_seed(config.seed)
    logging.basicConfig(
        filename=constant.EXP_ROOT + "/" + config.model_id + datetime.datetime.now().strftime("_%m-%d_%H") + config.mode + '.txt',
        level=logging.INFO, format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', datefmt='%m-%d %H:%M')
    logging.info(config)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    if config.mode == 'train':
        _train(config)
    elif config.mode == 'test':
        _test(config)
    else:
        raise ValueError("invalid value for 'mode': {}".format(config.mode))
# Evaluation is run as one batch that holds the whole split, so these batch
# sizes presumably equal the number of examples in each file - TODO confirm.
_ONTO_DEV_BATCH = 2202
_ONTO_TEST_BATCH = 8963
_DEFAULT_EVAL_BATCH = 1998


def _eval_batch_size(args, dataname, mode):
    """Pick the evaluation batch size for ``mode`` ('dev' or a test mode)."""
    if args.goal != 'onto':
        return _DEFAULT_EVAL_BATCH
    if mode == 'dev' or 'dev' in dataname:
        return _ONTO_DEV_BATCH
    return _ONTO_TEST_BATCH


def get_data_gen(dataname, mode, args, vocab_set, goal, eval_epoch=1):
    """Build the batch generator for one dataset file.

    Args:
        dataname: Path (or glob) relative to ``constant.FILE_ROOT``.
        mode: ``'train'`` (shuffled, ``args.num_epoch`` passes), ``'dev'``
            (cycles forever) or anything else for ``eval_epoch`` passes.
        args: Parsed options; reads ``lstm_type``, ``batch_size``,
            ``num_epoch``, ``enhanced_mention`` and ``goal``.
        vocab_set: ``(char_vocab, word_vectors)`` pair.
        goal: Label-space name passed to ``TypeDataset``.  Note that the
            evaluation batch size is chosen from ``args.goal``, not from
            this argument.
        eval_epoch: Number of passes in test mode.
    """
    dataset = data_utils.TypeDataset(constant.FILE_ROOT + dataname, lstm_type=args.lstm_type,
                                     goal=goal, vocab=vocab_set)
    simple_mention = not args.enhanced_mention
    if mode == 'train':
        return dataset.get_batch(args.batch_size, args.num_epoch, forever=False, eval_data=False,
                                 simple_mention=simple_mention, shuffle=True)
    eval_batch_size = _eval_batch_size(args, dataname, mode)
    if mode == 'dev':
        return dataset.get_batch(eval_batch_size, 1, forever=True, eval_data=True,
                                 simple_mention=simple_mention)
    return dataset.get_batch(eval_batch_size, eval_epoch, forever=False, eval_data=True,
                             simple_mention=simple_mention)
def get_joint_datasets(args):
    """Build the generators for the joint (open / wiki / crowd) setup.

    Returns:
        ``(train_gen_list, valid_gen_list, crowd_dev_gen)`` where the two
        lists hold ``(type_name, generator)`` pairs.  The lists stay empty
        unless ``args.mode == 'train'``.
    """
    vocab = data_utils.get_vocab(args.embed_source)
    train_gen_list = []
    valid_gen_list = []
    if args.mode == 'train':
        distant = not args.only_crowd
        if not args.remove_open and distant:
            train_gen_list.append(
                ("open", get_data_gen('distant_supervision/headword_train.json', 'train', args, vocab, "open")))
            valid_gen_list.append(
                ("open", get_data_gen('distant_supervision/headword_dev.json', 'dev', args, vocab, "open")))
        if not args.remove_el and distant:
            wiki_goal = "wiki" if args.multitask else "open"
            valid_gen_list.append(
                ("wiki", get_data_gen('distant_supervision/el_dev.json', 'dev', args, vocab, wiki_goal)))
            train_gen_list.append(
                ("wiki", get_data_gen('distant_supervision/el_train.json', 'train', args, vocab, wiki_goal)))
        if args.add_crowd or args.only_crowd:
            train_gen_list.append(
                ("open", get_data_gen('crowd/train_m.json', 'train', args, vocab, "open")))
    crowd_dev_gen = get_data_gen('crowd/dev.json', 'dev', args, vocab, "open")
    return train_gen_list, valid_gen_list, crowd_dev_gen


def get_datasets(data_lists, args, eval_epoch=1):
    """Build one generator per ``(dataname, mode, goal)`` triple, sharing one vocab."""
    vocab_set = data_utils.get_vocab(args.embed_source)
    return [get_data_gen(dataname, mode, args, vocab_set, goal, eval_epoch)
            for dataname, mode, goal in data_lists]


def _train(args):
    """Run the training loop with periodic evaluation and checkpointing.

    NOTE(review): the nesting below was recovered from a listing without
    indentation.  The logging step is assumed to run per data source and the
    evaluation step once per outer step - confirm against the repository.
    """
    logger = get_logger(args)
    crowd_dev_gen = None
    if args.data_setup == 'joint':
        train_gen_list, val_gen_list, crowd_dev_gen = get_joint_datasets(args)
    else:
        data_gens = get_datasets([(args.train_data, 'train', args.goal),
                                  (args.dev_data, 'dev', args.goal)], args)
        train_gen_list = [(args.goal, data_gens[0])]
        val_gen_list = [(args.goal, data_gens[1])]

    # Evaluation logs and "best" checkpoints go to a goal-specific root.
    # Reloading and the final checkpoint keep using constant.EXP_ROOT.
    eval_root = constant.EXP_ROOT_ONTO if args.goal == 'onto' else constant.EXP_ROOT
    validation_log = SummaryWriter(os.path.join(eval_root, args.model_id, "log", "validation"))

    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    total_loss = 0
    start_time = time.time()
    init_time = time.time()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = None
    if args.use_lr_schedule:
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [1000], gamma=0.1)

    if args.load:
        load_model(args.reload_model_name, constant.EXP_ROOT, args.model_id, model, optimizer)

    final_fname = '{0:s}/{1:s}.pt'.format(constant.EXP_ROOT, args.model_id)
    best_f1 = 0
    logger.info('Start training......')
    while True:
        model.batch_num += 1  # single batch composed of all train signal passed by.
        for (type_name, data_gen) in train_gen_list:
            try:
                batch, _ = to_torch(next(data_gen))
            except StopIteration:
                logger.info(type_name + " finished at " + str(model.batch_num))
                torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()},
                           final_fname)
                return
            optimizer.zero_grad()
            loss, output_logits = model(batch, type_name)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()

            if model.batch_num % args.log_period == 0 and model.batch_num > 0:
                gc.collect()
                cur_loss = float(1.0 * loss.item())
                elapsed = time.time() - start_time
                train_loss_str = ('|loss {0:3f} | at {1:d}step | @ {2:.2f} ms/batch'.format(
                    cur_loss, model.batch_num, elapsed * 1000 / args.log_period))
                start_time = time.time()
                logger.info(train_loss_str)

        if scheduler is not None:
            # Stepped once per outer step and after the optimizer updates, as
            # PyTorch requires.  The old extra ``scheduler.step(ma_f1)`` after
            # each evaluation passed a metric where MultiStepLR expects an
            # epoch index, which reset the schedule; it has been dropped.
            scheduler.step()

        if model.batch_num % args.eval_period == 0 and model.batch_num > 0:
            eval_start = time.time()
            logger.info('---- eval at step {0:d} ---'.format(model.batch_num))

            if args.goal == 'onto':
                val_type = "onto"
                dev_gen = val_gen_list[0][1]
            else:
                val_type = "open"
                # The crowd dev set only exists in the joint setup; otherwise
                # fall back to the configured dev data (was a NameError).
                dev_gen = crowd_dev_gen if crowd_dev_gen is not None else val_gen_list[0][1]
            eval_batch, _ = to_torch(next(dev_gen))
            total_eval_loss, gold_preds = evaluate_batch(model.batch_num, eval_batch, model, val_type, args.goal)
            eval_result, output_str = metric_dicts(gold_preds)
            ma_f1 = eval_result['ma_f1']

            if ma_f1 > 0.78 or args.goal == "open":
                if ma_f1 > best_f1 or model.batch_num > 10000:

                    # added for regularization based baselines
                    if args.add_regu and model.batch_num < 8000:
                        # NOTE(review): as written this leaves the training
                        # loop and falls through to the final save below;
                        # confirm that stopping (rather than only skipping
                        # this checkpoint) is intended.
                        break

                    # NOTE(review): the ``elif`` is assumed to pair with this
                    # innermost ``if`` - confirm.
                    if ma_f1 > best_f1:
                        best_f1 = ma_f1
                        save_fname = '{0:s}/{1:s}_{2:f}.pt'.format(eval_root, args.model_id, ma_f1)
                        torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, save_fname)
                        logger.critical(
                            'Found best. Total {0:.2f} minutes have passed, saving at {1:s} '.format((time.time() - init_time) / 60, save_fname))
                    elif args.goal != "open":
                        save_fname = '{0:s}/{1:s}_{2:f}.pt'.format(eval_root, args.model_id, ma_f1)
                        torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, save_fname)
                        logger.critical(
                            'Found best. Total {0:.2f} minutes have passed, saving at {1:s} '.format((time.time() - init_time) / 60, save_fname))

            logger.info('eval loss total: ' + str(total_eval_loss))
            logger.info('eval performance: ' + output_str)
            validation_log.add_scalar('eval_crowd_loss', total_eval_loss, model.batch_num)
            validation_log.add_scalar('eval_crowd_mi_f1', eval_result["f1"], model.batch_num)
            validation_log.add_scalar('eval_crowd_ma_f1', eval_result["ma_f1"], model.batch_num)
            validation_log.add_scalar('eval_crowd_ma_p', eval_result["ma_precision"], model.batch_num)
            validation_log.add_scalar('eval_crowd_ma_recall', eval_result["ma_recall"], model.batch_num)
            logger.info('Eval time clipse {}s'.format(time.time() - eval_start))

        if model.batch_num > args.max_batch:
            break

    # Training finished!
    torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()},
               final_fname)


def evaluate_batch(batch_num, eval_batch, model, val_type_name, goal):
    """Run one evaluation batch and return ``(loss, gold_pred)``.

    The model is switched to eval mode for the forward pass and back to
    train mode before returning.  ``batch_num`` is accepted for interface
    compatibility and is not used.
    """
    model.eval()
    with torch.no_grad():
        loss, output_logits = model(eval_batch, val_type_name)
    output_index = get_output_index(output_logits)
    eval_loss = loss.item()
    gold_pred = get_gold_pred_str(output_index, eval_batch['y'].data.cpu().clone(), goal)
    model.train()
    return eval_loss, gold_pred


def load_model(reload_model_name, save_dir, model_id, model, optimizer=None):
    """Restore ``model`` (and ``optimizer`` when given) from a checkpoint.

    The file is ``<save_dir>/<reload_model_name>.pt`` when a reload name is
    given and ``<save_dir>/<model_id>.pt`` otherwise.  Without an optimizer
    the parameter shapes and their total count are printed instead.
    """
    checkpoint_name = reload_model_name if reload_model_name else model_id
    model_file_name = '{0:s}/{1:s}.pt'.format(save_dir, checkpoint_name)
    checkpoint = torch.load(model_file_name)
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer:
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        total_params = 0
        for k, elem in checkpoint['state_dict'].items():
            print(k, elem.size())
            total_params += elem.numel()
        param_str = ('Number of total parameters..{0:d}'.format(total_params))
        print(param_str)
    print('Loading model from ... {0:s}'.format(model_file_name))
def visualize(args):
    """Export label embeddings of the best checkpoint as a projector embedding.

    Builds ``models.Model`` for ``args.goal``, loads
    ``<constant.EXP_ROOT>/<args.model_id>_best.pt`` and writes one vector per
    label of ``constant.ANS2ID_DICT["open"]`` to a ``SummaryWriter`` rooted at
    ``../visualize/<args.model_id>``.

    Args:
        args: Parsed configuration. This function reads ``goal``,
            ``model_id`` and ``gcn``; the whole object is also passed to
            ``models.Model``.
    """
    # NOTE(review): the checkpoint is always read from constant.EXP_ROOT and
    # the labels always come from the "open" vocabulary, whatever args.goal
    # is -- confirm this is intended for non-"open" goals.
    saved_path = constant.EXP_ROOT
    model = models.Model(args, constant.ANSWER_NUM_DICT[args.goal])
    model.cuda()
    model.eval()
    checkpoint = torch.load(saved_path + '/' + args.model_id + '_best.pt')
    model.load_state_dict(checkpoint["state_dict"])

    label2id = constant.ANS2ID_DICT["open"]
    # The writer is created before any computation so the log directory
    # exists as early as it did originally.  It is bound to a name that does
    # not shadow this function.
    writer = SummaryWriter("../visualize/" + args.model_id)
    try:
        label_list = list(label2id.keys())
        ids = [label2id[label] for label in label_list]

        # Only values are exported; no gradients are needed.
        with torch.no_grad():
            if args.gcn:
                decoder = model.decoder
                connection_matrix = decoder.label_matrix + decoder.weight * decoder.affinity
                # Propagate the output-layer weights over the label graph,
                # normalising each row by its total connection weight.
                propagated = connection_matrix.mm(decoder.linear.weight)
                label_vectors = decoder.transform(
                    propagated / connection_matrix.sum(1, keepdim=True))
            else:
                label_vectors = model.decoder.linear.weight.data

            index = torch.tensor(ids).to(torch.device("cuda"))
            interested_vectors = torch.index_select(label_vectors, 0, index)

        writer.add_embedding(interested_vectors, metadata=label_list, label_img=None)
    finally:
        # Flush and release the event files even if the export fails.
        writer.close()
if __name__ == '__main__':
    # Script entry point: parse the configuration, seed every random number
    # generator in use, then dispatch on config.mode.
    config = read_args()

    # fix random seed
    np.random.seed(config.seed)
    random.seed(config.seed)
    # Seed the CPU generator as well: the model is constructed before it is
    # moved with .cuda(), so seeding only the CUDA generators does not make
    # CPU-side random draws reproducible.
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    if config.mode == 'train':
        _train(config)
    elif config.mode == 'test':
        _test(config)
    elif config.mode == 'visual':
        visualize(config)
    else:
        raise ValueError("invalid value for 'mode': {}".format(config.mode))
], 122 | [ 123 | "y_", 124 | "y_strs\tstatement" 125 | ], 126 | [ 127 | "out_", 128 | "output_str\tstatement" 129 | ], 130 | [ 131 | "total_e", 132 | "total_eval_loss\tstatement" 133 | ], 134 | [ 135 | "eval_a", 136 | "eval_accus\tstatement" 137 | ], 138 | [ 139 | "extr", 140 | "extractionLines\tfunction" 141 | ], 142 | [ 143 | "data", 144 | "data" 145 | ], 146 | [ 147 | "C", 148 | "C\tstatement" 149 | ], 150 | [ 151 | "hypo", 152 | "hypo_indicator\tstatement" 153 | ], 154 | [ 155 | "from", 156 | "from_numpy\tfunction" 157 | ], 158 | [ 159 | "tu", 160 | "true" 161 | ], 162 | [ 163 | "compl", 164 | "complex_\tstatement" 165 | ], 166 | [ 167 | "ent_embe", 168 | "ent_embeddings" 169 | ], 170 | [ 171 | "dev_", 172 | "dev_tasks_\tstatement" 173 | ], 174 | [ 175 | "for", 176 | "for_filtering\tfunction" 177 | ], 178 | [ 179 | "tripl", 180 | "triples_\tstatement" 181 | ], 182 | [ 183 | "trip", 184 | "triple\tstatement" 185 | ], 186 | [ 187 | "rel2tri", 188 | "rel2triples_\tstatement" 189 | ], 190 | [ 191 | "rel2", 192 | "rel2triples_\tstatement" 193 | ], 194 | [ 195 | "rel_s", 196 | "rel_lines\tstatement" 197 | ], 198 | [ 199 | "readlin", 200 | "readlines\tfunction" 201 | ], 202 | [ 203 | "query_righ", 204 | "query_right_connections\tstatement" 205 | ], 206 | [ 207 | "query_le", 208 | "query_left_connections\tstatement" 209 | ], 210 | [ 211 | "suppo", 212 | "support_right\tstatement" 213 | ], 214 | [ 215 | "symbo", 216 | "symbol_id\tstatement" 217 | ], 218 | [ 219 | "encode", 220 | "encode_path\tfunction" 221 | ], 222 | [ 223 | "support_", 224 | "support_self_attn\tstatement" 225 | ], 226 | [ 227 | "encd", 228 | "encode_path\tfunction" 229 | ], 230 | [ 231 | "pai", 232 | "pair_feature\tfunction" 233 | ], 234 | [ 235 | "test_n", 236 | "test_neg_paths\tstatement" 237 | ], 238 | [ 239 | "Modul", 240 | "ModuleList" 241 | ], 242 | [ 243 | "sub_pa", 244 | "sub_path_2\tstatement" 245 | ], 246 | [ 247 | "sym", 248 | "symbol2id\tparam" 249 | ], 250 | [ 251 | "train_", 252 | 
"train_tasks" 253 | ], 254 | [ 255 | "symbol2id", 256 | "symbol2ids\tparam" 257 | ], 258 | [ 259 | "ty", 260 | "type_\tstatement" 261 | ], 262 | [ 263 | "all_re", 264 | "all_reason_relation_triples\tstatement" 265 | ], 266 | [ 267 | "symbol", 268 | "symbol2id" 269 | ], 270 | [ 271 | "rel_e", 272 | "rel_embed\tstatement" 273 | ], 274 | [ 275 | "combi", 276 | "combine_vocab\tfunction" 277 | ], 278 | [ 279 | "con", 280 | "conv3\tstatement" 281 | ], 282 | [ 283 | "se", 284 | "symbol_emb\tstatement" 285 | ], 286 | [ 287 | "inpu", 288 | "input_dim\tparam" 289 | ], 290 | [ 291 | "ent", 292 | "ent_emb_np\tstatement" 293 | ], 294 | [ 295 | "di", 296 | "defaultdict\tclass" 297 | ], 298 | [ 299 | "ran", 300 | "ranks_per_rel\tstatement" 301 | ], 302 | [ 303 | "ge", 304 | "get_query_relation" 305 | ], 306 | [ 307 | "tes", 308 | "test_action_idx\tstatement" 309 | ], 310 | [ 311 | "bath", 312 | "batch_size\tstatement" 313 | ], 314 | [ 315 | "batc", 316 | "batch_size\tstatement" 317 | ], 318 | [ 319 | "agr", 320 | "arg_dic\tstatement" 321 | ], 322 | [ 323 | "ar", 324 | "arg_dic\tstatement" 325 | ], 326 | [ 327 | "add", 328 | "addHandler\tfunction" 329 | ], 330 | [ 331 | "forma", 332 | "formatter\tstatement" 333 | ], 334 | [ 335 | "set", 336 | "setFormatter\tfunction" 337 | ], 338 | [ 339 | "nav_err", 340 | "nav_errors_final\tstatement" 341 | ], 342 | [ 343 | "bat", 344 | "batch_size\tstatement" 345 | ], 346 | [ 347 | "path_", 348 | "path_lengths\tstatement" 349 | ], 350 | [ 351 | "entro", 352 | "entropy_num\tstatement" 353 | ], 354 | [ 355 | "en", 356 | "encoder" 357 | ], 358 | [ 359 | "tw", 360 | "tweet_tokens\tstatement" 361 | ], 362 | [ 363 | "out", 364 | "out_path2\tstatement" 365 | ], 366 | [ 367 | "tweet", 368 | "tweets_with_id\tstatement" 369 | ], 370 | [ 371 | "da", 372 | "data_path\tstatement" 373 | ], 374 | [ 375 | "clea", 376 | "clean_doc\tstatement" 377 | ], 378 | [ 379 | "try", 380 | "try\tTry/Except" 381 | ], 382 | [ 383 | "te", 384 | "tweet_extract\tfunction" 385 | 
], 386 | [ 387 | "html", 388 | "html_files\tstatement" 389 | ], 390 | [ 391 | "sou", 392 | "sources\tstatement" 393 | ], 394 | [ 395 | "news", 396 | "news_media\tstatement" 397 | ], 398 | [ 399 | "sub_", 400 | "sub_path\tstatement" 401 | ], 402 | [ 403 | "main_", 404 | "main_html\tstatement" 405 | ], 406 | [ 407 | "get", 408 | "get_parser" 409 | ], 410 | [ 411 | "htm", 412 | "html_download\tfunction" 413 | ], 414 | [ 415 | "pa", 416 | "page_no" 417 | ], 418 | [ 419 | "entr", 420 | "entropy_coef" 421 | ], 422 | [ 423 | "distances_smo", 424 | "distances_smoothed_1\tstatement" 425 | ], 426 | [ 427 | "coun", 428 | "count_lfd\tstatement" 429 | ], 430 | [ 431 | "distances_", 432 | "distances_b1" 433 | ], 434 | [ 435 | "distan", 436 | "distance_steps\tstatement" 437 | ], 438 | [ 439 | "base", 440 | "baselines\tstatement" 441 | ], 442 | [ 443 | "repla", 444 | "replay_memory\tstatement" 445 | ], 446 | [ 447 | "dire", 448 | "direction_id\tstatement" 449 | ], 450 | [ 451 | "blo", 452 | "block_id\tstatement" 453 | ], 454 | [ 455 | "exe", 456 | "expert_batch\tstatement" 457 | ], 458 | [ 459 | "num_", 460 | "num_of_neg\tstatement" 461 | ], 462 | [ 463 | "recal", 464 | "recalls\tstatement" 465 | ], 466 | [ 467 | "pos", 468 | "pos_set\tstatement" 469 | ], 470 | [ 471 | "clip", 472 | "clip_value\tparam" 473 | ], 474 | [ 475 | "precis", 476 | "precisions\tstatement" 477 | ], 478 | [ 479 | "episode", 480 | "episode_len\tstatement" 481 | ], 482 | [ 483 | "epis", 484 | "episode_len\tstatement" 485 | ], 486 | [ 487 | "rewa", 488 | "rewards\tstatement" 489 | ], 490 | [ 491 | "num", 492 | "num_epochs" 493 | ], 494 | [ 495 | "rew", 496 | "reward\tstatement" 497 | ], 498 | [ 499 | "poli", 500 | "policy_batch\tstatement" 501 | ], 502 | [ 503 | "dis", 504 | "dist_entropy\tstatement" 505 | ], 506 | [ 507 | "cli", 508 | "clip_grad_norm" 509 | ], 510 | [ 511 | "expert", 512 | "expert_loss\tstatement" 513 | ], 514 | [ 515 | "random", 516 | "random_batch\tstatement" 517 | ] 518 | ] 519 | }, 520 | 
"buffers": 521 | [ 522 | ], 523 | "build_system": "", 524 | "build_system_choices": 525 | [ 526 | [ 527 | [ 528 | [ 529 | "Packages/C++/C++ Single File.sublime-build", 530 | "" 531 | ], 532 | [ 533 | "Packages/C++/C++ Single File.sublime-build", 534 | "Run" 535 | ] 536 | ], 537 | [ 538 | "Packages/C++/C++ Single File.sublime-build", 539 | "" 540 | ] 541 | ], 542 | [ 543 | [ 544 | [ 545 | "Packages/LaTeXTools/LaTeX.sublime-build", 546 | "" 547 | ], 548 | [ 549 | "Packages/LaTeXTools/LaTeX.sublime-build", 550 | "Traditional" 551 | ], 552 | [ 553 | "Packages/LaTeXTools/LaTeX.sublime-build", 554 | "PdfLaTeX" 555 | ], 556 | [ 557 | "Packages/LaTeXTools/LaTeX.sublime-build", 558 | "XeLaTeX" 559 | ], 560 | [ 561 | "Packages/LaTeXTools/LaTeX.sublime-build", 562 | "LuaLaTeX" 563 | ], 564 | [ 565 | "Packages/LaTeXTools/LaTeX.sublime-build", 566 | "Basic Builder" 567 | ], 568 | [ 569 | "Packages/LaTeXTools/LaTeX.sublime-build", 570 | "Basic Builder - PdfLaTeX" 571 | ], 572 | [ 573 | "Packages/LaTeXTools/LaTeX.sublime-build", 574 | "Basic Builder - XeLaTeX" 575 | ], 576 | [ 577 | "Packages/LaTeXTools/LaTeX.sublime-build", 578 | "Basic Builder - LuaLaTeX" 579 | ], 580 | [ 581 | "Packages/LaTeXTools/LaTeX.sublime-build", 582 | "Script Builder" 583 | ] 584 | ], 585 | [ 586 | "Packages/LaTeXTools/LaTeX.sublime-build", 587 | "XeLaTeX" 588 | ] 589 | ], 590 | [ 591 | [ 592 | [ 593 | "Packages/Python/Python.sublime-build", 594 | "" 595 | ], 596 | [ 597 | "Packages/Python/Python.sublime-build", 598 | "Syntax Check" 599 | ] 600 | ], 601 | [ 602 | "Packages/Python/Python.sublime-build", 603 | "" 604 | ] 605 | ] 606 | ], 607 | "build_varint": "", 608 | "command_palette": 609 | { 610 | "height": 357.0, 611 | "last_filter": "", 612 | "selected_items": 613 | [ 614 | [ 615 | "fix", 616 | "Python Fix Imports" 617 | ], 618 | [ 619 | "sft", 620 | "SFTP: Map to Remote…" 621 | ], 622 | [ 623 | "previ", 624 | "Markdown Preview: Preview in Browser" 625 | ], 626 | [ 627 | "status bar", 628 | "View: 
Toggle Status Bar" 629 | ], 630 | [ 631 | "remov", 632 | "Package Control: Remove Package" 633 | ], 634 | [ 635 | "sublimelinter", 636 | "Preferences: SublimeLinter Settings" 637 | ], 638 | [ 639 | "jed", 640 | "Project: Refresh Folders" 641 | ], 642 | [ 643 | "ins", 644 | "Package Control: Install Package" 645 | ], 646 | [ 647 | "remo", 648 | "Package Control: Remove Package" 649 | ], 650 | [ 651 | "brows", 652 | "Preferences: Browse Packages" 653 | ], 654 | [ 655 | "CHECK", 656 | "Build With: Python - Syntax Check" 657 | ], 658 | [ 659 | "packagesync", 660 | "PackageSync: Backup/Restore - Restore Packages From Folder" 661 | ], 662 | [ 663 | "makd", 664 | "Set Syntax: Markdown" 665 | ], 666 | [ 667 | "install", 668 | "Package Control: Install Package" 669 | ], 670 | [ 671 | "s", 672 | "SFTP: Browse Server…" 673 | ], 674 | [ 675 | "to tab", 676 | "Indentation: Convert to Tabs" 677 | ], 678 | [ 679 | "to space", 680 | "Indentation: Convert to Spaces" 681 | ], 682 | [ 683 | "ssy html", 684 | "Set Syntax: HTML" 685 | ], 686 | [ 687 | "tab", 688 | "Indentation: Convert to Tabs" 689 | ], 690 | [ 691 | "inde", 692 | "Indentation: Convert to Tabs" 693 | ], 694 | [ 695 | "inden", 696 | "Indentation: Convert to Tabs" 697 | ], 698 | [ 699 | "set syntax h", 700 | "Set Syntax: HTML" 701 | ], 702 | [ 703 | "set syn", 704 | "Set Syntax: YAML" 705 | ], 706 | [ 707 | "PRE", 708 | "Markdown Preview: Preview in Browser" 709 | ], 710 | [ 711 | "pre", 712 | "Markdown Preview: Preview in Browser" 713 | ], 714 | [ 715 | "re", 716 | "Package Control: Remove Package" 717 | ], 718 | [ 719 | "rem", 720 | "Package Control: Remove Package" 721 | ], 722 | [ 723 | "ssy py", 724 | "Set Syntax: Python" 725 | ], 726 | [ 727 | "remove pa", 728 | "Package Control: Remove Package" 729 | ], 730 | [ 731 | "remi", 732 | "Package Control: Remove Repository" 733 | ], 734 | [ 735 | "i", 736 | "Package Control: Install Package" 737 | ], 738 | [ 739 | "pr", 740 | "Markdown Preview: Preview in Browser" 741 | 
], 742 | [ 743 | "preview", 744 | "Markdown Preview: Preview in Browser" 745 | ], 746 | [ 747 | "ssy latex", 748 | "Set Syntax: LaTeX" 749 | ], 750 | [ 751 | "Pla", 752 | "Set Syntax: Plain Text" 753 | ], 754 | [ 755 | "mark", 756 | "Set Syntax: Markdown Extended" 757 | ], 758 | [ 759 | "lat", 760 | "Set Syntax: LaTeX" 761 | ], 762 | [ 763 | "pla", 764 | "Set Syntax: Plain Text" 765 | ], 766 | [ 767 | "pyth", 768 | "Set Syntax: Python" 769 | ], 770 | [ 771 | "ssy late", 772 | "Set Syntax: LaTeX" 773 | ], 774 | [ 775 | "br", 776 | "Preferences: Browse Packages" 777 | ], 778 | [ 779 | "brow", 780 | "Preferences: Browse Packages" 781 | ], 782 | [ 783 | "py", 784 | "Set Syntax: Python" 785 | ], 786 | [ 787 | "pytho", 788 | "Set Syntax: Python" 789 | ], 790 | [ 791 | "close ", 792 | "File: Close All" 793 | ], 794 | [ 795 | "box", 796 | "Boxy Theme: Activation" 797 | ], 798 | [ 799 | "Box", 800 | "Boxy Theme: Activation" 801 | ], 802 | [ 803 | "satis", 804 | "Package Control: Satisfy Dependencies" 805 | ], 806 | [ 807 | "ssy ma", 808 | "Set Syntax: Markdown Extended" 809 | ], 810 | [ 811 | "instal", 812 | "Package Control: Install Package" 813 | ], 814 | [ 815 | "ssy c++", 816 | "Set Syntax: C++" 817 | ], 818 | [ 819 | "sf", 820 | "SFTP: Setup Server…" 821 | ], 822 | [ 823 | "sftp", 824 | "SFTP: Edit Remote Mapping…" 825 | ], 826 | [ 827 | "ssy pytho", 828 | "Set Syntax: Python" 829 | ], 830 | [ 831 | "remove", 832 | "Package Control: Remove Package" 833 | ], 834 | [ 835 | "pac", 836 | "Install Package Control" 837 | ] 838 | ], 839 | "width": 462.0 840 | }, 841 | "console": 842 | { 843 | "height": 0.0, 844 | "history": 845 | [ 846 | ] 847 | }, 848 | "distraction_free": 849 | { 850 | "menu_visible": true, 851 | "show_minimap": false, 852 | "show_open_files": false, 853 | "show_tabs": false, 854 | "side_bar_visible": false, 855 | "status_bar_visible": false 856 | }, 857 | "expanded_folders": 858 | [ 859 | ], 860 | "file_history": 861 | [ 862 | 
"/Users/wenhan/Remote/inconsistency/label_corr.py", 863 | "/Users/wenhan/Remote/inconsistency/model_utils.py", 864 | "/Users/wenhan/Remote/inconsistency/eval_metric.py", 865 | "/Users/wenhan/Remote/inconsistency/README.md", 866 | "/Users/wenhan/Remote/inconsistency/run_onto.sh", 867 | "/Users/wenhan/Remote/inconsistency/run_gcn_onto.sh", 868 | "/Users/wenhan/Remote/inconsistency/resources/constant.py", 869 | "/Users/wenhan/Remote/inconsistency/models.py", 870 | "/Users/wenhan/Remote/inconsistency/analysis.py", 871 | "/Users/wenhan/Remote/inconsistency/run_label_gcn.sh", 872 | "/Users/wenhan/Remote/inconsistency/config_parser.py", 873 | "/Users/wenhan/Remote/inconsistency/data_utils.py", 874 | "/Users/wenhan/Remote/inconsistency/main.py", 875 | "/Users/wenhan/Remote/inconsistency/scorer.py", 876 | "/Users/wenhan/Remote/inconsistency/baseline.py", 877 | "/Users/wenhan/Desktop/tmp.txt", 878 | "/Users/wenhan/Desktop/pr_base.txt", 879 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1543355686/nlp/home/derenlei/open_type/main.py", 880 | "/Users/wenhan/Documents/CV/cv-us.tex", 881 | "/Users/wenhan/Web/index.html", 882 | "/Users/wenhan/Remote/inconsistency/attention.py", 883 | "/Users/wenhan/Remote/inconsistency/run_regu.sh", 884 | "/Users/wenhan/Remote/inconsistency/sftp-config.json", 885 | "/Users/wenhan/Remote/inconsistency/annotate.py", 886 | "/Users/wenhan/Remote/inconsistency/da.py", 887 | "/Users/wenhan/Remote/inconsistency/train.py", 888 | "/Users/wenhan/Remote/inconsistency/data_loader.py", 889 | "/Users/wenhan/Remote/inconsistency/swagaf/swag_loader.py", 890 | "/Users/wenhan/Remote/inconsistency/args.py", 891 | "/Users/wenhan/Remote/inconsistency/inconsistency.sublime-project", 892 | "/Users/wenhan/Library/Application Support/Sublime Text 3/Packages/Anaconda/Anaconda.sublime-settings", 893 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1539588211/mapped/mnt/hdd/xwhan/projects/inconsistency/data_loader.py", 894 | 
"/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1539589171/mapped/mnt/hdd/xwhan/projects/inconsistency/args.py", 895 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1539596622/mapped/mnt/hdd/xwhan/projects/inconsistency/train.py", 896 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1539588983/mapped/mnt/hdd/xwhan/projects/inconsistency/da.py", 897 | "/Users/wenhan/Remote/inconsistency/swagaf/dataset_reader.py", 898 | "/Users/wenhan/Remote/inconsistency/distinguish.py", 899 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1539071606/mapped/mnt/hdd/xwhan/projects/inconsistency/swagaf/swag_loader.py", 900 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1539068335/mapped/mnt/hdd/xwhan/projects/inconsistency/genome.py", 901 | "/Users/wenhan/remote/one-shot/modules.py", 902 | "/Users/wenhan/Library/Application Support/Sublime Text 3/Packages/SFTP/SFTP.sublime-settings", 903 | "/Users/wenhan/remote/one-shot/data_loader.py", 904 | "/Users/wenhan/remote/one-shot/matcher.py", 905 | "/Users/wenhan/remote/one-shot/sftp-config.json", 906 | "/Users/wenhan/Downloads/orig_squad2/squad2/train.py", 907 | "/Users/wenhan/Downloads/backup/model/dcn.py", 908 | "/Users/wenhan/remote/one-shot/one_shot.sublime-project", 909 | "/Users/wenhan/Library/Application Support/Sublime Text 3/Packages/Jedi - Python autocompletion/Default.sublime-keymap", 910 | "/Users/wenhan/Library/Application Support/Sublime Text 3/Packages/Jedi - Python autocompletion/sublime_jedi.sublime-settings", 911 | "/Users/wenhan/remote/one-shot/args.py", 912 | "/Users/wenhan/remote/one-shot/data.py", 913 | "/Users/wenhan/remote/one-shot/trainer.py", 914 | "/Users/wenhan/remote/one-shot/README.md", 915 | "/Users/wenhan/remote/one-shot/.gitignore", 916 | "/Users/wenhan/Documents/Notes/BookNotes/jacob_nlp.md", 917 | "/Eisenstein NLP Notes.md", 918 | 
"/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1532971611/nlp/mnt/hdd/xwhan/DrQA/scripts/reader/train.py", 919 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1532970680/nlp/mnt/hdd/xwhan/DrQA/scripts/reader/preprocess.py", 920 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1532988958/nlp/mnt/hdd/xwhan/DrQA/drqa/reader/utils.py", 921 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1532995074/nlp/home/xwhan/squad/annotate.py", 922 | "/Users/wenhan/remote/DrQA/sftp-config.json", 923 | "/Users/wenhan/web/docs/inde", 924 | "/Users/wenhan/web/index.html", 925 | "/Users/wenhan/s_ppo_plot/plot.py", 926 | "/Users/wenhan/codebases/plot/functions.py", 927 | "/Users/wenhan/codebases/plot/template.py", 928 | "/Users/wenhan/Downloads/scheduled-policy-optimization (5) 2/ijcai18.tex", 929 | "/Users/wenhan/Downloads/scheduled-policy-optimization (5) 2/ijcai18.bib", 930 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1527535498/george/mnt/sshd/xwhan/Matterport3DSimulator/tasks/R2R/opts.py", 931 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1527535438/george/mnt/sshd/xwhan/Matterport3DSimulator/tasks/R2R/log_utils.py", 932 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1527535339/george/mnt/sshd/xwhan/Matterport3DSimulator/tasks/R2R/train_model_agent.py", 933 | "/Users/wenhan/Downloads/FormattingGuidelines2018 2/ijcai18.tex", 934 | "/Users/wenhan/Plotdata/plot.py", 935 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1526714393/nlp/mnt/hdd/xwhan/Fast-TransX/eval.py", 936 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1526011586/nlp/mnt/hdd/xwhan/OpenKE/extract.py", 937 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1526719062/nlp/mnt/hdd/xwhan/Fast-TransX/build.py", 938 | 
"/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1526429769/nlp/mnt/hdd/xwhan/OpenKE/extract_from_model.py", 939 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1526011574/nlp/mnt/hdd/xwhan/OpenKE/unify.py", 940 | "/Users/wenhan/Downloads/DistMult_NELL.result", 941 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525929149/nlp/mnt/hdd/xwhan/OpenKE/distmult_train.py", 942 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525939573/nlp/mnt/hdd/xwhan/OpenKE/transE_train.py", 943 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525939685/nlp/mnt/hdd/xwhan/OpenKE/transE_eval.py", 944 | "/Users/wenhan/Downloads/mnet_nell.result", 945 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525940857/nlp/mnt/hdd/xwhan/OpenKE/complex_train.py", 946 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525930382/nlp/mnt/hdd/xwhan/OpenKE/complex_eval.py", 947 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1526010657/nlp/mnt/hdd/xwhan/OpenKE/unify.py", 948 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525934972/nlp/mnt/hdd/xwhan/OpenKE/distmult_eval.py", 949 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525930561/nlp/mnt/hdd/xwhan/OpenKE/complex_train.py", 950 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525906774/nlp/mnt/hdd/xwhan/OpenKE/example_train_transe.py", 951 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525667733/nlp/mnt/hdd/xwhan/one_shot/grapher.py", 952 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525626267/nlp/mnt/hdd/xwhan/Fast-TransX/transE/transE.cpp", 953 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525627880/nlp/mnt/hdd/xwhan/Fast-TransX/eval.py", 954 | 
"/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525626912/nlp/mnt/hdd/xwhan/Fast-TransX/build.py", 955 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525558837/nlp/mnt/hdd/xwhan/one_shot/data_loader.py", 956 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525471880/nlp/mnt/hdd/xwhan/one_shot/Wikidata/process.py", 957 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525547225/nlp/home/xwhan/projects/TweetRC/results/eval_all.py", 958 | "/private/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/54a11f44-baf1-437d-a55e-60cd4fcb670b/home/xwhan/projects/TweetRC/results/eval_all.py", 959 | "/private/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/54a11f44-baf1-437d-a55e-60cd4fcb670b/home/xwhan/data/TweetQA/evaluate.py", 960 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1525335918/nlp/mnt/hdd/xwhan/KBs/Wikidata/process.py", 961 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524973092/nlp/mnt/hdd/xwhan/one_shot/grapher.py", 962 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524552374/nlp/mnt/hdd/xwhan/one_shot/data_loader.py", 963 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524523855/nlp/mnt/hdd/xwhan/one_shot/matcher.py", 964 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524524624/nlp/mnt/hdd/xwhan/one_shot/modules.py", 965 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524523876/nlp/mnt/hdd/xwhan/one_shot/trainer.py", 966 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524610190/nlp/mnt/hdd/xwhan/one_shot/data.py", 967 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524523700/nlp/mnt/hdd/xwhan/one_shot/args.py", 968 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524552491/nlp/mnt/hdd/xwhan/one_shot/grapher.py", 969 | 
"/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524523983/nlp/mnt/hdd/xwhan/one_shot/utils.py", 970 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524523811/nlp/mnt/hdd/xwhan/one_shot/train.py", 971 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524523663/nlp/mnt/hdd/xwhan/one_shot/matching.py", 972 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524099786/nlp/home/xwhan/projects/lifelong/one_shot/args.py", 973 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524120006/nlp/home/xwhan/projects/lifelong/one_shot/train.py", 974 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523998689/nlp/home/xwhan/projects/lifelong/one_shot/matching.py", 975 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523998677/nlp/home/xwhan/projects/lifelong/one_shot/modules.py", 976 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523740211/nlp/home/xwhan/projects/lifelong/one_shot/utils.py", 977 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524189898/nlp/home/xwhan/projects/lifelong/one_shot/check_path.py", 978 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523952746/nlp/home/xwhan/projects/lifelong/one_shot/data.py", 979 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1524099738/nlp/home/xwhan/projects/lifelong/one_shot/options.py", 980 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523740024/nlp/home/xwhan/projects/lifelong/one_shot/matching.py", 981 | "/Users/wenhan/web/custom.css", 982 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523740049/nlp/home/xwhan/projects/lifelong/one_shot/agent.py", 983 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523740163/nlp/home/xwhan/projects/lifelong/one_shot/path.py", 984 | 
"/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523262715/nlp/home/xwhan/projects/lifelong/one_shot/distill.py", 985 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523233290/nlp/home/xwhan/projects/lifelong/one_shot/meta.py", 986 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523230487/nlp/home/xwhan/projects/lifelong/one_shot/agent.py", 987 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523232312/nlp/home/xwhan/projects/lifelong/one_shot/options.py", 988 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523230512/nlp/home/xwhan/projects/lifelong/one_shot/train.py", 989 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1523294369/nlp/home/xwhan/projects/lifelong/one_shot/env.py" 990 | ], 991 | "find": 992 | { 993 | "height": 48.0 994 | }, 995 | "find_in_files": 996 | { 997 | "height": 132.0, 998 | "where_history": 999 | [ 1000 | ] 1001 | }, 1002 | "find_state": 1003 | { 1004 | "case_sensitive": true, 1005 | "find_history": 1006 | [ 1007 | "probs", 1008 | "json", 1009 | "token_seq", 1010 | "model_", 1011 | "model", 1012 | "model_id", 1013 | "add_regu", 1014 | "add_regy", 1015 | "add_regu", 1016 | "':", 1017 | "regu", 1018 | "max_b", 1019 | "save", 1020 | "use_elmo", 1021 | "use_", 1022 | "elmo", 1023 | "max_mention", 1024 | "token_bio_mask", 1025 | "max_span_chars", 1026 | "max_span_chars ", 1027 | "mention_span", 1028 | "fusion", 1029 | "val_gen_list", 1030 | "head_", 1031 | "add", 1032 | "type", 1033 | "goal", 1034 | "get_vocab", 1035 | "batch_num", 1036 | "vocab", 1037 | "get_vocab", 1038 | "load_embedding_dict", 1039 | "batch_num", 1040 | "total_eval_loss", 1041 | "mask", 1042 | "eval_loss", 1043 | "word2id", 1044 | "save", 1045 | "logger", 1046 | "loggerG", 1047 | "logger", 1048 | "logging", 1049 | "crowd_dev_gen", 1050 | "eval_batch", 1051 | "evaluate", 1052 | "@", 1053 | "logging", 1054 | "loss", 1055 | "logging", 
1056 | "eval", 1057 | "torch.save", 1058 | "Eval", 1059 | "TensorboardWriter", 1060 | "evaluate_batch", 1061 | "eval_batch", 1062 | "gc", 1063 | "notes", 1064 | "onto", 1065 | "next", 1066 | "F.", 1067 | "tanh", 1068 | "sigmoid", 1069 | "save", 1070 | "sentence", 1071 | "question", 1072 | "feat", 1073 | "csv", 1074 | "fine", 1075 | "fine_tune", 1076 | "tanh", 1077 | "embed_path", 1078 | "loggin", 1079 | "embed", 1080 | "eval", 1081 | "rel2candidates_all", 1082 | "tasks", 1083 | "cleaned", 1084 | "candidates_rel", 1085 | "candidates", 1086 | "meta", 1087 | "attn", 1088 | "n_head", 1089 | "writer", 1090 | "tqdm", 1091 | "logging", 1092 | "AVG", 1093 | "loss", 1094 | "BATCH", 1095 | "AVG", 1096 | "loss", 1097 | "logging", 1098 | "data", 1099 | "data[0]", 1100 | "self.drop", 1101 | "meta", 1102 | "normal", 1103 | "norm", 1104 | "normal", 1105 | "self.log", 1106 | "FIX", 1107 | "prin", 1108 | "args.", 1109 | "id", 1110 | "summary", 1111 | "writer", 1112 | "id", 1113 | "args.id", 1114 | "logger", 1115 | "anaconda_", 1116 | "anaconda_lin", 1117 | "linting", 1118 | "pe", 1119 | "pep", 1120 | "pep8", 1121 | "pep" 1122 | ], 1123 | "highlight": true, 1124 | "in_selection": false, 1125 | "preserve_case": false, 1126 | "regex": false, 1127 | "replace_history": 1128 | [ 1129 | ], 1130 | "reverse": false, 1131 | "show_context": true, 1132 | "use_buffer2": true, 1133 | "whole_word": false, 1134 | "wrap": false 1135 | }, 1136 | "groups": 1137 | [ 1138 | { 1139 | "sheets": 1140 | [ 1141 | ] 1142 | } 1143 | ], 1144 | "incremental_find": 1145 | { 1146 | "height": 23.0 1147 | }, 1148 | "input": 1149 | { 1150 | "height": 39.0 1151 | }, 1152 | "layout": 1153 | { 1154 | "cells": 1155 | [ 1156 | [ 1157 | 0, 1158 | 0, 1159 | 1, 1160 | 1 1161 | ] 1162 | ], 1163 | "cols": 1164 | [ 1165 | 0.0, 1166 | 1.0 1167 | ], 1168 | "rows": 1169 | [ 1170 | 0.0, 1171 | 1.0 1172 | ] 1173 | }, 1174 | "menu_visible": true, 1175 | "output.SublimeLinter": 1176 | { 1177 | "height": 0.0 1178 | }, 1179 | 
"output.SublimeLinter Messages": 1180 | { 1181 | "height": 94.0 1182 | }, 1183 | "output.exec": 1184 | { 1185 | "height": 130.0 1186 | }, 1187 | "output.find_results": 1188 | { 1189 | "height": 0.0 1190 | }, 1191 | "output.latextools": 1192 | { 1193 | "height": 112.0 1194 | }, 1195 | "output.markdown": 1196 | { 1197 | "height": 100.0 1198 | }, 1199 | "output.mdpopups": 1200 | { 1201 | "height": 0.0 1202 | }, 1203 | "output.sftp": 1204 | { 1205 | "height": 112.0 1206 | }, 1207 | "pinned_build_system": "", 1208 | "project": "inconsistency.sublime-project", 1209 | "replace": 1210 | { 1211 | "height": 42.0 1212 | }, 1213 | "save_all_on_build": true, 1214 | "select_file": 1215 | { 1216 | "height": 0.0, 1217 | "last_filter": "", 1218 | "selected_items": 1219 | [ 1220 | [ 1221 | "rea", 1222 | "README.md" 1223 | ], 1224 | [ 1225 | "con", 1226 | "config_parser.py" 1227 | ], 1228 | [ 1229 | "cons", 1230 | "resources/constant.py" 1231 | ], 1232 | [ 1233 | "mode", 1234 | "model_utils.py" 1235 | ], 1236 | [ 1237 | "metr", 1238 | "eval_metric.py" 1239 | ], 1240 | [ 1241 | "ev", 1242 | "eval_metric.py" 1243 | ], 1244 | [ 1245 | "e", 1246 | "eval_metric.py" 1247 | ], 1248 | [ 1249 | "ma", 1250 | "main.py" 1251 | ], 1252 | [ 1253 | "da", 1254 | "data_utils.py" 1255 | ], 1256 | [ 1257 | "labe", 1258 | "label_corr.py" 1259 | ], 1260 | [ 1261 | "lab", 1262 | "label_corr.py" 1263 | ], 1264 | [ 1265 | "mod", 1266 | "models.py" 1267 | ], 1268 | [ 1269 | "sco", 1270 | "scorer.py" 1271 | ], 1272 | [ 1273 | "mo", 1274 | "model_utils.py" 1275 | ], 1276 | [ 1277 | "atte", 1278 | "attention.py" 1279 | ], 1280 | [ 1281 | "co", 1282 | "config_parser.py" 1283 | ], 1284 | [ 1285 | "data", 1286 | "data_utils.py" 1287 | ], 1288 | [ 1289 | "model", 1290 | "model_utils.py" 1291 | ], 1292 | [ 1293 | "confi", 1294 | "config_parser.py" 1295 | ], 1296 | [ 1297 | "dat", 1298 | "data_utils.py" 1299 | ], 1300 | [ 1301 | "mai", 1302 | "main.py" 1303 | ], 1304 | [ 1305 | "tra", 1306 | "trainer.py" 1307 | ], 
1308 | [ 1309 | "", 1310 | "Package Control Messages" 1311 | ], 1312 | [ 1313 | "pol", 1314 | "/var/folders/x0/64q0_xj50950b8s4j24mc2wr0000gn/T/sublime-sftp-browse-1490309444/nlp/home/xwhan/RL_KB/data/FB15k-237/policy_1.sh" 1315 | ], 1316 | [ 1317 | "d", 1318 | "~/course/cs170-private/labs/include/dllist.h" 1319 | ], 1320 | [ 1321 | "my", 1322 | "infrastructure/my_malloc/my_malloc.c" 1323 | ] 1324 | ], 1325 | "width": 0.0 1326 | }, 1327 | "select_project": 1328 | { 1329 | "height": 500.0, 1330 | "last_filter": "con", 1331 | "selected_items": 1332 | [ 1333 | [ 1334 | "con", 1335 | "~/Remote/continual/continual.sublime-project" 1336 | ], 1337 | [ 1338 | "", 1339 | "~/Remote/continual/continual.sublime-project" 1340 | ], 1341 | [ 1342 | "incon", 1343 | "~/Remote/inconsistency/inconsistency.sublime-project" 1344 | ], 1345 | [ 1346 | "in", 1347 | "~/Remote/continual/continual.sublime-project" 1348 | ], 1349 | [ 1350 | "conti\t", 1351 | "~/Remote/continual/continual.sublime-project" 1352 | ], 1353 | [ 1354 | "inc", 1355 | "~/Remote/inconsistency/inconsistency.sublime-project" 1356 | ] 1357 | ], 1358 | "width": 380.0 1359 | }, 1360 | "select_symbol": 1361 | { 1362 | "height": 157.0, 1363 | "last_filter": "", 1364 | "selected_items": 1365 | [ 1366 | [ 1367 | "", 1368 | "DFS" 1369 | ] 1370 | ], 1371 | "width": 374.0 1372 | }, 1373 | "selected_group": 0, 1374 | "settings": 1375 | { 1376 | }, 1377 | "show_minimap": true, 1378 | "show_open_files": true, 1379 | "show_tabs": true, 1380 | "side_bar_visible": true, 1381 | "side_bar_width": 280.0, 1382 | "status_bar_visible": true, 1383 | "template_settings": 1384 | { 1385 | } 1386 | } 1387 | --------------------------------------------------------------------------------