├── barchybrid └── src │ ├── utils │ ├── evaluation_script │ │ ├── weights.clas │ │ └── conll17_ud_eval.py │ └── eval.pl │ ├── utils.py │ ├── parser.py │ └── arc_hybrid.py ├── bmstparser └── src │ ├── utils │ └── evaluation_script │ │ ├── weights.clas │ │ └── conll17_ud_eval.py │ ├── utils.py │ ├── decoder.py │ ├── parser.py │ └── mstlstm.py ├── README.md └── LICENSE /barchybrid/src/utils/evaluation_script/weights.clas: -------------------------------------------------------------------------------- 1 | # Relations used to attach function words to content words 2 | aux 0.1 3 | case 0.1 4 | cc 0.1 5 | clf 0.1 6 | cop 0.1 7 | det 0.1 8 | mark 0.1 9 | 10 | # Punctuation 11 | punct 0 12 | -------------------------------------------------------------------------------- /bmstparser/src/utils/evaluation_script/weights.clas: -------------------------------------------------------------------------------- 1 | # Relations used to attach function words to content words 2 | aux 0.1 3 | case 0.1 4 | cc 0.1 5 | clf 0.1 6 | cop 0.1 7 | det 0.1 8 | mark 0.1 9 | 10 | # Punctuation 11 | punct 0 12 | -------------------------------------------------------------------------------- /bmstparser/src/utils.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import re 3 | 4 | 5 | class ConllEntry: 6 | def __init__(self, id, form, lemma, pos, cpos, feats=None, parent_id=None, relation=None, deps=None, misc=None): 7 | self.id = id 8 | self.form = form 9 | self.norm = normalize(form) 10 | self.cpos = cpos.upper() 11 | self.pos = pos.upper() 12 | self.parent_id = parent_id 13 | self.relation = relation 14 | 15 | self.lemma = lemma 16 | self.feats = feats 17 | self.deps = deps 18 | self.misc = misc 19 | 20 | self.pred_parent_id = None 21 | self.pred_relation = None 22 | 23 | def __str__(self): 24 | values = [str(self.id), self.form, self.lemma, self.cpos, self.pos, self.feats, str(self.pred_parent_id) if self.pred_parent_id is not None else None, self.pred_relation, self.deps, self.misc] 25 | return '\t'.join(['_' if v is None else v for v in values]) 26 | 27 | 28 | def vocab(conll_path): 29 | wordsCount = Counter() 30 | posCount = Counter() 31 | relCount = Counter() 32 | 33 | with open(conll_path, 'r') as conllFP: 34 | for sentence in read_conll(conllFP): 35 | wordsCount.update([node.norm for node in sentence if isinstance(node, ConllEntry)]) 36 | posCount.update([node.pos for node in sentence if isinstance(node, ConllEntry)]) 37 | relCount.update([node.relation for node in sentence if isinstance(node, ConllEntry)]) 38 | 39 | return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, posCount.keys(), relCount.keys()) 40 | 41 | 42 | def read_conll(fh): 43 | root = ConllEntry(0, '*root*', '*root*', 'ROOT-POS', 'ROOT-CPOS', '_', -1, 'rroot', '_', '_') 44 | tokens = [root] 45 | for line in fh: 46 | tok = line.strip().split('\t') 47 | if not tok or line.strip() == '': 48 | if len(tokens)>1: yield tokens 49 | tokens = [root] 50 | else: 51 | if line[0] == '#' or '-' in tok[0] or '.' 
in tok[0]: 52 | tokens.append(line.strip()) 53 | else: 54 | tokens.append(ConllEntry(int(tok[0]), tok[1], tok[2], tok[4], tok[3], tok[5], int(tok[6]) if tok[6] != '_' else -1, tok[7], tok[8], tok[9])) 55 | if len(tokens) > 1: 56 | yield tokens 57 | 58 | 59 | def write_conll(fn, conll_gen): 60 | with open(fn, 'w') as fh: 61 | for sentence in conll_gen: 62 | for entry in sentence[1:]: 63 | fh.write(str(entry) + '\n') 64 | fh.write('\n') 65 | 66 | 67 | numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"); 68 | def normalize(word): 69 | return 'NUM' if numberRegex.match(word) else word.lower() 70 | 71 | -------------------------------------------------------------------------------- /bmstparser/src/decoder.py: -------------------------------------------------------------------------------- 1 | # This file contains routines from Lisbon Machine Learning summer school. 2 | # The code is freely distributed under a MIT license. https://github.com/LxMLS/lxmls-toolkit/ 3 | 4 | import numpy as np 5 | import sys 6 | from collections import defaultdict, namedtuple 7 | from operator import itemgetter 8 | 9 | 10 | def parse_proj(scores, gold=None): 11 | ''' 12 | Parse using Eisner's algorithm. 13 | ''' 14 | nr, nc = np.shape(scores) 15 | if nr != nc: 16 | raise ValueError("scores must be a squared matrix with nw+1 rows") 17 | 18 | N = nr - 1 # Number of words (excluding root). 19 | 20 | # Initialize CKY table. 21 | complete = np.zeros([N+1, N+1, 2]) # s, t, direction (right=1). 22 | incomplete = np.zeros([N+1, N+1, 2]) # s, t, direction (right=1). 23 | complete_backtrack = -np.ones([N+1, N+1, 2], dtype=int) # s, t, direction (right=1). 24 | incomplete_backtrack = -np.ones([N+1, N+1, 2], dtype=int) # s, t, direction (right=1). 25 | 26 | incomplete[0, :, 0] -= np.inf 27 | 28 | # Loop from smaller items to larger items. 29 | for k in xrange(1,N+1): 30 | for s in xrange(N-k+1): 31 | t = s+k 32 | 33 | # First, create incomplete items. 34 | # left tree 35 | incomplete_vals0 = complete[s, s:t, 1] + complete[(s+1):(t+1), t, 0] + scores[t, s] + (0.0 if gold is not None and gold[s]==t else 1.0) 36 | incomplete[s, t, 0] = np.max(incomplete_vals0) 37 | incomplete_backtrack[s, t, 0] = s + np.argmax(incomplete_vals0) 38 | # right tree 39 | incomplete_vals1 = complete[s, s:t, 1] + complete[(s+1):(t+1), t, 0] + scores[s, t] + (0.0 if gold is not None and gold[t]==s else 1.0) 40 | incomplete[s, t, 1] = np.max(incomplete_vals1) 41 | incomplete_backtrack[s, t, 1] = s + np.argmax(incomplete_vals1) 42 | 43 | # Second, create complete items. 44 | # left tree 45 | complete_vals0 = complete[s, s:t, 0] + incomplete[s:t, t, 0] 46 | complete[s, t, 0] = np.max(complete_vals0) 47 | complete_backtrack[s, t, 0] = s + np.argmax(complete_vals0) 48 | # right tree 49 | complete_vals1 = incomplete[s, (s+1):(t+1), 1] + complete[(s+1):(t+1), t, 1] 50 | complete[s, t, 1] = np.max(complete_vals1) 51 | complete_backtrack[s, t, 1] = s + 1 + np.argmax(complete_vals1) 52 | 53 | value = complete[0][N][1] 54 | heads = [-1 for _ in range(N+1)] #-np.ones(N+1, dtype=int) 55 | backtrack_eisner(incomplete_backtrack, complete_backtrack, 0, N, 1, 1, heads) 56 | 57 | value_proj = 0.0 58 | for m in xrange(1,N+1): 59 | h = heads[m] 60 | value_proj += scores[h,m] 61 | 62 | return heads 63 | 64 | 65 | def backtrack_eisner(incomplete_backtrack, complete_backtrack, s, t, direction, complete, heads): 66 | ''' 67 | Backtracking step in Eisner's algorithm. 
68 | - incomplete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position, 69 | an end position, and a direction flag (0 means left, 1 means right). This array contains 70 | the arg-maxes of each step in the Eisner algorithm when building *incomplete* spans. 71 | - complete_backtrack is a (NW+1)-by-(NW+1) numpy array indexed by a start position, 72 | an end position, and a direction flag (0 means left, 1 means right). This array contains 73 | the arg-maxes of each step in the Eisner algorithm when building *complete* spans. 74 | - s is the current start of the span 75 | - t is the current end of the span 76 | - direction is 0 (left attachment) or 1 (right attachment) 77 | - complete is 1 if the current span is complete, and 0 otherwise 78 | - heads is a (NW+1)-sized numpy array of integers which is a placeholder for storing the 79 | head of each word. 80 | ''' 81 | if s == t: 82 | return 83 | if complete: 84 | r = complete_backtrack[s][t][direction] 85 | if direction == 0: 86 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 0, 1, heads) 87 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 0, 0, heads) 88 | return 89 | else: 90 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 0, heads) 91 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r, t, 1, 1, heads) 92 | return 93 | else: 94 | r = incomplete_backtrack[s][t][direction] 95 | if direction == 0: 96 | heads[s] = t 97 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads) 98 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r+1, t, 0, 1, heads) 99 | return 100 | else: 101 | heads[t] = s 102 | backtrack_eisner(incomplete_backtrack, complete_backtrack, s, r, 1, 1, heads) 103 | backtrack_eisner(incomplete_backtrack, complete_backtrack, r+1, t, 0, 1, heads) 104 | return 105 | 106 | -------------------------------------------------------------------------------- /bmstparser/src/parser.py: -------------------------------------------------------------------------------- 1 | from optparse import OptionParser 2 | import pickle, utils, mstlstm, os, os.path, time 3 | 4 | 5 | if __name__ == '__main__': 6 | parser = OptionParser() 7 | parser.add_option("--train", dest="conll_train", help="Annotated CONLL train file", metavar="FILE", default="../data/en-universal-train.conll.ptb") 8 | parser.add_option("--dev", dest="conll_dev", help="Annotated CONLL dev file", metavar="FILE", default="../data/en-universal-dev.conll.ptb") 9 | parser.add_option("--test", dest="conll_test", help="Annotated CONLL test file", metavar="FILE", default="../data/en-universal-test.conll.ptb") 10 | parser.add_option("--extrn", dest="external_embedding", help="External embeddings", metavar="FILE") 11 | parser.add_option("--params", dest="params", help="Parameters file", metavar="FILE", default="params.pickle") 12 | parser.add_option("--model", dest="model", help="Load/Save model file", metavar="FILE", default="neuralfirstorder.model") 13 | parser.add_option("--wembedding", type="int", dest="wembedding_dims", default=100) 14 | parser.add_option("--pembedding", type="int", dest="pembedding_dims", default=25) 15 | parser.add_option("--rembedding", type="int", dest="rembedding_dims", default=25) 16 | parser.add_option("--epochs", type="int", dest="epochs", default=30) 17 | parser.add_option("--hidden", type="int", dest="hidden_units", default=100) 18 | parser.add_option("--hidden2", type="int", dest="hidden2_units", default=0) 19 | parser.add_option("--lr", 
type="float", dest="learning_rate", default=0.1) 20 | parser.add_option("--outdir", type="string", dest="output", default="results") 21 | parser.add_option("--activation", type="string", dest="activation", default="tanh") 22 | parser.add_option("--lstmlayers", type="int", dest="lstm_layers", default=2) 23 | parser.add_option("--lstmdims", type="int", dest="lstm_dims", default=125) 24 | parser.add_option("--disableblstm", action="store_false", dest="blstmFlag", default=True) 25 | parser.add_option("--disablelabels", action="store_false", dest="labelsFlag", default=True) 26 | parser.add_option("--predict", action="store_true", dest="predictFlag", default=False) 27 | parser.add_option("--bibi-lstm", action="store_true", dest="bibiFlag", default=False) 28 | parser.add_option("--disablecostaug", action="store_false", dest="costaugFlag", default=True) 29 | parser.add_option("--dynet-seed", type="int", dest="seed", default=0) 30 | parser.add_option("--dynet-mem", type="int", dest="mem", default=0) 31 | 32 | (options, args) = parser.parse_args() 33 | 34 | print 'Using external embedding:', options.external_embedding 35 | 36 | if options.predictFlag: 37 | with open(options.params, 'r') as paramsfp: 38 | words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) 39 | 40 | stored_opt.external_embedding = options.external_embedding 41 | 42 | print 'Initializing lstm mstparser:' 43 | parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, stored_opt) 44 | 45 | parser.Load(options.model) 46 | conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') 47 | tespath = os.path.join(options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') 48 | 49 | ts = time.time() 50 | test_res = list(parser.Predict(options.conll_test)) 51 | te = time.time() 52 | print 'Finished predicting test.', te-ts, 'seconds.' 
53 | utils.write_conll(tespath, test_res) 54 | 55 | if not conllu: 56 | os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') 57 | else: 58 | os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt') 59 | else: 60 | print 'Preparing vocab' 61 | words, w2i, pos, rels = utils.vocab(options.conll_train) 62 | 63 | with open(os.path.join(options.output, options.params), 'w') as paramsfp: 64 | pickle.dump((words, w2i, pos, rels, options), paramsfp) 65 | print 'Finished collecting vocab' 66 | 67 | print 'Initializing lstm mstparser:' 68 | parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, options) 69 | 70 | for epoch in xrange(options.epochs): 71 | print 'Starting epoch', epoch 72 | parser.Train(options.conll_train) 73 | conllu = (os.path.splitext(options.conll_dev.lower())[1] == '.conllu') 74 | devpath = os.path.join(options.output, 'dev_epoch_' + str(epoch+1) + ('.conll' if not conllu else '.conllu')) 75 | utils.write_conll(devpath, parser.Predict(options.conll_dev)) 76 | parser.Save(os.path.join(options.output, os.path.basename(options.model) + str(epoch+1))) 77 | 78 | if not conllu: 79 | os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') 80 | else: 81 | os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') 82 | 83 | -------------------------------------------------------------------------------- /barchybrid/src/utils.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | import re 3 | 4 | 5 | 6 | class ConllEntry: 7 | def __init__(self, id, form, lemma, pos, cpos, feats=None, parent_id=None, relation=None, deps=None, misc=None): 8 | self.id = id 9 | self.form = form 10 | self.norm = normalize(form) 11 | self.cpos = cpos.upper() 12 | self.pos = pos.upper() 13 | self.parent_id = parent_id 14 | self.relation = relation 15 | 16 | self.lemma = lemma 17 | self.feats = feats 18 | self.deps = deps 19 | self.misc = misc 20 | 21 | self.pred_parent_id = None 22 | self.pred_relation = None 23 | 24 | def __str__(self): 25 | values = [str(self.id), self.form, self.lemma, self.cpos, self.pos, self.feats, str(self.pred_parent_id) if self.pred_parent_id is not None else None, self.pred_relation, self.deps, self.misc] 26 | return '\t'.join(['_' if v is None else v for v in values]) 27 | 28 | 29 | class ParseForest: 30 | def __init__(self, sentence): 31 | self.roots = list(sentence) 32 | 33 | for root in self.roots: 34 | root.children = [] 35 | root.scores = None 36 | root.parent = None 37 | root.pred_parent_id = 0 # None 38 | root.pred_relation = 'rroot' # None 39 | root.vecs = None 40 | root.lstms = None 41 | 42 | def __len__(self): 43 | return len(self.roots) 44 | 45 | 46 | def Attach(self, parent_index, child_index): 47 | parent = self.roots[parent_index] 48 | child = self.roots[child_index] 49 | 50 | child.pred_parent_id = parent.id 51 | del self.roots[child_index] 52 | 53 | 54 | def isProj(sentence): 55 | forest = ParseForest(sentence) 56 | unassigned = {entry.id: sum([1 for pentry in sentence if pentry.parent_id == entry.id]) for entry in sentence} 57 | 58 | for _ in xrange(len(sentence)): 59 | for i in xrange(len(forest.roots) - 1): 60 | if forest.roots[i].parent_id == 
forest.roots[i+1].id and unassigned[forest.roots[i].id] == 0: 61 | unassigned[forest.roots[i+1].id]-=1 62 | forest.Attach(i+1, i) 63 | break 64 | if forest.roots[i+1].parent_id == forest.roots[i].id and unassigned[forest.roots[i+1].id] == 0: 65 | unassigned[forest.roots[i].id]-=1 66 | forest.Attach(i, i+1) 67 | break 68 | 69 | return len(forest.roots) == 1 70 | 71 | 72 | def vocab(conll_path): 73 | wordsCount = Counter() 74 | posCount = Counter() 75 | relCount = Counter() 76 | 77 | with open(conll_path, 'r') as conllFP: 78 | for sentence in read_conll(conllFP, True): 79 | wordsCount.update([node.norm for node in sentence if isinstance(node, ConllEntry)]) 80 | posCount.update([node.pos for node in sentence if isinstance(node, ConllEntry)]) 81 | relCount.update([node.relation for node in sentence if isinstance(node, ConllEntry)]) 82 | 83 | return (wordsCount, {w: i for i, w in enumerate(wordsCount.keys())}, posCount.keys(), relCount.keys()) 84 | 85 | 86 | def read_conll(fh, proj): 87 | dropped = 0 88 | read = 0 89 | root = ConllEntry(0, '*root*', '*root*', 'ROOT-POS', 'ROOT-CPOS', '_', -1, 'rroot', '_', '_') 90 | tokens = [root] 91 | for line in fh: 92 | tok = line.strip().split('\t') 93 | if not tok or line.strip() == '': 94 | if len(tokens)>1: 95 | if not proj or isProj([t for t in tokens if isinstance(t, ConllEntry)]): 96 | yield tokens 97 | else: 98 | #print 'Non-projective sentence dropped' 99 | dropped += 1 100 | read += 1 101 | tokens = [root] 102 | else: 103 | if line[0] == '#' or '-' in tok[0] or '.' in tok[0]: 104 | tokens.append(line.strip()) 105 | else: 106 | tokens.append(ConllEntry(int(tok[0]), tok[1], tok[2], tok[4], tok[3], tok[5], int(tok[6]) if tok[6] != '_' else -1, tok[7], tok[8], tok[9])) 107 | if len(tokens) > 1: 108 | yield tokens 109 | 110 | print dropped, 'dropped non-projective sentences.' 111 | print read, 'sentences read.' 
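# Usage sketch (illustrative, not part of the original module): read_conll streams one
# sentence at a time; with proj=True it keeps only projective trees (checked via isProj)
# and reports the dropped/read counts once the file is exhausted. 'train.conll' below is
# a placeholder path.
#
#     with open('train.conll', 'r') as fh:
#         sentences = list(read_conll(fh, True))
#     print len(sentences), 'projective sentences kept.'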
112 | 113 | 114 | def write_conll(fn, conll_gen): 115 | with open(fn, 'w') as fh: 116 | for sentence in conll_gen: 117 | for entry in sentence[1:]: 118 | fh.write(str(entry) + '\n') 119 | fh.write('\n') 120 | 121 | 122 | numberRegex = re.compile("[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"); 123 | def normalize(word): 124 | return 'NUM' if numberRegex.match(word) else word.lower() 125 | 126 | cposTable = {"PRP$": "PRON", "VBG": "VERB", "VBD": "VERB", "VBN": "VERB", ",": ".", "''": ".", "VBP": "VERB", "WDT": "DET", "JJ": "ADJ", "WP": "PRON", "VBZ": "VERB", 127 | "DT": "DET", "#": ".", "RP": "PRT", "$": ".", "NN": "NOUN", ")": ".", "(": ".", "FW": "X", "POS": "PRT", ".": ".", "TO": "PRT", "PRP": "PRON", "RB": "ADV", 128 | ":": ".", "NNS": "NOUN", "NNP": "NOUN", "``": ".", "WRB": "ADV", "CC": "CONJ", "LS": "X", "PDT": "DET", "RBS": "ADV", "RBR": "ADV", "CD": "NUM", "EX": "DET", 129 | "IN": "ADP", "WP$": "PRON", "MD": "VERB", "NNPS": "NOUN", "JJS": "ADJ", "JJR": "ADJ", "SYM": "X", "VB": "VERB", "UH": "X", "ROOT-POS": "ROOT-CPOS", 130 | "-LRB-": ".", "-RRB-": "."} 131 | -------------------------------------------------------------------------------- /barchybrid/src/parser.py: -------------------------------------------------------------------------------- 1 | from optparse import OptionParser 2 | from arc_hybrid import ArcHybridLSTM 3 | import pickle, utils, os, time, sys 4 | 5 | if __name__ == '__main__': 6 | parser = OptionParser() 7 | parser.add_option("--train", dest="conll_train", help="Annotated CONLL train file", metavar="FILE", default="../data/PTB_SD_3_3_0/train.conll") 8 | parser.add_option("--dev", dest="conll_dev", help="Annotated CONLL dev file", metavar="FILE", default="../data/PTB_SD_3_3_0/dev.conll") 9 | parser.add_option("--test", dest="conll_test", help="Annotated CONLL test file", metavar="FILE", default="../data/PTB_SD_3_3_0/test.conll") 10 | parser.add_option("--params", dest="params", help="Parameters file", metavar="FILE", default="params.pickle") 11 | parser.add_option("--extrn", dest="external_embedding", help="External embeddings", metavar="FILE") 12 | parser.add_option("--model", dest="model", help="Load/Save model file", metavar="FILE", default="barchybrid.model") 13 | parser.add_option("--wembedding", type="int", dest="wembedding_dims", default=100) 14 | parser.add_option("--pembedding", type="int", dest="pembedding_dims", default=25) 15 | parser.add_option("--rembedding", type="int", dest="rembedding_dims", default=25) 16 | parser.add_option("--epochs", type="int", dest="epochs", default=30) 17 | parser.add_option("--hidden", type="int", dest="hidden_units", default=100) 18 | parser.add_option("--hidden2", type="int", dest="hidden2_units", default=0) 19 | parser.add_option("--k", type="int", dest="window", default=3) 20 | parser.add_option("--lr", type="float", dest="learning_rate", default=0.1) 21 | parser.add_option("--outdir", type="string", dest="output", default="results") 22 | parser.add_option("--activation", type="string", dest="activation", default="tanh") 23 | parser.add_option("--lstmlayers", type="int", dest="lstm_layers", default=2) 24 | parser.add_option("--lstmdims", type="int", dest="lstm_dims", default=200) 25 | parser.add_option("--dynet-seed", type="int", dest="seed", default=7) 26 | parser.add_option("--disableoracle", action="store_false", dest="oracle", default=True) 27 | parser.add_option("--disableblstm", action="store_false", dest="blstmFlag", default=True) 28 | parser.add_option("--bibi-lstm", action="store_true", dest="bibiFlag", default=False) 
29 | parser.add_option("--usehead", action="store_true", dest="headFlag", default=False) 30 | parser.add_option("--userlmost", action="store_true", dest="rlFlag", default=False) 31 | parser.add_option("--userl", action="store_true", dest="rlMostFlag", default=False) 32 | parser.add_option("--predict", action="store_true", dest="predictFlag", default=False) 33 | parser.add_option("--dynet-mem", type="int", dest="cnn_mem", default=512) 34 | 35 | (options, args) = parser.parse_args() 36 | print 'Using external embedding:', options.external_embedding 37 | 38 | if not options.predictFlag: 39 | if not (options.rlFlag or options.rlMostFlag or options.headFlag): 40 | print 'You must use either --userlmost or --userl or --usehead (you can use multiple)' 41 | sys.exit() 42 | 43 | print 'Preparing vocab' 44 | words, w2i, pos, rels = utils.vocab(options.conll_train) 45 | 46 | with open(os.path.join(options.output, options.params), 'w') as paramsfp: 47 | pickle.dump((words, w2i, pos, rels, options), paramsfp) 48 | print 'Finished collecting vocab' 49 | 50 | print 'Initializing blstm arc hybrid:' 51 | parser = ArcHybridLSTM(words, pos, rels, w2i, options) 52 | 53 | for epoch in xrange(options.epochs): 54 | print 'Starting epoch', epoch 55 | parser.Train(options.conll_train) 56 | conllu = (os.path.splitext(options.conll_dev.lower())[1] == '.conllu') 57 | devpath = os.path.join(options.output, 'dev_epoch_' + str(epoch+1) + ('.conll' if not conllu else '.conllu')) 58 | utils.write_conll(devpath, parser.Predict(options.conll_dev)) 59 | 60 | if not conllu: 61 | os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') 62 | else: 63 | os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') 64 | 65 | print 'Finished predicting dev' 66 | parser.Save(os.path.join(options.output, options.model + str(epoch+1))) 67 | else: 68 | with open(options.params, 'r') as paramsfp: 69 | words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) 70 | 71 | stored_opt.external_embedding = options.external_embedding 72 | 73 | parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt) 74 | parser.Load(options.model) 75 | conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') 76 | tespath = os.path.join(options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') 77 | ts = time.time() 78 | pred = list(parser.Predict(options.conll_test)) 79 | te = time.time() 80 | utils.write_conll(tespath, pred) 81 | 82 | if not conllu: 83 | os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') 84 | else: 85 | os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt') 86 | 87 | print 'Finished predicting test',te-ts 88 | 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BIST Parsers 2 | ## Graph & Transition based dependency parsers using BiLSTM feature extractors. 3 | 4 | The techniques behind the parser are described in the paper [Simple and Accurate Dependency Parsing Using Bidirectional LSTM Feature Representations](https://www.transacl.org/ojs/index.php/tacl/article/viewFile/885/198). 
Further materials can be found [here](http://elki.cc/#/article/Simple%20and%20Accurate%20Dependency%20Parsing%20Using%20Bidirectional%20LSTM%20Feature%20Representations). 5 | 6 | #### Required software 7 | 8 | * Python 2.7 interpreter 9 | * [DyNet library](https://github.com/clab/dynet/tree/master/python) 10 | 11 | #### Train a parsing model 12 | 13 | The software requires `training.conll` and `development.conll` files formatted according to the [CoNLL data format](http://ilk.uvt.nl/conll/#dataformat). 14 | For the faster graph-based parser, change directory to `bmstparser` (1200 words/sec); for the more accurate transition-based parser, change directory to `barchybrid` (800 words/sec). The benchmark was performed on a MacBook Pro with an i7 processor. The graph-based parser achieves an accuracy of 93.8 UAS and the transition-based parser an accuracy of 94.7 UAS on the standard Penn Treebank dataset (Stanford Dependencies). The transition-based parser requires no part-of-speech tagging; setting all the tags to NN will produce the expected accuracy. The model and param files achieving those scores are available for download ([Graph-based model](https://www.dropbox.com/sh/v9cbshnmb36km6v/AADgBS9hb9vy0o-UBZW9AbbKa/bestfirstorder.tar.gz?dl=0), [Transition-based model](https://www.dropbox.com/sh/v9cbshnmb36km6v/AACEPp3DLQeJnRA_QyPmll93a/bestarchybrid.tar.gz?dl=0)). The trained models include improvements beyond those described in the paper, to be published soon. 15 | 16 | To train a parsing model with either parsing architecture, type the following at the command prompt: 17 | 18 | python src/parser.py --dynet-seed 123456789 [--dynet-mem XXXX] --outdir [results directory] --train training.conll --dev development.conll --epochs 30 --lstmdims 125 --lstmlayers 2 [--extrn extrn.vectors] --bibi-lstm 19 | 20 | We use the same external embedding used in [Transition-Based Dependency Parsing with Stack Long Short-Term Memory](http://arxiv.org/abs/1505.08075), which can be downloaded from the authors' [github repository](https://github.com/clab/lstm-parser/) and [directly here](https://drive.google.com/file/d/0B8nESzOdPhLsdWF2S1Ayb1RkTXc/view?usp=sharing). 21 | 22 | If you are training a transition-based parser, then for optimal results you should add `--k 3 --usehead --userl` to the command prompt. These switches set the stack to 3 elements, use the BiLSTM vector of the head of the trees on the stack as feature vectors, and add the BiLSTM vectors of the right/leftmost children to the feature vectors. 23 | 24 | Note 1: You can run it without POS embeddings by setting the POS embedding dimensions to zero (--pembedding 0). 25 | 26 | Note 2: The reported test result is the one matching the highest development score. 27 | 28 | Note 3: After each iteration, the parser calculates the accuracies excluding punctuation symbols by running the `eval.pl` script from the CoNLL-X Shared Task and stores the results in the directory specified by `--outdir`. 29 | 30 | Note 4: The external embeddings parameter is optional and is better left unused when training or predicting with a graph-based model.
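Training stores the vocabulary and options in the params file and one model file per epoch in the output directory. For reference, these artifacts can also be loaded directly from Python; the sketch below mirrors what `bmstparser/src/parser.py` does in `--predict` mode (see the next section for the command-line equivalent). It assumes it is run from inside `bmstparser/src` so that `utils` and `mstlstm` are importable, that DyNet is installed, and the file names below are placeholders for your own params, model and test files:

    import pickle, utils, mstlstm

    with open('params.pickle', 'r') as paramsfp:
        words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)
    stored_opt.external_embedding = None  # or the path to an external embeddings file (--extrn)

    parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, stored_opt)
    parser.Load('neuralfirstorder.model30')  # model file saved after epoch 30
    utils.write_conll('test_pred.conll', parser.Predict('test.conll'))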
31 | 32 | #### Parse data with your parsing model 33 | 34 | The command for parsing a `test.conll` file formatted according to the [CoNLL data format](http://ilk.uvt.nl/conll/#dataformat) with a previously trained model is: 35 | 36 | python src/parser.py --predict --outdir [results directory] --test test.conll [--extrn extrn.vectors] --model [trained model file] --params [param file generated during training] 37 | 38 | The parser will store the resulting CoNLL file in the output directory (`--outdir`). 39 | 40 | Note 1: If you are using the arc-hybrid trained model we provided, please use the `--extrn` flag and specify the location of the external embeddings file. 41 | 42 | Note 2: If you are using the first-order trained model we provided, please do not use the `--extrn` flag. 43 | 44 | #### Citation 45 | 46 | If you make use of this software for research purposes, we would appreciate it if you cite the following: 47 | 48 | @article{DBLP:journals/tacl/KiperwasserG16, 49 | author = {Eliyahu Kiperwasser and Yoav Goldberg}, 50 | title = {Simple and Accurate Dependency Parsing Using Bidirectional {LSTM} 51 | Feature Representations}, 52 | journal = {{TACL}}, 53 | volume = {4}, 54 | pages = {313--327}, 55 | year = {2016}, 56 | url = {https://transacl.org/ojs/index.php/tacl/article/view/885}, 57 | timestamp = {Tue, 09 Aug 2016 14:51:09 +0200}, 58 | biburl = {http://dblp.uni-trier.de/rec/bib/journals/tacl/KiperwasserG16}, 59 | bibsource = {dblp computer science bibliography, http://dblp.org} 60 | } 61 | 62 | #### Forks 63 | 64 | [BIST-PyTorch](https://github.com/wddabc/bist-parser): A PyTorch implementation of the BIST Parsers (for the graph-based parser only). 65 | 66 | [BIST-COVINGTON](https://github.com/aghie/LyS-FASTPARSE): A neural implementation of Covington's algorithm for non-projective dependency parsing. It extends the original BIST transition-based greedy parser by including a dynamic oracle for non-projective parsing to mitigate error propagation. 67 | 68 | [Uppsala Parser](https://github.com/UppsalaNLP/uuparser): A transition-based parser for Universal Dependencies with BiLSTM word and character representations. 69 | 70 | #### License 71 | 72 | This software is released under the terms of the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). 73 | 74 | #### Contact 75 | 76 | For questions and usage issues, please contact elikip@gmail.com 77 | 78 | #### Credits 79 | 80 | [Eliyahu Kiperwasser](http://elki.cc) 81 | 82 | [Yoav Goldberg](https://www.cs.bgu.ac.il/~yoavg/uni/) 83 | 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity.
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /bmstparser/src/mstlstm.py: -------------------------------------------------------------------------------- 1 | from dynet import * 2 | from utils import read_conll, write_conll 3 | from operator import itemgetter 4 | import utils, time, random, decoder 5 | import numpy as np 6 | 7 | 8 | class MSTParserLSTM: 9 | def __init__(self, vocab, pos, rels, w2i, options): 10 | self.model = Model() 11 | random.seed(1) 12 | self.trainer = AdamTrainer(self.model) 13 | 14 | self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify, 'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))} 15 | self.activation = self.activations[options.activation] 16 | 17 | self.blstmFlag = options.blstmFlag 18 | self.labelsFlag = options.labelsFlag 19 | self.costaugFlag = options.costaugFlag 20 | self.bibiFlag = options.bibiFlag 21 | 22 | self.ldims = options.lstm_dims 23 | self.wdims = options.wembedding_dims 24 | self.pdims = options.pembedding_dims 25 | self.rdims = options.rembedding_dims 26 | self.layers = options.lstm_layers 27 | self.wordsCount = vocab 28 | self.vocab = {word: ind+3 for word, ind in w2i.iteritems()} 29 | self.pos = {word: ind+3 for ind, word in enumerate(pos)} 30 | self.rels = {word: ind for ind, word in enumerate(rels)} 31 | self.irels = rels 32 | 33 | 34 | self.external_embedding, self.edim = None, 0 35 | if options.external_embedding is not None: 36 | external_embedding_fp = open(options.external_embedding,'r') 37 | external_embedding_fp.readline() 38 | self.external_embedding = {line.split(' ')[0] : [float(f) for f in line.strip().split(' ')[1:]] for line in external_embedding_fp} 39 | external_embedding_fp.close() 40 | 41 | self.edim = len(self.external_embedding.values()[0]) 42 | self.noextrn = [0.0 for _ in xrange(self.edim)] 43 | self.extrnd = {word: i + 3 for i, word in enumerate(self.external_embedding)} 44 | self.elookup = self.model.add_lookup_parameters((len(self.external_embedding) + 3, self.edim)) 45 | for word, i in self.extrnd.iteritems(): 46 | self.elookup.init_row(i, self.external_embedding[word]) 47 | self.extrnd['*PAD*'] = 1 48 | self.extrnd['*INITIAL*'] = 2 49 | 50 | print 'Load external embedding. 
Vector dimensions', self.edim 51 | 52 | if self.bibiFlag: 53 | self.builders = [VanillaLSTMBuilder(1, self.wdims + self.pdims + self.edim, self.ldims, self.model), 54 | VanillaLSTMBuilder(1, self.wdims + self.pdims + self.edim, self.ldims, self.model)] 55 | self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model), 56 | VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)] 57 | elif self.layers > 0: 58 | self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.pdims + self.edim, self.ldims, self.model), 59 | VanillaLSTMBuilder(self.layers, self.wdims + self.pdims + self.edim, self.ldims, self.model)] 60 | else: 61 | self.builders = [SimpleRNNBuilder(1, self.wdims + self.pdims + self.edim, self.ldims, self.model), 62 | SimpleRNNBuilder(1, self.wdims + self.pdims + self.edim, self.ldims, self.model)] 63 | 64 | self.hidden_units = options.hidden_units 65 | self.hidden2_units = options.hidden2_units 66 | 67 | self.vocab['*PAD*'] = 1 68 | self.pos['*PAD*'] = 1 69 | 70 | self.vocab['*INITIAL*'] = 2 71 | self.pos['*INITIAL*'] = 2 72 | 73 | self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims)) 74 | self.plookup = self.model.add_lookup_parameters((len(pos) + 3, self.pdims)) 75 | self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims)) 76 | 77 | self.hidLayerFOH = self.model.add_parameters((self.hidden_units, self.ldims * 2)) 78 | self.hidLayerFOM = self.model.add_parameters((self.hidden_units, self.ldims * 2)) 79 | self.hidBias = self.model.add_parameters((self.hidden_units)) 80 | 81 | self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) 82 | self.hid2Bias = self.model.add_parameters((self.hidden2_units)) 83 | 84 | self.outLayer = self.model.add_parameters((1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) 85 | 86 | if self.labelsFlag: 87 | self.rhidLayerFOH = self.model.add_parameters((self.hidden_units, 2 * self.ldims)) 88 | self.rhidLayerFOM = self.model.add_parameters((self.hidden_units, 2 * self.ldims)) 89 | self.rhidBias = self.model.add_parameters((self.hidden_units)) 90 | 91 | self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) 92 | self.rhid2Bias = self.model.add_parameters((self.hidden2_units)) 93 | 94 | self.routLayer = self.model.add_parameters((len(self.irels), self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) 95 | self.routBias = self.model.add_parameters((len(self.irels))) 96 | 97 | 98 | def __getExpr(self, sentence, i, j, train): 99 | 100 | if sentence[i].headfov is None: 101 | sentence[i].headfov = self.hidLayerFOH.expr() * concatenate([sentence[i].lstms[0], sentence[i].lstms[1]]) 102 | if sentence[j].modfov is None: 103 | sentence[j].modfov = self.hidLayerFOM.expr() * concatenate([sentence[j].lstms[0], sentence[j].lstms[1]]) 104 | 105 | if self.hidden2_units > 0: 106 | output = self.outLayer.expr() * self.activation(self.hid2Bias.expr() + self.hid2Layer.expr() * self.activation(sentence[i].headfov + sentence[j].modfov + self.hidBias.expr())) # + self.outBias 107 | else: 108 | output = self.outLayer.expr() * self.activation(sentence[i].headfov + sentence[j].modfov + self.hidBias.expr()) # + self.outBias 109 | 110 | return output 111 | 112 | 113 | def __evaluate(self, sentence, train): 114 | exprs = [ [self.__getExpr(sentence, i, j, train) for j in xrange(len(sentence))] for i in xrange(len(sentence)) ] 115 | scores = np.array([ [output.scalar_value() for output in exprsRow] for exprsRow in exprs 
]) 116 | 117 | return scores, exprs 118 | 119 | 120 | def __evaluateLabel(self, sentence, i, j): 121 | if sentence[i].rheadfov is None: 122 | sentence[i].rheadfov = self.rhidLayerFOH.expr() * concatenate([sentence[i].lstms[0], sentence[i].lstms[1]]) 123 | if sentence[j].rmodfov is None: 124 | sentence[j].rmodfov = self.rhidLayerFOM.expr() * concatenate([sentence[j].lstms[0], sentence[j].lstms[1]]) 125 | 126 | if self.hidden2_units > 0: 127 | output = self.routLayer.expr() * self.activation(self.rhid2Bias.expr() + self.rhid2Layer.expr() * self.activation(sentence[i].rheadfov + sentence[j].rmodfov + self.rhidBias.expr())) + self.routBias.expr() 128 | else: 129 | output = self.routLayer.expr() * self.activation(sentence[i].rheadfov + sentence[j].rmodfov + self.rhidBias.expr()) + self.routBias.expr() 130 | 131 | return output.value(), output 132 | 133 | 134 | def Save(self, filename): 135 | self.model.save(filename) 136 | 137 | 138 | def Load(self, filename): 139 | self.model.load(filename) 140 | 141 | 142 | def Predict(self, conll_path): 143 | with open(conll_path, 'r') as conllFP: 144 | for iSentence, sentence in enumerate(read_conll(conllFP)): 145 | conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)] 146 | 147 | for entry in conll_sentence: 148 | wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None 149 | posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None 150 | evec = self.elookup[int(self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0)))] if self.external_embedding is not None else None 151 | entry.vec = concatenate(filter(None, [wordvec, posvec, evec])) 152 | 153 | entry.lstms = [entry.vec, entry.vec] 154 | entry.headfov = None 155 | entry.modfov = None 156 | 157 | entry.rheadfov = None 158 | entry.rmodfov = None 159 | 160 | if self.blstmFlag: 161 | lstm_forward = self.builders[0].initial_state() 162 | lstm_backward = self.builders[1].initial_state() 163 | 164 | for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): 165 | lstm_forward = lstm_forward.add_input(entry.vec) 166 | lstm_backward = lstm_backward.add_input(rentry.vec) 167 | 168 | entry.lstms[1] = lstm_forward.output() 169 | rentry.lstms[0] = lstm_backward.output() 170 | 171 | if self.bibiFlag: 172 | for entry in conll_sentence: 173 | entry.vec = concatenate(entry.lstms) 174 | 175 | blstm_forward = self.bbuilders[0].initial_state() 176 | blstm_backward = self.bbuilders[1].initial_state() 177 | 178 | for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): 179 | blstm_forward = blstm_forward.add_input(entry.vec) 180 | blstm_backward = blstm_backward.add_input(rentry.vec) 181 | 182 | entry.lstms[1] = blstm_forward.output() 183 | rentry.lstms[0] = blstm_backward.output() 184 | 185 | scores, exprs = self.__evaluate(conll_sentence, True) 186 | heads = decoder.parse_proj(scores) 187 | 188 | for entry, head in zip(conll_sentence, heads): 189 | entry.pred_parent_id = head 190 | entry.pred_relation = '_' 191 | 192 | dump = False 193 | 194 | if self.labelsFlag: 195 | for modifier, head in enumerate(heads[1:]): 196 | scores, exprs = self.__evaluateLabel(conll_sentence, head, modifier+1) 197 | conll_sentence[modifier+1].pred_relation = self.irels[max(enumerate(scores), key=itemgetter(1))[0]] 198 | 199 | renew_cg() 200 | if not dump: 201 | yield sentence 202 | 203 | 204 | def Train(self, conll_path): 205 | errors = 0 206 | batch = 0 207 | eloss = 0.0 208 | mloss = 0.0 209 | eerrors = 0 210 | etotal = 0 211 | start = 
time.time() 212 | 213 | with open(conll_path, 'r') as conllFP: 214 | shuffledData = list(read_conll(conllFP)) 215 | random.shuffle(shuffledData) 216 | 217 | errs = [] 218 | lerrs = [] 219 | eeloss = 0.0 220 | 221 | for iSentence, sentence in enumerate(shuffledData): 222 | if iSentence % 100 == 0 and iSentence != 0: 223 | print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Time', time.time()-start 224 | start = time.time() 225 | eerrors = 0 226 | eloss = 0.0 227 | etotal = 0 228 | lerrors = 0 229 | ltotal = 0 230 | 231 | conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)] 232 | 233 | for entry in conll_sentence: 234 | c = float(self.wordsCount.get(entry.norm, 0)) 235 | dropFlag = (random.random() < (c/(0.25+c))) 236 | wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0)) if dropFlag else 0] if self.wdims > 0 else None 237 | posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None 238 | evec = None 239 | 240 | if self.external_embedding is not None: 241 | evec = self.elookup[self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0)) if (dropFlag or (random.random() < 0.5)) else 0] 242 | entry.vec = concatenate(filter(None, [wordvec, posvec, evec])) 243 | 244 | entry.lstms = [entry.vec, entry.vec] 245 | entry.headfov = None 246 | entry.modfov = None 247 | 248 | entry.rheadfov = None 249 | entry.rmodfov = None 250 | 251 | if self.blstmFlag: 252 | lstm_forward = self.builders[0].initial_state() 253 | lstm_backward = self.builders[1].initial_state() 254 | 255 | for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): 256 | lstm_forward = lstm_forward.add_input(entry.vec) 257 | lstm_backward = lstm_backward.add_input(rentry.vec) 258 | 259 | entry.lstms[1] = lstm_forward.output() 260 | rentry.lstms[0] = lstm_backward.output() 261 | 262 | if self.bibiFlag: 263 | for entry in conll_sentence: 264 | entry.vec = concatenate(entry.lstms) 265 | 266 | blstm_forward = self.bbuilders[0].initial_state() 267 | blstm_backward = self.bbuilders[1].initial_state() 268 | 269 | for entry, rentry in zip(conll_sentence, reversed(conll_sentence)): 270 | blstm_forward = blstm_forward.add_input(entry.vec) 271 | blstm_backward = blstm_backward.add_input(rentry.vec) 272 | 273 | entry.lstms[1] = blstm_forward.output() 274 | rentry.lstms[0] = blstm_backward.output() 275 | 276 | scores, exprs = self.__evaluate(conll_sentence, True) 277 | gold = [entry.parent_id for entry in conll_sentence] 278 | heads = decoder.parse_proj(scores, gold if self.costaugFlag else None) 279 | 280 | if self.labelsFlag: 281 | for modifier, head in enumerate(gold[1:]): 282 | rscores, rexprs = self.__evaluateLabel(conll_sentence, head, modifier+1) 283 | goldLabelInd = self.rels[conll_sentence[modifier+1].relation] 284 | wrongLabelInd = max(((l, scr) for l, scr in enumerate(rscores) if l != goldLabelInd), key=itemgetter(1))[0] 285 | if rscores[goldLabelInd] < rscores[wrongLabelInd] + 1: 286 | lerrs.append(rexprs[wrongLabelInd] - rexprs[goldLabelInd]) 287 | 288 | e = sum([1 for h, g in zip(heads[1:], gold[1:]) if h != g]) 289 | eerrors += e 290 | if e > 0: 291 | loss = [(exprs[h][i] - exprs[g][i]) for i, (h,g) in enumerate(zip(heads, gold)) if h != g] # * (1.0/float(e)) 292 | eloss += (e) 293 | mloss += (e) 294 | errs.extend(loss) 295 | 296 | etotal += len(conll_sentence) 297 | 298 | if iSentence % 1 == 0 or len(errs) > 0 or len(lerrs) > 0: 299 | eeloss = 0.0 300 | 301 | if len(errs) > 0 or len(lerrs) > 0: 302 | eerrs = 
(esum(errs + lerrs)) #* (1.0/(float(len(errs)))) 303 | eerrs.scalar_value() 304 | eerrs.backward() 305 | self.trainer.update() 306 | errs = [] 307 | lerrs = [] 308 | 309 | renew_cg() 310 | 311 | if len(errs) > 0: 312 | eerrs = (esum(errs + lerrs)) #* (1.0/(float(len(errs)))) 313 | eerrs.scalar_value() 314 | eerrs.backward() 315 | self.trainer.update() 316 | 317 | errs = [] 318 | lerrs = [] 319 | eeloss = 0.0 320 | 321 | renew_cg() 322 | 323 | self.trainer.update_epoch() 324 | print "Loss: ", mloss/iSentence 325 | -------------------------------------------------------------------------------- /barchybrid/src/arc_hybrid.py: -------------------------------------------------------------------------------- 1 | from dynet import * 2 | from utils import ParseForest, read_conll, write_conll 3 | from operator import itemgetter 4 | from itertools import chain 5 | import utils, time, random 6 | import numpy as np 7 | 8 | 9 | class ArcHybridLSTM: 10 | def __init__(self, words, pos, rels, w2i, options): 11 | self.model = Model() 12 | self.trainer = AdamTrainer(self.model) 13 | random.seed(1) 14 | 15 | self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify, 'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))} 16 | self.activation = self.activations[options.activation] 17 | 18 | self.oracle = options.oracle 19 | self.ldims = options.lstm_dims * 2 20 | self.wdims = options.wembedding_dims 21 | self.pdims = options.pembedding_dims 22 | self.rdims = options.rembedding_dims 23 | self.layers = options.lstm_layers 24 | self.wordsCount = words 25 | self.vocab = {word: ind+3 for word, ind in w2i.iteritems()} 26 | self.pos = {word: ind+3 for ind, word in enumerate(pos)} 27 | self.rels = {word: ind for ind, word in enumerate(rels)} 28 | self.irels = rels 29 | 30 | self.headFlag = options.headFlag 31 | self.rlMostFlag = options.rlMostFlag 32 | self.rlFlag = options.rlFlag 33 | self.k = options.window 34 | 35 | self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0) 36 | 37 | self.external_embedding = None 38 | if options.external_embedding is not None: 39 | external_embedding_fp = open(options.external_embedding,'r') 40 | external_embedding_fp.readline() 41 | self.external_embedding = {line.split(' ')[0] : [float(f) for f in line.strip().split(' ')[1:]] for line in external_embedding_fp} 42 | external_embedding_fp.close() 43 | 44 | self.edim = len(self.external_embedding.values()[0]) 45 | self.noextrn = [0.0 for _ in xrange(self.edim)] 46 | self.extrnd = {word: i + 3 for i, word in enumerate(self.external_embedding)} 47 | self.elookup = self.model.add_lookup_parameters((len(self.external_embedding) + 3, self.edim)) 48 | for word, i in self.extrnd.iteritems(): 49 | self.elookup.init_row(i, self.external_embedding[word]) 50 | self.extrnd['*PAD*'] = 1 51 | self.extrnd['*INITIAL*'] = 2 52 | 53 | print 'Load external embedding. 
Vector dimensions', self.edim 54 | 55 | dims = self.wdims + self.pdims + (self.edim if self.external_embedding is not None else 0) 56 | self.blstmFlag = options.blstmFlag 57 | self.bibiFlag = options.bibiFlag 58 | 59 | if self.bibiFlag: 60 | self.surfaceBuilders = [VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model), 61 | VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model)] 62 | self.bsurfaceBuilders = [VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model), 63 | VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model)] 64 | elif self.blstmFlag: 65 | if self.layers > 0: 66 | self.surfaceBuilders = [VanillaLSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model), LSTMBuilder(self.layers, dims, self.ldims * 0.5, self.model)] 67 | else: 68 | self.surfaceBuilders = [SimpleRNNBuilder(1, dims, self.ldims * 0.5, self.model), LSTMBuilder(1, dims, self.ldims * 0.5, self.model)] 69 | 70 | self.hidden_units = options.hidden_units 71 | self.hidden2_units = options.hidden2_units 72 | self.vocab['*PAD*'] = 1 73 | self.pos['*PAD*'] = 1 74 | 75 | self.vocab['*INITIAL*'] = 2 76 | self.pos['*INITIAL*'] = 2 77 | 78 | self.wlookup = self.model.add_lookup_parameters((len(words) + 3, self.wdims)) 79 | self.plookup = self.model.add_lookup_parameters((len(pos) + 3, self.pdims)) 80 | self.rlookup = self.model.add_lookup_parameters((len(rels), self.rdims)) 81 | 82 | self.word2lstm = self.model.add_parameters((self.ldims, self.wdims + self.pdims + (self.edim if self.external_embedding is not None else 0))) 83 | self.word2lstmbias = self.model.add_parameters((self.ldims)) 84 | self.lstm2lstm = self.model.add_parameters((self.ldims, self.ldims * self.nnvecs + self.rdims)) 85 | self.lstm2lstmbias = self.model.add_parameters((self.ldims)) 86 | 87 | self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.k + 1))) 88 | self.hidBias = self.model.add_parameters((self.hidden_units)) 89 | 90 | self.hid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) 91 | self.hid2Bias = self.model.add_parameters((self.hidden2_units)) 92 | 93 | self.outLayer = self.model.add_parameters((3, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) 94 | self.outBias = self.model.add_parameters((3)) 95 | 96 | self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * self.nnvecs * (self.k + 1))) 97 | self.rhidBias = self.model.add_parameters((self.hidden_units)) 98 | 99 | self.rhid2Layer = self.model.add_parameters((self.hidden2_units, self.hidden_units)) 100 | self.rhid2Bias = self.model.add_parameters((self.hidden2_units)) 101 | 102 | self.routLayer = self.model.add_parameters((2 * (len(self.irels) + 0) + 1, self.hidden2_units if self.hidden2_units > 0 else self.hidden_units)) 103 | self.routBias = self.model.add_parameters((2 * (len(self.irels) + 0) + 1)) 104 | 105 | 106 | def __evaluate(self, stack, buf, train): 107 | topStack = [ stack.roots[-i-1].lstms if len(stack) > i else [self.empty] for i in xrange(self.k) ] 108 | topBuffer = [ buf.roots[i].lstms if len(buf) > i else [self.empty] for i in xrange(1) ] 109 | 110 | input = concatenate(list(chain(*(topStack + topBuffer)))) 111 | 112 | if self.hidden2_units > 0: 113 | routput = (self.routLayer.expr() * self.activation(self.rhid2Bias.expr() + self.rhid2Layer.expr() * self.activation(self.rhidLayer.expr() * input + self.rhidBias.expr())) + self.routBias.expr()) 114 | else: 115 | routput = (self.routLayer.expr() * self.activation(self.rhidLayer.expr() * input + 
self.rhidBias.expr()) + self.routBias.expr()) 116 | 117 | if self.hidden2_units > 0: 118 | output = (self.outLayer.expr() * self.activation(self.hid2Bias.expr() + self.hid2Layer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr())) + self.outBias.expr()) 119 | else: 120 | output = (self.outLayer.expr() * self.activation(self.hidLayer.expr() * input + self.hidBias.expr()) + self.outBias.expr()) 121 | 122 | scrs, uscrs = routput.value(), output.value() 123 | 124 | #transition conditions 125 | left_arc_conditions = len(stack) > 0 and len(buf) > 0 126 | right_arc_conditions = len(stack) > 1 and stack.roots[-1].id != 0 127 | shift_conditions = len(buf) >0 and buf.roots[0].id != 0 128 | 129 | uscrs0 = uscrs[0] 130 | uscrs1 = uscrs[1] 131 | uscrs2 = uscrs[2] 132 | if train: 133 | output0 = output[0] 134 | output1 = output[1] 135 | output2 = output[2] 136 | ret = [ [ (rel, 0, scrs[1 + j * 2] + uscrs1, routput[1 + j * 2 ] + output1) for j, rel in enumerate(self.irels) ] if left_arc_conditions else [], 137 | [ (rel, 1, scrs[2 + j * 2] + uscrs2, routput[2 + j * 2 ] + output2) for j, rel in enumerate(self.irels) ] if right_arc_conditions else [], 138 | [ (None, 2, scrs[0] + uscrs0, routput[0] + output0) ] if shift_conditions else [] ] 139 | else: 140 | s1,r1 = max(zip(scrs[1::2],self.irels)) 141 | s2,r2 = max(zip(scrs[2::2],self.irels)) 142 | s1 += uscrs1 143 | s2 += uscrs2 144 | ret = [ [ (r1, 0, s1) ] if left_arc_conditions else [], 145 | [ (r2, 1, s2) ] if right_arc_conditions else [], 146 | [ (None, 2, scrs[0] + uscrs0) ] if shift_conditions else [] ] 147 | return ret 148 | #return [ [ (rel, 0, scrs[1 + j * 2 + 0] + uscrs[1], routput[1 + j * 2 + 0] + output[1]) for j, rel in enumerate(self.irels) ] if len(stack) > 0 and len(buf) > 0 else [], 149 | # [ (rel, 1, scrs[1 + j * 2 + 1] + uscrs[2], routput[1 + j * 2 + 1] + output[2]) for j, rel in enumerate(self.irels) ] if len(stack) > 1 else [], 150 | # [ (None, 2, scrs[0] + uscrs[0], routput[0] + output[0]) ] if len(buf) > 0 else [] ] 151 | 152 | 153 | def Save(self, filename): 154 | self.model.save(filename) 155 | 156 | 157 | def Load(self, filename): 158 | self.model.load(filename) 159 | 160 | def Init(self): 161 | evec = self.elookup[1] if self.external_embedding is not None else None 162 | paddingWordVec = self.wlookup[1] 163 | paddingPosVec = self.plookup[1] if self.pdims > 0 else None 164 | 165 | paddingVec = tanh(self.word2lstm.expr() * concatenate(filter(None, [paddingWordVec, paddingPosVec, evec])) + self.word2lstmbias.expr() ) 166 | self.empty = paddingVec if self.nnvecs == 1 else concatenate([paddingVec for _ in xrange(self.nnvecs)]) 167 | 168 | 169 | def getWordEmbeddings(self, sentence, train): 170 | for root in sentence: 171 | c = float(self.wordsCount.get(root.norm, 0)) 172 | dropFlag = not train or (random.random() < (c/(0.25+c))) 173 | root.wordvec = self.wlookup[int(self.vocab.get(root.norm, 0)) if dropFlag else 0] 174 | root.posvec = self.plookup[int(self.pos[root.pos])] if self.pdims > 0 else None 175 | 176 | if self.external_embedding is not None: 177 | #if not dropFlag and random.random() < 0.5: 178 | # root.evec = self.elookup[0] 179 | if root.form in self.external_embedding: 180 | root.evec = self.elookup[self.extrnd[root.form]] 181 | elif root.norm in self.external_embedding: 182 | root.evec = self.elookup[self.extrnd[root.norm]] 183 | else: 184 | root.evec = self.elookup[0] 185 | else: 186 | root.evec = None 187 | root.ivec = concatenate(filter(None, [root.wordvec, root.posvec, root.evec])) 188 | 189 | 
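The dropFlag above implements frequency-dependent word dropout: during training a word is replaced by the index-0 (unknown) embedding with probability 0.25/(0.25 + count), so rare words are dropped often and frequent words almost never, which is what gives the unknown vector useful training signal. A standalone sketch of that rule (the counts, words, and the *UNK* placeholder are invented for illustration):

import random

def keep_probability(count, alpha=0.25):
    # Chance the true form is kept; 1 - this is the word-dropout rate.
    return count / (alpha + count)

def maybe_drop(word, counts, alpha=0.25):
    # Return the word itself or an unknown-word placeholder (index 0 in the lookup table).
    return word if random.random() < keep_probability(counts.get(word, 0), alpha) else '*UNK*'

counts = {'the': 5000, 'parser': 12, 'hapax': 1}
for word, count in counts.items():
    print('%-7s kept with p = %.3f' % (word, keep_probability(count)))
# the     kept with p = 1.000
# parser  kept with p = 0.980
# hapax   kept with p = 0.800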
if self.blstmFlag: 190 | forward = self.surfaceBuilders[0].initial_state() 191 | backward = self.surfaceBuilders[1].initial_state() 192 | 193 | for froot, rroot in zip(sentence, reversed(sentence)): 194 | forward = forward.add_input( froot.ivec ) 195 | backward = backward.add_input( rroot.ivec ) 196 | froot.fvec = forward.output() 197 | rroot.bvec = backward.output() 198 | for root in sentence: 199 | root.vec = concatenate( [root.fvec, root.bvec] ) 200 | 201 | if self.bibiFlag: 202 | bforward = self.bsurfaceBuilders[0].initial_state() 203 | bbackward = self.bsurfaceBuilders[1].initial_state() 204 | 205 | for froot, rroot in zip(sentence, reversed(sentence)): 206 | bforward = bforward.add_input( froot.vec ) 207 | bbackward = bbackward.add_input( rroot.vec ) 208 | froot.bfvec = bforward.output() 209 | rroot.bbvec = bbackward.output() 210 | for root in sentence: 211 | root.vec = concatenate( [root.bfvec, root.bbvec] ) 212 | 213 | else: 214 | for root in sentence: 215 | root.ivec = (self.word2lstm.expr() * root.ivec) + self.word2lstmbias.expr() 216 | root.vec = tanh( root.ivec ) 217 | 218 | 219 | def Predict(self, conll_path): 220 | with open(conll_path, 'r') as conllFP: 221 | for iSentence, sentence in enumerate(read_conll(conllFP, False)): 222 | self.Init() 223 | 224 | conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)] 225 | 226 | conll_sentence = conll_sentence[1:] + [conll_sentence[0]] 227 | self.getWordEmbeddings(conll_sentence, False) 228 | stack = ParseForest([]) 229 | buf = ParseForest(conll_sentence) 230 | 231 | for root in conll_sentence: 232 | root.lstms = [root.vec for _ in xrange(self.nnvecs)] 233 | 234 | hoffset = 1 if self.headFlag else 0 235 | 236 | while not (len(buf) == 1 and len(stack) == 0): 237 | scores = self.__evaluate(stack, buf, False) 238 | best = max(chain(*scores), key = itemgetter(2) ) 239 | 240 | if best[1] == 2: 241 | stack.roots.append(buf.roots[0]) 242 | del buf.roots[0] 243 | 244 | elif best[1] == 0: 245 | child = stack.roots.pop() 246 | parent = buf.roots[0] 247 | 248 | child.pred_parent_id = parent.id 249 | child.pred_relation = best[0] 250 | 251 | bestOp = 0 252 | if self.rlMostFlag: 253 | parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset] 254 | if self.rlFlag: 255 | parent.lstms[bestOp + hoffset] = child.vec 256 | 257 | elif best[1] == 1: 258 | child = stack.roots.pop() 259 | parent = stack.roots[-1] 260 | 261 | child.pred_parent_id = parent.id 262 | child.pred_relation = best[0] 263 | 264 | bestOp = 1 265 | if self.rlMostFlag: 266 | parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset] 267 | if self.rlFlag: 268 | parent.lstms[bestOp + hoffset] = child.vec 269 | 270 | renew_cg() 271 | yield sentence 272 | 273 | 274 | def Train(self, conll_path): 275 | mloss = 0.0 276 | errors = 0 277 | batch = 0 278 | eloss = 0.0 279 | eerrors = 0 280 | lerrors = 0 281 | etotal = 0 282 | ltotal = 0 283 | ninf = -float('inf') 284 | 285 | hoffset = 1 if self.headFlag else 0 286 | 287 | start = time.time() 288 | 289 | with open(conll_path, 'r') as conllFP: 290 | shuffledData = list(read_conll(conllFP, True)) 291 | random.shuffle(shuffledData) 292 | 293 | errs = [] 294 | eeloss = 0.0 295 | 296 | self.Init() 297 | 298 | for iSentence, sentence in enumerate(shuffledData): 299 | if iSentence % 100 == 0 and iSentence != 0: 300 | print 'Processing sentence number:', iSentence, 'Loss:', eloss / etotal, 'Errors:', (float(eerrors)) / etotal, 'Labeled Errors:', (float(lerrors) / etotal) , 'Time', time.time()-start 301 | start = 
time.time() 302 | eerrors = 0 303 | eloss = 0.0 304 | etotal = 0 305 | lerrors = 0 306 | ltotal = 0 307 | 308 | conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)] 309 | 310 | conll_sentence = conll_sentence[1:] + [conll_sentence[0]] 311 | self.getWordEmbeddings(conll_sentence, True) 312 | stack = ParseForest([]) 313 | buf = ParseForest(conll_sentence) 314 | 315 | for root in conll_sentence: 316 | root.lstms = [root.vec for _ in xrange(self.nnvecs)] 317 | 318 | hoffset = 1 if self.headFlag else 0 319 | 320 | while not (len(buf) == 1 and len(stack) == 0): 321 | scores = self.__evaluate(stack, buf, True) 322 | scores.append([(None, 3, ninf ,None)]) 323 | 324 | alpha = stack.roots[:-2] if len(stack) > 2 else [] 325 | s1 = [stack.roots[-2]] if len(stack) > 1 else [] 326 | s0 = [stack.roots[-1]] if len(stack) > 0 else [] 327 | b = [buf.roots[0]] if len(buf) > 0 else [] 328 | beta = buf.roots[1:] if len(buf) > 1 else [] 329 | 330 | left_cost = ( len([h for h in s1 + beta if h.id == s0[0].parent_id]) + 331 | len([d for d in b + beta if d.parent_id == s0[0].id]) ) if len(scores[0]) > 0 else 1 332 | right_cost = ( len([h for h in b + beta if h.id == s0[0].parent_id]) + 333 | len([d for d in b + beta if d.parent_id == s0[0].id]) ) if len(scores[1]) > 0 else 1 334 | shift_cost = ( len([h for h in s1 + alpha if h.id == b[0].parent_id]) + 335 | len([d for d in s0 + s1 + alpha if d.parent_id == b[0].id]) ) if len(scores[2]) > 0 else 1 336 | costs = (left_cost, right_cost, shift_cost, 1) 337 | 338 | bestValid = max(( s for s in chain(*scores) if costs[s[1]] == 0 and ( s[1] == 2 or s[0] == stack.roots[-1].relation ) ), key=itemgetter(2)) 339 | bestWrong = max(( s for s in chain(*scores) if costs[s[1]] != 0 or ( s[1] != 2 and s[0] != stack.roots[-1].relation ) ), key=itemgetter(2)) 340 | best = bestValid if ( (not self.oracle) or (bestValid[2] - bestWrong[2] > 1.0) or (bestValid[2] > bestWrong[2] and random.random() > 0.1) ) else bestWrong 341 | 342 | if best[1] == 2: 343 | stack.roots.append(buf.roots[0]) 344 | del buf.roots[0] 345 | 346 | elif best[1] == 0: 347 | child = stack.roots.pop() 348 | parent = buf.roots[0] 349 | 350 | child.pred_parent_id = parent.id 351 | child.pred_relation = best[0] 352 | 353 | bestOp = 0 354 | if self.rlMostFlag: 355 | parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset] 356 | if self.rlFlag: 357 | parent.lstms[bestOp + hoffset] = child.vec 358 | 359 | elif best[1] == 1: 360 | child = stack.roots.pop() 361 | parent = stack.roots[-1] 362 | 363 | child.pred_parent_id = parent.id 364 | child.pred_relation = best[0] 365 | 366 | bestOp = 1 367 | if self.rlMostFlag: 368 | parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset] 369 | if self.rlFlag: 370 | parent.lstms[bestOp + hoffset] = child.vec 371 | 372 | if bestValid[2] < bestWrong[2] + 1.0: 373 | loss = bestWrong[3] - bestValid[3] 374 | mloss += 1.0 + bestWrong[2] - bestValid[2] 375 | eloss += 1.0 + bestWrong[2] - bestValid[2] 376 | errs.append(loss) 377 | 378 | if best[1] != 2 and (child.pred_parent_id != child.parent_id or child.pred_relation != child.relation): 379 | lerrors += 1 380 | if child.pred_parent_id != child.parent_id: 381 | errors += 1 382 | eerrors += 1 383 | 384 | etotal += 1 385 | 386 | if len(errs) > 50: # or True: 387 | #eerrs = ((esum(errs)) * (1.0/(float(len(errs))))) 388 | eerrs = esum(errs) 389 | scalar_loss = eerrs.scalar_value() 390 | eerrs.backward() 391 | self.trainer.update() 392 | errs = [] 393 | lerrs = [] 394 | 395 | renew_cg() 396 | 
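The left/right/shift costs computed in the training loop above are arc-hybrid dynamic-oracle costs: each counts the gold arcs that become unreachable if that transition is taken (note the parser moves the root token to the end of the buffer before parsing). Below is a self-contained re-statement over plain (id, head_id) pairs with an invented three-word configuration; unavailable transitions default to cost 1, as in the code above.

def transition_costs(stack, buf):
    # stack/buf hold (id, head_id) pairs; stack top is stack[-1], buffer front is buf[0].
    s0, s1, alpha = stack[-1:], stack[-2:-1], stack[:-2]
    b, beta = buf[:1], buf[1:]

    # LEFT-ARC (s0 becomes a dependent of the buffer front): loses a head of s0
    # still in s1/beta and any dependents of s0 still in the buffer.
    left = (len([h for h in s1 + beta if h[0] == s0[0][1]]) +
            len([d for d in b + beta if d[1] == s0[0][0]])) if stack and buf else 1
    # RIGHT-ARC (s0 becomes a dependent of s1): loses heads/dependents of s0 in the buffer.
    right = (len([h for h in b + beta if h[0] == s0[0][1]]) +
             len([d for d in b + beta if d[1] == s0[0][0]])) if len(stack) > 1 and stack[-1][0] != 0 else 1
    # SHIFT (buffer front moves onto the stack): loses heads of it deeper in the
    # stack and dependents of it anywhere in the stack.
    shift = (len([h for h in s1 + alpha if h[0] == b[0][1]]) +
             len([d for d in s0 + s1 + alpha if d[1] == b[0][0]])) if buf and buf[0][0] != 0 else 1
    return left, right, shift

# "the cat sleeps": the(1) -> cat(2), cat(2) -> sleeps(3), sleeps(3) -> root(0).
# With "the" on the stack and "cat" at the buffer front, LEFT-ARC is the only
# zero-cost transition (it builds the gold arc with head 2 and dependent 1).
print(transition_costs([(1, 2)], [(2, 3), (3, 0)]))   # (0, 1, 1)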
self.Init() 397 | 398 | if len(errs) > 0: 399 | eerrs = (esum(errs)) # * (1.0/(float(len(errs)))) 400 | eerrs.scalar_value() 401 | eerrs.backward() 402 | self.trainer.update() 403 | 404 | errs = [] 405 | lerrs = [] 406 | 407 | renew_cg() 408 | 409 | self.trainer.update_epoch() 410 | print "Loss: ", mloss/iSentence 411 | -------------------------------------------------------------------------------- /bmstparser/src/utils/evaluation_script/conll17_ud_eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # CoNLL 2017 UD Parsing evaluation script. 4 | # 5 | # Compatible with Python 2.7 and 3.2+, can be used either as a module 6 | # or a standalone executable. 7 | # 8 | # Copyright 2017 Institute of Formal and Applied Linguistics (UFAL), 9 | # Faculty of Mathematics and Physics, Charles University, Czech Republic. 10 | # 11 | # Changelog: 12 | # - [02 Jan 2017] Version 0.9: Initial release 13 | # - [25 Jan 2017] Version 0.9.1: Fix bug in LCS alignment computation 14 | # - [10 Mar 2017] Version 1.0: Add documentation and test 15 | # Compare HEADs correctly using aligned words 16 | # Allow evaluation with errorneous spaces in forms 17 | # Compare forms in LCS case insensitively 18 | # Detect cycles and multiple root nodes 19 | # Compute AlignedAccuracy 20 | 21 | # Command line usage 22 | # ------------------ 23 | # conll17_ud_eval.py [-v] [-w weights_file] gold_conllu_file system_conllu_file 24 | # 25 | # - if no -v is given, only the CoNLL17 UD Shared Task evaluation LAS metrics 26 | # is printed 27 | # - if -v is given, several metrics are printed (as precision, recall, F1 score, 28 | # and in case the metric is computed on aligned words also accuracy on these): 29 | # - Tokens: how well do the gold tokens match system tokens 30 | # - Sentences: how well do the gold sentences match system sentences 31 | # - Words: how well can the gold words be aligned to system words 32 | # - UPOS: using aligned words, how well does UPOS match 33 | # - XPOS: using aligned words, how well does XPOS match 34 | # - Feats: using aligned words, how well does FEATS match 35 | # - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match 36 | # - Lemmas: using aligned words, how well does LEMMA match 37 | # - UAS: using aligned words, how well does HEAD match 38 | # - LAS: using aligned words, how well does HEAD+DEPREL(ignoring subtypes) match 39 | # - if weights_file is given (with lines containing deprel-weight pairs), 40 | # one more metric is shown: 41 | # - WeightedLAS: as LAS, but each deprel (ignoring subtypes) has different weight 42 | 43 | # API usage 44 | # --------- 45 | # - load_conllu(file) 46 | # - loads CoNLL-U file from given file object to an internal representation 47 | # - the file object should return str on both Python 2 and Python 3 48 | # - raises UDError exception if the given file cannot be loaded 49 | # - evaluate(gold_ud, system_ud) 50 | # - evaluate the given gold and system CoNLL-U files (loaded with load_conllu) 51 | # - raises UDError if the concatenated tokens of gold and system file do not match 52 | # - returns a dictionary with the metrics described above, each metrics having 53 | # three fields: precision, recall and f1 54 | 55 | # Description of token matching 56 | # ----------------------------- 57 | # In order to match tokens of gold file and system file, we consider the text 58 | # resulting from concatenation of gold tokens and text resulting from 59 | # concatenation of system tokens. 
These texts should match -- if they do not, 60 | # the evaluation fails. 61 | # 62 | # If the texts do match, every token is represented as a range in this original 63 | # text, and tokens are equal only if their range is the same. 64 | 65 | # Description of word matching 66 | # ---------------------------- 67 | # When matching words of gold file and system file, we first match the tokens. 68 | # The words which are also tokens are matched as tokens, but words in multi-word 69 | # tokens have to be handled differently. 70 | # 71 | # To handle multi-word tokens, we start by finding "multi-word spans". 72 | # Multi-word span is a span in the original text such that 73 | # - it contains at least one multi-word token 74 | # - all multi-word tokens in the span (considering both gold and system ones) 75 | # are completely inside the span (i.e., they do not "stick out") 76 | # - the multi-word span is as small as possible 77 | # 78 | # For every multi-word span, we align the gold and system words completely 79 | # inside this span using LCS on their FORMs. The words not intersecting 80 | # (even partially) any multi-word span are then aligned as tokens. 81 | 82 | 83 | from __future__ import division 84 | from __future__ import print_function 85 | 86 | import argparse 87 | import io 88 | import sys 89 | import unittest 90 | 91 | # CoNLL-U column names 92 | ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10) 93 | 94 | # UD Error is used when raising exceptions in this module 95 | class UDError(Exception): 96 | pass 97 | 98 | # Load given CoNLL-U file into internal representation 99 | def load_conllu(file): 100 | # Internal representation classes 101 | class UDRepresentation: 102 | def __init__(self): 103 | # Characters of all the tokens in the whole file. 104 | # Whitespace between tokens is not included. 105 | self.characters = [] 106 | # List of UDSpan instances with start&end indices into `characters`. 107 | self.tokens = [] 108 | # List of UDWord instances. 109 | self.words = [] 110 | # List of UDSpan instances with start&end indices into `characters`. 111 | self.sentences = [] 112 | class UDSpan: 113 | def __init__(self, start, end): 114 | self.start = start 115 | # Note that self.end marks the first position **after the end** of span, 116 | # so we can use characters[start:end] or range(start, end). 117 | self.end = end 118 | class UDWord: 119 | def __init__(self, span, columns, is_multiword): 120 | # Span of this word (or MWT, see below) within ud_representation.characters. 121 | self.span = span 122 | # 10 columns of the CoNLL-U file: ID, FORM, LEMMA,... 123 | self.columns = columns 124 | # is_multiword==True means that this word is part of a multi-word token. 125 | # In that case, self.span marks the span of the whole multi-word token. 126 | self.is_multiword = is_multiword 127 | # Reference to the UDWord instance representing the HEAD (or None if root). 128 | self.parent = None 129 | # Let's ignore language-specific deprel subtypes. 
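Stripping the subtype means a language-specific relation such as nsubj:pass or acl:relcl is scored as its universal part; a one-liner showing the effect (labels chosen for illustration):

for deprel in ("nsubj:pass", "acl:relcl", "obl", "aux"):
    print(deprel, "->", deprel.split(":")[0])
# nsubj:pass -> nsubj, acl:relcl -> acl, obl -> obl, aux -> aux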
130 | self.columns[DEPREL] = columns[DEPREL].split(':')[0] 131 | 132 | ud = UDRepresentation() 133 | 134 | # Load the CoNLL-U file 135 | index, sentence_start = 0, None 136 | while True: 137 | line = file.readline() 138 | if not line: 139 | break 140 | line = line.rstrip("\r\n") 141 | 142 | # Handle sentence start boundaries 143 | if sentence_start is None: 144 | # Skip comments 145 | if line.startswith("#"): 146 | continue 147 | # Start a new sentence 148 | ud.sentences.append(UDSpan(index, 0)) 149 | sentence_start = len(ud.words) 150 | if not line: 151 | # Add parent UDWord links and check there are no cycles 152 | def process_word(word): 153 | if word.parent == "remapping": 154 | raise UDError("There is a cycle in a sentence") 155 | if word.parent is None: 156 | head = int(word.columns[HEAD]) 157 | if head > len(ud.words) - sentence_start: 158 | raise UDError("HEAD '{}' points outside of the sentence".format(word.columns[HEAD])) 159 | if head: 160 | parent = ud.words[sentence_start + head - 1] 161 | word.parent = "remapping" 162 | process_word(parent) 163 | word.parent = parent 164 | 165 | for word in ud.words[sentence_start:]: 166 | process_word(word) 167 | 168 | # Check there is a single root node 169 | if len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1: 170 | raise UDError("There are multiple roots in a sentence") 171 | 172 | # End the sentence 173 | ud.sentences[-1].end = index 174 | sentence_start = None 175 | continue 176 | 177 | # Read next token/word 178 | columns = line.split("\t") 179 | if len(columns) != 10: 180 | raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(line)) 181 | 182 | # Skip empty nodes 183 | if "." in columns[ID]: 184 | continue 185 | 186 | # Delete spaces from FORM so gold.characters == system.characters 187 | # even if one of them tokenizes the space. 
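The space removal matters because the whole comparison rests on the character/token-span representation that load_conllu builds: every FORM is appended to one long character sequence, and tokens become ranges over it. A small standalone illustration of that representation, assuming the script directory is on the path so it imports as conll17_ud_eval; the two-word sentence is invented:

import io
from conll17_ud_eval import load_conllu

sample = (
    "1\tHi\t_\t_\t_\t_\t0\t_\t_\t_\n"
    "2\tthere\t_\t_\t_\t_\t1\t_\t_\t_\n"
    "\n"
)
ud = load_conllu(io.StringIO(sample))
print("".join(ud.characters))                    # Hithere
print([(t.start, t.end) for t in ud.tokens])     # [(0, 2), (2, 7)]
print(len(ud.words), len(ud.sentences))          # 2 1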
188 | columns[FORM] = columns[FORM].replace(" ", "") 189 | if not columns[FORM]: 190 | raise UDError("There is an empty FORM in the CoNLL-U file") 191 | 192 | # Save token 193 | ud.characters.extend(columns[FORM]) 194 | ud.tokens.append(UDSpan(index, index + len(columns[FORM]))) 195 | index += len(columns[FORM]) 196 | 197 | # Handle multi-word tokens to save word(s) 198 | if "-" in columns[ID]: 199 | try: 200 | start, end = map(int, columns[ID].split("-")) 201 | except: 202 | raise UDError("Cannot parse multi-word token ID '{}'".format(columns[ID])) 203 | 204 | for _ in range(start, end + 1): 205 | word_line = file.readline().rstrip("\r\n") 206 | word_columns = word_line.split("\t") 207 | if len(word_columns) != 10: 208 | raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(word_line)) 209 | ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True)) 210 | # Basic tokens/words 211 | else: 212 | try: 213 | word_id = int(columns[ID]) 214 | except: 215 | raise UDError("Cannot parse word ID '{}'".format(columns[ID])) 216 | if word_id != len(ud.words) - sentence_start + 1: 217 | raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(columns[ID], columns[FORM], len(ud.words) - sentence_start + 1)) 218 | 219 | try: 220 | head_id = int(columns[HEAD]) 221 | except: 222 | raise UDError("Cannot parse HEAD '{}'".format(columns[HEAD])) 223 | if head_id < 0: 224 | raise UDError("HEAD cannot be negative") 225 | 226 | ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False)) 227 | 228 | if sentence_start is not None: 229 | raise UDError("The CoNLL-U file does not end with empty line") 230 | 231 | return ud 232 | 233 | # Evaluate the gold and system treebanks (loaded using load_conllu). 234 | def evaluate(gold_ud, system_ud, deprel_weights=None): 235 | class Score: 236 | def __init__(self, gold_total, system_total, correct, aligned_total=None): 237 | self.precision = correct / system_total if system_total else 0.0 238 | self.recall = correct / gold_total if gold_total else 0.0 239 | self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0 240 | self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total 241 | class AlignmentWord: 242 | def __init__(self, gold_word, system_word): 243 | self.gold_word = gold_word 244 | self.system_word = system_word 245 | self.gold_parent = None 246 | self.system_parent_gold_aligned = None 247 | class Alignment: 248 | def __init__(self, gold_words, system_words): 249 | self.gold_words = gold_words 250 | self.system_words = system_words 251 | self.matched_words = [] 252 | self.matched_words_map = {} 253 | def append_aligned_words(self, gold_word, system_word): 254 | self.matched_words.append(AlignmentWord(gold_word, system_word)) 255 | self.matched_words_map[system_word] = gold_word 256 | def fill_parents(self): 257 | # We represent root parents in both gold and system data by '0'. 258 | # For gold data, we represent non-root parent by corresponding gold word. 259 | # For system data, we represent non-root parent by either gold word aligned 260 | # to parent system nodes, or by None if no gold words is aligned to the parent. 
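Every metric returned further below is a Score instance as defined above: precision over system items, recall over gold items, F1 as their harmonic mean, and, where an alignment exists, accuracy over the aligned words. A worked micro-example with invented counts:

gold_total, system_total, correct, aligned_total = 8, 10, 6, 7   # invented counts
precision = correct / system_total                     # 0.60
recall = correct / gold_total                          # 0.75
f1 = 2 * correct / (system_total + gold_total)         # 0.666..., same as 2*P*R/(P+R)
aligned_accuracy = correct / aligned_total             # 0.857...
print(round(precision, 3), round(recall, 3), round(f1, 3), round(aligned_accuracy, 3))
# 0.6 0.75 0.667 0.857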
261 | for words in self.matched_words: 262 | words.gold_parent = words.gold_word.parent if words.gold_word.parent is not None else 0 263 | words.system_parent_gold_aligned = self.matched_words_map.get(words.system_word.parent, None) \ 264 | if words.system_word.parent is not None else 0 265 | 266 | def lower(text): 267 | if sys.version_info < (3, 0) and isinstance(text, str): 268 | return text.decode("utf-8").lower() 269 | return text.lower() 270 | 271 | def spans_score(gold_spans, system_spans): 272 | correct, gi, si = 0, 0, 0 273 | while gi < len(gold_spans) and si < len(system_spans): 274 | if system_spans[si].start < gold_spans[gi].start: 275 | si += 1 276 | elif gold_spans[gi].start < system_spans[si].start: 277 | gi += 1 278 | else: 279 | correct += gold_spans[gi].end == system_spans[si].end 280 | si += 1 281 | gi += 1 282 | 283 | return Score(len(gold_spans), len(system_spans), correct) 284 | 285 | def alignment_score(alignment, key_fn, weight_fn=lambda w: 1): 286 | gold, system, aligned, correct = 0, 0, 0, 0 287 | 288 | for word in alignment.gold_words: 289 | gold += weight_fn(word) 290 | 291 | for word in alignment.system_words: 292 | system += weight_fn(word) 293 | 294 | for words in alignment.matched_words: 295 | aligned += weight_fn(words.gold_word) 296 | 297 | if key_fn is None: 298 | # Return score for whole aligned words 299 | return Score(gold, system, aligned) 300 | 301 | for words in alignment.matched_words: 302 | if key_fn(words.gold_word, words.gold_parent) == key_fn(words.system_word, words.system_parent_gold_aligned): 303 | correct += weight_fn(words.gold_word) 304 | 305 | return Score(gold, system, correct, aligned) 306 | 307 | def beyond_end(words, i, multiword_span_end): 308 | if i >= len(words): 309 | return True 310 | if words[i].is_multiword: 311 | return words[i].span.start >= multiword_span_end 312 | return words[i].span.end > multiword_span_end 313 | 314 | def extend_end(word, multiword_span_end): 315 | if word.is_multiword and word.span.end > multiword_span_end: 316 | return word.span.end 317 | return multiword_span_end 318 | 319 | def find_multiword_span(gold_words, system_words, gi, si): 320 | # We know gold_words[gi].is_multiword or system_words[si].is_multiword. 321 | # Find the start of the multiword span (gs, ss), so the multiword span is minimal. 322 | # Initialize multiword_span_end characters index. 323 | if gold_words[gi].is_multiword: 324 | multiword_span_end = gold_words[gi].span.end 325 | if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start: 326 | si += 1 327 | else: # if system_words[si].is_multiword 328 | multiword_span_end = system_words[si].span.end 329 | if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start: 330 | gi += 1 331 | gs, ss = gi, si 332 | 333 | # Find the end of the multiword span 334 | # (so both gi and si are pointing to the word following the multiword span end). 
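To make the multi-word handling concrete, here is a minimal end-to-end run (assuming the module imports as conll17_ud_eval; the "cannot" sentence is invented): the gold side has one multi-word token split into "can" + "not", the system side produces two plain tokens, and although the token boundaries differ, the words inside the multi-word span still align via LCS, so word-level and attachment scores are unaffected.

import io
from conll17_ud_eval import load_conllu, evaluate

gold = (
    "1-2\tcannot\t_\t_\t_\t_\t_\t_\t_\t_\n"
    "1\tcan\t_\t_\t_\t_\t0\troot\t_\t_\n"
    "2\tnot\t_\t_\t_\t_\t1\tadvmod\t_\t_\n"
    "\n"
)
system = (
    "1\tcan\t_\t_\t_\t_\t0\troot\t_\t_\n"
    "2\tnot\t_\t_\t_\t_\t1\tadvmod\t_\t_\n"
    "\n"
)
scores = evaluate(load_conllu(io.StringIO(gold)), load_conllu(io.StringIO(system)))
print(scores["Tokens"].f1, scores["Words"].f1, scores["LAS"].f1)
# 0.0 1.0 1.0  (token boundaries differ, but the words inside the multi-word span align)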
335 | while not beyond_end(gold_words, gi, multiword_span_end) or \ 336 | not beyond_end(system_words, si, multiword_span_end): 337 | if gi < len(gold_words) and (si >= len(system_words) or 338 | gold_words[gi].span.start <= system_words[si].span.start): 339 | multiword_span_end = extend_end(gold_words[gi], multiword_span_end) 340 | gi += 1 341 | else: 342 | multiword_span_end = extend_end(system_words[si], multiword_span_end) 343 | si += 1 344 | return gs, ss, gi, si 345 | 346 | def compute_lcs(gold_words, system_words, gi, si, gs, ss): 347 | lcs = [[0] * (si - ss) for i in range(gi - gs)] 348 | for g in reversed(range(gi - gs)): 349 | for s in reversed(range(si - ss)): 350 | if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): 351 | lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0) 352 | lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0) 353 | lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0) 354 | return lcs 355 | 356 | def align_words(gold_words, system_words): 357 | alignment = Alignment(gold_words, system_words) 358 | 359 | gi, si = 0, 0 360 | while gi < len(gold_words) and si < len(system_words): 361 | if gold_words[gi].is_multiword or system_words[si].is_multiword: 362 | # A: Multi-word tokens => align via LCS within the whole "multiword span". 363 | gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si) 364 | 365 | if si > ss and gi > gs: 366 | lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss) 367 | 368 | # Store aligned words 369 | s, g = 0, 0 370 | while g < gi - gs and s < si - ss: 371 | if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): 372 | alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s]) 373 | g += 1 374 | s += 1 375 | elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0): 376 | g += 1 377 | else: 378 | s += 1 379 | else: 380 | # B: No multi-word token => align according to spans. 
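Outside multi-word spans, a gold word and a system word align only when their character ranges are identical. A tiny sketch over the text "abcd", with gold tokenization "a | bc | d" and system "a | b | c | d"; the membership test is a simplification of the two-pointer sweep below but gives the same result here, and the counts mirror one of the unit tests at the bottom of this file:

gold_spans = [(0, 1), (1, 3), (3, 4)]
system_spans = [(0, 1), (1, 2), (2, 3), (3, 4)]
aligned = [span for span in gold_spans if span in system_spans]
print(aligned)                                            # [(0, 1), (3, 4)]
print(round(len(aligned) / len(gold_spans), 3),           # recall  0.667
      round(len(aligned) / len(system_spans), 3))         # precision 0.5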
381 | if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end): 382 | alignment.append_aligned_words(gold_words[gi], system_words[si]) 383 | gi += 1 384 | si += 1 385 | elif gold_words[gi].span.start <= system_words[si].span.start: 386 | gi += 1 387 | else: 388 | si += 1 389 | 390 | alignment.fill_parents() 391 | 392 | return alignment 393 | 394 | # Check that underlying character sequences do match 395 | if gold_ud.characters != system_ud.characters: 396 | index = 0 397 | while gold_ud.characters[index] == system_ud.characters[index]: 398 | index += 1 399 | 400 | raise UDError( 401 | "The concatenation of tokens in gold file and in system file differ!\n" + 402 | "First 20 differing characters in gold file: '{}' and system file: '{}'".format( 403 | "".join(gold_ud.characters[index:index + 20]), 404 | "".join(system_ud.characters[index:index + 20]) 405 | ) 406 | ) 407 | 408 | # Align words 409 | alignment = align_words(gold_ud.words, system_ud.words) 410 | 411 | # Compute the F1-scores 412 | result = { 413 | "Tokens": spans_score(gold_ud.tokens, system_ud.tokens), 414 | "Sentences": spans_score(gold_ud.sentences, system_ud.sentences), 415 | "Words": alignment_score(alignment, None), 416 | "UPOS": alignment_score(alignment, lambda w, parent: w.columns[UPOS]), 417 | "XPOS": alignment_score(alignment, lambda w, parent: w.columns[XPOS]), 418 | "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]), 419 | "AllTags": alignment_score(alignment, lambda w, parent: (w.columns[UPOS], w.columns[XPOS], w.columns[FEATS])), 420 | "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]), 421 | "UAS": alignment_score(alignment, lambda w, parent: parent), 422 | "LAS": alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL])), 423 | } 424 | 425 | # Add WeightedLAS if weights are given 426 | if deprel_weights is not None: 427 | def weighted_las(word): 428 | return deprel_weights.get(word.columns[DEPREL], 1.0) 429 | result["WeightedLAS"] = alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL]), weighted_las) 430 | 431 | return result 432 | 433 | def load_deprel_weights(weights_file): 434 | if weights_file is None: 435 | return None 436 | 437 | deprel_weights = {} 438 | for line in weights_file: 439 | # Ignore comments and empty lines 440 | if line.startswith("#") or not line.strip(): 441 | continue 442 | 443 | columns = line.rstrip("\r\n").split() 444 | if len(columns) != 2: 445 | raise ValueError("Expected two columns in the UD Relations weights file on line '{}'".format(line)) 446 | 447 | deprel_weights[columns[0]] = float(columns[1]) 448 | 449 | return deprel_weights 450 | 451 | def load_conllu_file(path): 452 | _file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {})) 453 | return load_conllu(_file) 454 | 455 | def evaluate_wrapper(args): 456 | # Load CoNLL-U files 457 | gold_ud = load_conllu_file(args.gold_file) 458 | system_ud = load_conllu_file(args.system_file) 459 | 460 | # Load weights if requested 461 | deprel_weights = load_deprel_weights(args.weights) 462 | 463 | return evaluate(gold_ud, system_ud, deprel_weights) 464 | 465 | def main(): 466 | # Parse arguments 467 | parser = argparse.ArgumentParser() 468 | parser.add_argument("gold_file", type=str, 469 | help="Name of the CoNLL-U file with the gold data.") 470 | parser.add_argument("system_file", type=str, 471 | help="Name of the CoNLL-U file with the predicted data.") 472 | 
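Besides the command-line form documented in the header (conll17_ud_eval.py [-v] [-w weights_file] gold system), the module can also be driven directly, which is essentially what main() does; a minimal sketch with placeholder file names:

from conll17_ud_eval import load_conllu_file, evaluate

gold_ud = load_conllu_file("gold.conllu")        # placeholder paths
system_ud = load_conllu_file("parsed.conllu")
results = evaluate(gold_ud, system_ud)           # pass deprel_weights=... for WeightedLAS
print("LAS F1 Score: {:.2f}".format(100 * results["LAS"].f1))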
parser.add_argument("--weights", "-w", type=argparse.FileType("r"), default=None, 473 | metavar="deprel_weights_file", 474 | help="Compute WeightedLAS using given weights for Universal Dependency Relations.") 475 | parser.add_argument("--verbose", "-v", default=0, action="count", 476 | help="Print all metrics.") 477 | args = parser.parse_args() 478 | 479 | # Use verbose if weights are supplied 480 | if args.weights is not None and not args.verbose: 481 | args.verbose = 1 482 | 483 | # Evaluate 484 | evaluation = evaluate_wrapper(args) 485 | 486 | # Print the evaluation 487 | if not args.verbose: 488 | print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1)) 489 | else: 490 | metrics = ["Tokens", "Sentences", "Words", "UPOS", "XPOS", "Feats", "AllTags", "Lemmas", "UAS", "LAS"] 491 | if args.weights is not None: 492 | metrics.append("WeightedLAS") 493 | 494 | print("Metrics | Precision | Recall | F1 Score | AligndAcc") 495 | print("-----------+-----------+-----------+-----------+-----------") 496 | for metric in metrics: 497 | print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format( 498 | metric, 499 | 100 * evaluation[metric].precision, 500 | 100 * evaluation[metric].recall, 501 | 100 * evaluation[metric].f1, 502 | "{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else "" 503 | )) 504 | 505 | if __name__ == "__main__": 506 | main() 507 | 508 | # Tests, which can be executed with `python -m unittest conll17_ud_eval`. 509 | class TestAlignment(unittest.TestCase): 510 | @staticmethod 511 | def _load_words(words): 512 | """Prepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors.""" 513 | lines, num_words = [], 0 514 | for w in words: 515 | parts = w.split(" ") 516 | if len(parts) == 1: 517 | num_words += 1 518 | lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, parts[0], int(num_words>1))) 519 | else: 520 | lines.append("{}-{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_".format(num_words + 1, num_words + len(parts) - 1, parts[0])) 521 | for part in parts[1:]: 522 | num_words += 1 523 | lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, part, int(num_words>1))) 524 | return load_conllu((io.StringIO if sys.version_info >= (3, 0) else io.BytesIO)("\n".join(lines+["\n"]))) 525 | 526 | def _test_exception(self, gold, system): 527 | self.assertRaises(UDError, evaluate, self._load_words(gold), self._load_words(system)) 528 | 529 | def _test_ok(self, gold, system, correct): 530 | metrics = evaluate(self._load_words(gold), self._load_words(system)) 531 | gold_words = sum((max(1, len(word.split(" ")) - 1) for word in gold)) 532 | system_words = sum((max(1, len(word.split(" ")) - 1) for word in system)) 533 | self.assertEqual((metrics["Words"].precision, metrics["Words"].recall, metrics["Words"].f1), 534 | (correct / system_words, correct / gold_words, 2 * correct / (gold_words + system_words))) 535 | 536 | def test_exception(self): 537 | self._test_exception(["a"], ["b"]) 538 | 539 | def test_equal(self): 540 | self._test_ok(["a"], ["a"], 1) 541 | self._test_ok(["a", "b", "c"], ["a", "b", "c"], 3) 542 | 543 | def test_equal_with_multiword(self): 544 | self._test_ok(["abc a b c"], ["a", "b", "c"], 3) 545 | self._test_ok(["a", "bc b c", "d"], ["a", "b", "c", "d"], 4) 546 | self._test_ok(["abcd a b c d"], ["ab a b", "cd c d"], 4) 547 | self._test_ok(["abc a b c", "de d e"], ["a", "bcd b c d", "e"], 5) 548 | 549 | def test_alignment(self): 550 | self._test_ok(["abcd"], ["a", "b", "c", "d"], 0) 551 | 
self._test_ok(["abc", "d"], ["a", "b", "c", "d"], 1) 552 | self._test_ok(["a", "bc", "d"], ["a", "b", "c", "d"], 2) 553 | self._test_ok(["a", "bc b c", "d"], ["a", "b", "cd"], 2) 554 | self._test_ok(["abc a BX c", "def d EX f"], ["ab a b", "cd c d", "ef e f"], 4) 555 | self._test_ok(["ab a b", "cd bc d"], ["a", "bc", "d"], 2) 556 | self._test_ok(["a", "bc b c", "d"], ["ab AX BX", "cd CX a"], 1) 557 | -------------------------------------------------------------------------------- /barchybrid/src/utils/evaluation_script/conll17_ud_eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # CoNLL 2017 UD Parsing evaluation script. 4 | # 5 | # Compatible with Python 2.7 and 3.2+, can be used either as a module 6 | # or a standalone executable. 7 | # 8 | # Copyright 2017 Institute of Formal and Applied Linguistics (UFAL), 9 | # Faculty of Mathematics and Physics, Charles University, Czech Republic. 10 | # 11 | # Changelog: 12 | # - [02 Jan 2017] Version 0.9: Initial release 13 | # - [25 Jan 2017] Version 0.9.1: Fix bug in LCS alignment computation 14 | # - [10 Mar 2017] Version 1.0: Add documentation and test 15 | # Compare HEADs correctly using aligned words 16 | # Allow evaluation with errorneous spaces in forms 17 | # Compare forms in LCS case insensitively 18 | # Detect cycles and multiple root nodes 19 | # Compute AlignedAccuracy 20 | 21 | # Command line usage 22 | # ------------------ 23 | # conll17_ud_eval.py [-v] [-w weights_file] gold_conllu_file system_conllu_file 24 | # 25 | # - if no -v is given, only the CoNLL17 UD Shared Task evaluation LAS metrics 26 | # is printed 27 | # - if -v is given, several metrics are printed (as precision, recall, F1 score, 28 | # and in case the metric is computed on aligned words also accuracy on these): 29 | # - Tokens: how well do the gold tokens match system tokens 30 | # - Sentences: how well do the gold sentences match system sentences 31 | # - Words: how well can the gold words be aligned to system words 32 | # - UPOS: using aligned words, how well does UPOS match 33 | # - XPOS: using aligned words, how well does XPOS match 34 | # - Feats: using aligned words, how well does FEATS match 35 | # - AllTags: using aligned words, how well does UPOS+XPOS+FEATS match 36 | # - Lemmas: using aligned words, how well does LEMMA match 37 | # - UAS: using aligned words, how well does HEAD match 38 | # - LAS: using aligned words, how well does HEAD+DEPREL(ignoring subtypes) match 39 | # - if weights_file is given (with lines containing deprel-weight pairs), 40 | # one more metric is shown: 41 | # - WeightedLAS: as LAS, but each deprel (ignoring subtypes) has different weight 42 | 43 | # API usage 44 | # --------- 45 | # - load_conllu(file) 46 | # - loads CoNLL-U file from given file object to an internal representation 47 | # - the file object should return str on both Python 2 and Python 3 48 | # - raises UDError exception if the given file cannot be loaded 49 | # - evaluate(gold_ud, system_ud) 50 | # - evaluate the given gold and system CoNLL-U files (loaded with load_conllu) 51 | # - raises UDError if the concatenated tokens of gold and system file do not match 52 | # - returns a dictionary with the metrics described above, each metrics having 53 | # three fields: precision, recall and f1 54 | 55 | # Description of token matching 56 | # ----------------------------- 57 | # In order to match tokens of gold file and system file, we consider the text 58 | # resulting from concatenation of gold 
tokens and text resulting from 59 | # concatenation of system tokens. These texts should match -- if they do not, 60 | # the evaluation fails. 61 | # 62 | # If the texts do match, every token is represented as a range in this original 63 | # text, and tokens are equal only if their range is the same. 64 | 65 | # Description of word matching 66 | # ---------------------------- 67 | # When matching words of gold file and system file, we first match the tokens. 68 | # The words which are also tokens are matched as tokens, but words in multi-word 69 | # tokens have to be handled differently. 70 | # 71 | # To handle multi-word tokens, we start by finding "multi-word spans". 72 | # Multi-word span is a span in the original text such that 73 | # - it contains at least one multi-word token 74 | # - all multi-word tokens in the span (considering both gold and system ones) 75 | # are completely inside the span (i.e., they do not "stick out") 76 | # - the multi-word span is as small as possible 77 | # 78 | # For every multi-word span, we align the gold and system words completely 79 | # inside this span using LCS on their FORMs. The words not intersecting 80 | # (even partially) any multi-word span are then aligned as tokens. 81 | 82 | 83 | from __future__ import division 84 | from __future__ import print_function 85 | 86 | import argparse 87 | import io 88 | import sys 89 | import unittest 90 | 91 | # CoNLL-U column names 92 | ID, FORM, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC = range(10) 93 | 94 | # UD Error is used when raising exceptions in this module 95 | class UDError(Exception): 96 | pass 97 | 98 | # Load given CoNLL-U file into internal representation 99 | def load_conllu(file): 100 | # Internal representation classes 101 | class UDRepresentation: 102 | def __init__(self): 103 | # Characters of all the tokens in the whole file. 104 | # Whitespace between tokens is not included. 105 | self.characters = [] 106 | # List of UDSpan instances with start&end indices into `characters`. 107 | self.tokens = [] 108 | # List of UDWord instances. 109 | self.words = [] 110 | # List of UDSpan instances with start&end indices into `characters`. 111 | self.sentences = [] 112 | class UDSpan: 113 | def __init__(self, start, end): 114 | self.start = start 115 | # Note that self.end marks the first position **after the end** of span, 116 | # so we can use characters[start:end] or range(start, end). 117 | self.end = end 118 | class UDWord: 119 | def __init__(self, span, columns, is_multiword): 120 | # Span of this word (or MWT, see below) within ud_representation.characters. 121 | self.span = span 122 | # 10 columns of the CoNLL-U file: ID, FORM, LEMMA,... 123 | self.columns = columns 124 | # is_multiword==True means that this word is part of a multi-word token. 125 | # In that case, self.span marks the span of the whole multi-word token. 126 | self.is_multiword = is_multiword 127 | # Reference to the UDWord instance representing the HEAD (or None if root). 128 | self.parent = None 129 | # Let's ignore language-specific deprel subtypes. 
130 | self.columns[DEPREL] = columns[DEPREL].split(':')[0] 131 | 132 | ud = UDRepresentation() 133 | 134 | # Load the CoNLL-U file 135 | index, sentence_start = 0, None 136 | while True: 137 | line = file.readline() 138 | if not line: 139 | break 140 | line = line.rstrip("\r\n") 141 | 142 | # Handle sentence start boundaries 143 | if sentence_start is None: 144 | # Skip comments 145 | if line.startswith("#"): 146 | continue 147 | # Start a new sentence 148 | ud.sentences.append(UDSpan(index, 0)) 149 | sentence_start = len(ud.words) 150 | if not line: 151 | # Add parent UDWord links and check there are no cycles 152 | def process_word(word): 153 | if word.parent == "remapping": 154 | raise UDError("There is a cycle in a sentence") 155 | if word.parent is None: 156 | head = int(word.columns[HEAD]) 157 | if head > len(ud.words) - sentence_start: 158 | raise UDError("HEAD '{}' points outside of the sentence".format(word.columns[HEAD])) 159 | if head: 160 | parent = ud.words[sentence_start + head - 1] 161 | word.parent = "remapping" 162 | process_word(parent) 163 | word.parent = parent 164 | 165 | for word in ud.words[sentence_start:]: 166 | process_word(word) 167 | 168 | # Check there is a single root node 169 | #if len([word for word in ud.words[sentence_start:] if word.parent is None]) != 1: 170 | # raise UDError("There are multiple roots in a sentence") 171 | 172 | # End the sentence 173 | ud.sentences[-1].end = index 174 | sentence_start = None 175 | continue 176 | 177 | # Read next token/word 178 | columns = line.split("\t") 179 | if len(columns) != 10: 180 | raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(line)) 181 | 182 | # Skip empty nodes 183 | if "." in columns[ID]: 184 | continue 185 | 186 | # Delete spaces from FORM so gold.characters == system.characters 187 | # even if one of them tokenizes the space. 
188 | columns[FORM] = columns[FORM].replace(" ", "") 189 | if not columns[FORM]: 190 | raise UDError("There is an empty FORM in the CoNLL-U file") 191 | 192 | # Save token 193 | ud.characters.extend(columns[FORM]) 194 | ud.tokens.append(UDSpan(index, index + len(columns[FORM]))) 195 | index += len(columns[FORM]) 196 | 197 | # Handle multi-word tokens to save word(s) 198 | if "-" in columns[ID]: 199 | try: 200 | start, end = map(int, columns[ID].split("-")) 201 | except: 202 | raise UDError("Cannot parse multi-word token ID '{}'".format(columns[ID])) 203 | 204 | for _ in range(start, end + 1): 205 | word_line = file.readline().rstrip("\r\n") 206 | word_columns = word_line.split("\t") 207 | if len(word_columns) != 10: 208 | raise UDError("The CoNLL-U line does not contain 10 tab-separated columns: '{}'".format(word_line)) 209 | ud.words.append(UDWord(ud.tokens[-1], word_columns, is_multiword=True)) 210 | # Basic tokens/words 211 | else: 212 | try: 213 | word_id = int(columns[ID]) 214 | except: 215 | raise UDError("Cannot parse word ID '{}'".format(columns[ID])) 216 | if word_id != len(ud.words) - sentence_start + 1: 217 | raise UDError("Incorrect word ID '{}' for word '{}', expected '{}'".format(columns[ID], columns[FORM], len(ud.words) - sentence_start + 1)) 218 | 219 | try: 220 | head_id = int(columns[HEAD]) 221 | except: 222 | raise UDError("Cannot parse HEAD '{}'".format(columns[HEAD])) 223 | if head_id < 0: 224 | raise UDError("HEAD cannot be negative") 225 | 226 | ud.words.append(UDWord(ud.tokens[-1], columns, is_multiword=False)) 227 | 228 | if sentence_start is not None: 229 | raise UDError("The CoNLL-U file does not end with empty line") 230 | 231 | return ud 232 | 233 | # Evaluate the gold and system treebanks (loaded using load_conllu). 234 | def evaluate(gold_ud, system_ud, deprel_weights=None): 235 | class Score: 236 | def __init__(self, gold_total, system_total, correct, aligned_total=None): 237 | self.precision = correct / system_total if system_total else 0.0 238 | self.recall = correct / gold_total if gold_total else 0.0 239 | self.f1 = 2 * correct / (system_total + gold_total) if system_total + gold_total else 0.0 240 | self.aligned_accuracy = correct / aligned_total if aligned_total else aligned_total 241 | class AlignmentWord: 242 | def __init__(self, gold_word, system_word): 243 | self.gold_word = gold_word 244 | self.system_word = system_word 245 | self.gold_parent = None 246 | self.system_parent_gold_aligned = None 247 | class Alignment: 248 | def __init__(self, gold_words, system_words): 249 | self.gold_words = gold_words 250 | self.system_words = system_words 251 | self.matched_words = [] 252 | self.matched_words_map = {} 253 | def append_aligned_words(self, gold_word, system_word): 254 | self.matched_words.append(AlignmentWord(gold_word, system_word)) 255 | self.matched_words_map[system_word] = gold_word 256 | def fill_parents(self): 257 | # We represent root parents in both gold and system data by '0'. 258 | # For gold data, we represent non-root parent by corresponding gold word. 259 | # For system data, we represent non-root parent by either gold word aligned 260 | # to parent system nodes, or by None if no gold words is aligned to the parent. 
261 | for words in self.matched_words: 262 | words.gold_parent = words.gold_word.parent if words.gold_word.parent is not None else 0 263 | words.system_parent_gold_aligned = self.matched_words_map.get(words.system_word.parent, None) \ 264 | if words.system_word.parent is not None else 0 265 | 266 | def lower(text): 267 | if sys.version_info < (3, 0) and isinstance(text, str): 268 | return text.decode("utf-8").lower() 269 | return text.lower() 270 | 271 | def spans_score(gold_spans, system_spans): 272 | correct, gi, si = 0, 0, 0 273 | while gi < len(gold_spans) and si < len(system_spans): 274 | if system_spans[si].start < gold_spans[gi].start: 275 | si += 1 276 | elif gold_spans[gi].start < system_spans[si].start: 277 | gi += 1 278 | else: 279 | correct += gold_spans[gi].end == system_spans[si].end 280 | si += 1 281 | gi += 1 282 | 283 | return Score(len(gold_spans), len(system_spans), correct) 284 | 285 | def alignment_score(alignment, key_fn, weight_fn=lambda w: 1): 286 | gold, system, aligned, correct = 0, 0, 0, 0 287 | 288 | for word in alignment.gold_words: 289 | gold += weight_fn(word) 290 | 291 | for word in alignment.system_words: 292 | system += weight_fn(word) 293 | 294 | for words in alignment.matched_words: 295 | aligned += weight_fn(words.gold_word) 296 | 297 | if key_fn is None: 298 | # Return score for whole aligned words 299 | return Score(gold, system, aligned) 300 | 301 | for words in alignment.matched_words: 302 | if key_fn(words.gold_word, words.gold_parent) == key_fn(words.system_word, words.system_parent_gold_aligned): 303 | correct += weight_fn(words.gold_word) 304 | 305 | return Score(gold, system, correct, aligned) 306 | 307 | def beyond_end(words, i, multiword_span_end): 308 | if i >= len(words): 309 | return True 310 | if words[i].is_multiword: 311 | return words[i].span.start >= multiword_span_end 312 | return words[i].span.end > multiword_span_end 313 | 314 | def extend_end(word, multiword_span_end): 315 | if word.is_multiword and word.span.end > multiword_span_end: 316 | return word.span.end 317 | return multiword_span_end 318 | 319 | def find_multiword_span(gold_words, system_words, gi, si): 320 | # We know gold_words[gi].is_multiword or system_words[si].is_multiword. 321 | # Find the start of the multiword span (gs, ss), so the multiword span is minimal. 322 | # Initialize multiword_span_end characters index. 323 | if gold_words[gi].is_multiword: 324 | multiword_span_end = gold_words[gi].span.end 325 | if not system_words[si].is_multiword and system_words[si].span.start < gold_words[gi].span.start: 326 | si += 1 327 | else: # if system_words[si].is_multiword 328 | multiword_span_end = system_words[si].span.end 329 | if not gold_words[gi].is_multiword and gold_words[gi].span.start < system_words[si].span.start: 330 | gi += 1 331 | gs, ss = gi, si 332 | 333 | # Find the end of the multiword span 334 | # (so both gi and si are pointing to the word following the multiword span end). 
335 | while not beyond_end(gold_words, gi, multiword_span_end) or \ 336 | not beyond_end(system_words, si, multiword_span_end): 337 | if gi < len(gold_words) and (si >= len(system_words) or 338 | gold_words[gi].span.start <= system_words[si].span.start): 339 | multiword_span_end = extend_end(gold_words[gi], multiword_span_end) 340 | gi += 1 341 | else: 342 | multiword_span_end = extend_end(system_words[si], multiword_span_end) 343 | si += 1 344 | return gs, ss, gi, si 345 | 346 | def compute_lcs(gold_words, system_words, gi, si, gs, ss): 347 | lcs = [[0] * (si - ss) for i in range(gi - gs)] 348 | for g in reversed(range(gi - gs)): 349 | for s in reversed(range(si - ss)): 350 | if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): 351 | lcs[g][s] = 1 + (lcs[g+1][s+1] if g+1 < gi-gs and s+1 < si-ss else 0) 352 | lcs[g][s] = max(lcs[g][s], lcs[g+1][s] if g+1 < gi-gs else 0) 353 | lcs[g][s] = max(lcs[g][s], lcs[g][s+1] if s+1 < si-ss else 0) 354 | return lcs 355 | 356 | def align_words(gold_words, system_words): 357 | alignment = Alignment(gold_words, system_words) 358 | 359 | gi, si = 0, 0 360 | while gi < len(gold_words) and si < len(system_words): 361 | if gold_words[gi].is_multiword or system_words[si].is_multiword: 362 | # A: Multi-word tokens => align via LCS within the whole "multiword span". 363 | gs, ss, gi, si = find_multiword_span(gold_words, system_words, gi, si) 364 | 365 | if si > ss and gi > gs: 366 | lcs = compute_lcs(gold_words, system_words, gi, si, gs, ss) 367 | 368 | # Store aligned words 369 | s, g = 0, 0 370 | while g < gi - gs and s < si - ss: 371 | if lower(gold_words[gs + g].columns[FORM]) == lower(system_words[ss + s].columns[FORM]): 372 | alignment.append_aligned_words(gold_words[gs+g], system_words[ss+s]) 373 | g += 1 374 | s += 1 375 | elif lcs[g][s] == (lcs[g+1][s] if g+1 < gi-gs else 0): 376 | g += 1 377 | else: 378 | s += 1 379 | else: 380 | # B: No multi-word token => align according to spans. 
381 | if (gold_words[gi].span.start, gold_words[gi].span.end) == (system_words[si].span.start, system_words[si].span.end): 382 | alignment.append_aligned_words(gold_words[gi], system_words[si]) 383 | gi += 1 384 | si += 1 385 | elif gold_words[gi].span.start <= system_words[si].span.start: 386 | gi += 1 387 | else: 388 | si += 1 389 | 390 | alignment.fill_parents() 391 | 392 | return alignment 393 | 394 | # Check that underlying character sequences do match 395 | if gold_ud.characters != system_ud.characters: 396 | index = 0 397 | while gold_ud.characters[index] == system_ud.characters[index]: 398 | index += 1 399 | 400 | raise UDError( 401 | "The concatenation of tokens in gold file and in system file differ!\n" + 402 | "First 20 differing characters in gold file: '{}' and system file: '{}'".format( 403 | "".join(gold_ud.characters[index:index + 20]), 404 | "".join(system_ud.characters[index:index + 20]) 405 | ) 406 | ) 407 | 408 | # Align words 409 | alignment = align_words(gold_ud.words, system_ud.words) 410 | 411 | # Compute the F1-scores 412 | result = { 413 | "Tokens": spans_score(gold_ud.tokens, system_ud.tokens), 414 | "Sentences": spans_score(gold_ud.sentences, system_ud.sentences), 415 | "Words": alignment_score(alignment, None), 416 | "UPOS": alignment_score(alignment, lambda w, parent: w.columns[UPOS]), 417 | "XPOS": alignment_score(alignment, lambda w, parent: w.columns[XPOS]), 418 | "Feats": alignment_score(alignment, lambda w, parent: w.columns[FEATS]), 419 | "AllTags": alignment_score(alignment, lambda w, parent: (w.columns[UPOS], w.columns[XPOS], w.columns[FEATS])), 420 | "Lemmas": alignment_score(alignment, lambda w, parent: w.columns[LEMMA]), 421 | "UAS": alignment_score(alignment, lambda w, parent: parent), 422 | "LAS": alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL])), 423 | } 424 | 425 | # Add WeightedLAS if weights are given 426 | if deprel_weights is not None: 427 | def weighted_las(word): 428 | return deprel_weights.get(word.columns[DEPREL], 1.0) 429 | result["WeightedLAS"] = alignment_score(alignment, lambda w, parent: (parent, w.columns[DEPREL]), weighted_las) 430 | 431 | return result 432 | 433 | def load_deprel_weights(weights_file): 434 | if weights_file is None: 435 | return None 436 | 437 | deprel_weights = {} 438 | for line in weights_file: 439 | # Ignore comments and empty lines 440 | if line.startswith("#") or not line.strip(): 441 | continue 442 | 443 | columns = line.rstrip("\r\n").split() 444 | if len(columns) != 2: 445 | raise ValueError("Expected two columns in the UD Relations weights file on line '{}'".format(line)) 446 | 447 | deprel_weights[columns[0]] = float(columns[1]) 448 | 449 | return deprel_weights 450 | 451 | def load_conllu_file(path): 452 | _file = open(path, mode="r", **({"encoding": "utf-8"} if sys.version_info >= (3, 0) else {})) 453 | return load_conllu(_file) 454 | 455 | def evaluate_wrapper(args): 456 | # Load CoNLL-U files 457 | gold_ud = load_conllu_file(args.gold_file) 458 | system_ud = load_conllu_file(args.system_file) 459 | 460 | # Load weights if requested 461 | deprel_weights = load_deprel_weights(args.weights) 462 | 463 | return evaluate(gold_ud, system_ud, deprel_weights) 464 | 465 | def main(): 466 | # Parse arguments 467 | parser = argparse.ArgumentParser() 468 | parser.add_argument("gold_file", type=str, 469 | help="Name of the CoNLL-U file with the gold data.") 470 | parser.add_argument("system_file", type=str, 471 | help="Name of the CoNLL-U file with the predicted data.") 472 | 
parser.add_argument("--weights", "-w", type=argparse.FileType("r"), default=None, 473 | metavar="deprel_weights_file", 474 | help="Compute WeightedLAS using given weights for Universal Dependency Relations.") 475 | parser.add_argument("--verbose", "-v", default=0, action="count", 476 | help="Print all metrics.") 477 | args = parser.parse_args() 478 | 479 | # Use verbose if weights are supplied 480 | if args.weights is not None and not args.verbose: 481 | args.verbose = 1 482 | 483 | # Evaluate 484 | evaluation = evaluate_wrapper(args) 485 | 486 | # Print the evaluation 487 | if not args.verbose: 488 | print("LAS F1 Score: {:.2f}".format(100 * evaluation["LAS"].f1)) 489 | else: 490 | metrics = ["Tokens", "Sentences", "Words", "UPOS", "XPOS", "Feats", "AllTags", "Lemmas", "UAS", "LAS"] 491 | if args.weights is not None: 492 | metrics.append("WeightedLAS") 493 | 494 | print("Metrics | Precision | Recall | F1 Score | AligndAcc") 495 | print("-----------+-----------+-----------+-----------+-----------") 496 | for metric in metrics: 497 | print("{:11}|{:10.2f} |{:10.2f} |{:10.2f} |{}".format( 498 | metric, 499 | 100 * evaluation[metric].precision, 500 | 100 * evaluation[metric].recall, 501 | 100 * evaluation[metric].f1, 502 | "{:10.2f}".format(100 * evaluation[metric].aligned_accuracy) if evaluation[metric].aligned_accuracy is not None else "" 503 | )) 504 | 505 | if __name__ == "__main__": 506 | main() 507 | 508 | # Tests, which can be executed with `python -m unittest conll17_ud_eval`. 509 | class TestAlignment(unittest.TestCase): 510 | @staticmethod 511 | def _load_words(words): 512 | """Prepare fake CoNLL-U files with fake HEAD to prevent multiple roots errors.""" 513 | lines, num_words = [], 0 514 | for w in words: 515 | parts = w.split(" ") 516 | if len(parts) == 1: 517 | num_words += 1 518 | lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, parts[0], int(num_words>1))) 519 | else: 520 | lines.append("{}-{}\t{}\t_\t_\t_\t_\t_\t_\t_\t_".format(num_words + 1, num_words + len(parts) - 1, parts[0])) 521 | for part in parts[1:]: 522 | num_words += 1 523 | lines.append("{}\t{}\t_\t_\t_\t_\t{}\t_\t_\t_".format(num_words, part, int(num_words>1))) 524 | return load_conllu((io.StringIO if sys.version_info >= (3, 0) else io.BytesIO)("\n".join(lines+["\n"]))) 525 | 526 | def _test_exception(self, gold, system): 527 | self.assertRaises(UDError, evaluate, self._load_words(gold), self._load_words(system)) 528 | 529 | def _test_ok(self, gold, system, correct): 530 | metrics = evaluate(self._load_words(gold), self._load_words(system)) 531 | gold_words = sum((max(1, len(word.split(" ")) - 1) for word in gold)) 532 | system_words = sum((max(1, len(word.split(" ")) - 1) for word in system)) 533 | self.assertEqual((metrics["Words"].precision, metrics["Words"].recall, metrics["Words"].f1), 534 | (correct / system_words, correct / gold_words, 2 * correct / (gold_words + system_words))) 535 | 536 | def test_exception(self): 537 | self._test_exception(["a"], ["b"]) 538 | 539 | def test_equal(self): 540 | self._test_ok(["a"], ["a"], 1) 541 | self._test_ok(["a", "b", "c"], ["a", "b", "c"], 3) 542 | 543 | def test_equal_with_multiword(self): 544 | self._test_ok(["abc a b c"], ["a", "b", "c"], 3) 545 | self._test_ok(["a", "bc b c", "d"], ["a", "b", "c", "d"], 4) 546 | self._test_ok(["abcd a b c d"], ["ab a b", "cd c d"], 4) 547 | self._test_ok(["abc a b c", "de d e"], ["a", "bcd b c d", "e"], 5) 548 | 549 | def test_alignment(self): 550 | self._test_ok(["abcd"], ["a", "b", "c", "d"], 0) 551 | 
self._test_ok(["abc", "d"], ["a", "b", "c", "d"], 1) 552 | self._test_ok(["a", "bc", "d"], ["a", "b", "c", "d"], 2) 553 | self._test_ok(["a", "bc b c", "d"], ["a", "b", "cd"], 2) 554 | self._test_ok(["abc a BX c", "def d EX f"], ["ab a b", "cd c d", "ef e f"], 4) 555 | self._test_ok(["ab a b", "cd bc d"], ["a", "bc", "d"], 2) 556 | self._test_ok(["a", "bc b c", "d"], ["ab AX BX", "cd CX a"], 1) 557 | -------------------------------------------------------------------------------- /barchybrid/src/utils/eval.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Author: Yuval Krymolowski 4 | # Addition of precision and recall 5 | # and of frame confusion list: Sabine Buchholz 6 | # Addition of DEPREL + ATTACHMENT: 7 | # Prokopis Prokopidis (prokopis at ilsp dot gr) 8 | # Acknowledgements: 9 | # to Markus Kuhn for suggesting the use of 10 | # the Unicode category property 11 | 12 | if ($] < 5.008001) 13 | { 14 | printf STDERR < -s 39 | 40 | This script evaluates a system output with respect to a gold standard. 41 | Both files should be in UTF-8 encoded CoNLL-X tabular format. 42 | 43 | Punctuation tokens (those where all characters have the Unicode 44 | category property "Punctuation") are ignored for scoring (unless the 45 | -p flag is used). 46 | 47 | The output breaks down the errors according to their type and context. 48 | 49 | Optional parameters: 50 | -o FILE : output: print output to FILE (default is standard output) 51 | -q : quiet: only print overall performance, without the details 52 | -b : evalb: produce output in a format similar to evalb 53 | (http://nlp.cs.nyu.edu/evalb/); use together with -q 54 | -p : punctuation: also score on punctuation (default is not to score on it) 55 | -v : version: show the version number 56 | -h : help: print this help text and exit 57 | 58 | EOT 59 | ; 60 | 61 | my ($line_num) ; 62 | my ($sep) = '0x01' ; 63 | 64 | my ($START) = '.S' ; 65 | my ($END) = '.E' ; 66 | 67 | my ($con_err_num) = 3 ; 68 | my ($freq_err_num) = 10 ; 69 | my ($spec_err_loc_con) = 8 ; 70 | 71 | ################################################################################ 72 | ### subfunctions ### 73 | ################################################################################ 74 | 75 | # Whether a string consists entirely of characters with the Unicode 76 | # category property "Punctuation" (see "man perlunicode") 77 | sub is_uni_punct 78 | { 79 | my ($word) = @_ ; 80 | 81 | return scalar(Encode::decode_utf8($word)=~ /^\p{Punctuation}+$/) ; 82 | } 83 | 84 | # The length of a unicode string, excluding non-spacing marks 85 | # (for example vowel marks in Arabic) 86 | 87 | sub uni_len 88 | { 89 | my ($word) = @_ ; 90 | my ($ch, $l) ; 91 | 92 | $l = 0 ; 93 | foreach $ch (split(//, Encode::decode_utf8($word))) 94 | { 95 | if ($ch !~ /^\p{NonspacingMark}/) 96 | { 97 | $l++ ; 98 | } 99 | } 100 | 101 | return $l ; 102 | } 103 | 104 | sub filter_context_counts 105 | { # filter_context_counts 106 | 107 | my ($vec, $num, $max_len) = @_ ; 108 | my ($con, $l, $thresh) ; 109 | 110 | $thresh = (sort {$b <=> $a} values %{$vec})[$num-1] ; 111 | 112 | foreach $con (keys %{$vec}) 113 | { 114 | if (${$vec}{$con} < $thresh) 115 | { 116 | delete ${$vec}{$con} ; 117 | next ; 118 | } 119 | 120 | $l = uni_len($con) ; 121 | 122 | if ($l > ${$max_len}) 123 | { 124 | ${$max_len} = $l ; 125 | } 126 | } 127 | 128 | } # filter_context_counts 129 | 130 | sub print_context 131 | { # print_context 132 | 133 | my ($counts, $counts_pos, 
$max_con_len, $max_con_pos_len) = @_ ; 134 | my (@v_con, @v_con_pos, $con, $con_pos, $i, $n) ; 135 | 136 | printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_pos_len, 'CPOS', 'any', 'head', 'dep', 'both' ; 137 | printf OUT " ||" ; 138 | printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s", $max_con_len, 'word', 'any', 'head', 'dep', 'both' ; 139 | printf OUT "\n" ; 140 | printf OUT " %s-+------+------+------+-----", '-' x $max_con_pos_len; 141 | printf OUT "--++" ; 142 | printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len; 143 | printf OUT "\n" ; 144 | 145 | @v_con = sort {${$counts}{tot}{$b} <=> ${$counts}{tot}{$a}} keys %{${$counts}{tot}} ; 146 | @v_con_pos = sort {${$counts_pos}{tot}{$b} <=> ${$counts_pos}{tot}{$a}} keys %{${$counts_pos}{tot}} ; 147 | 148 | $n = scalar @v_con ; 149 | if (scalar @v_con_pos > $n) 150 | { 151 | $n = scalar @v_con_pos ; 152 | } 153 | 154 | foreach $i (0 .. $n-1) 155 | { 156 | if (defined $v_con_pos[$i]) 157 | { 158 | $con_pos = $v_con_pos[$i] ; 159 | printf OUT " %-*s | %4d | %4d | %4d | %4d", 160 | $max_con_pos_len, $con_pos, ${$counts_pos}{tot}{$con_pos}, 161 | ${$counts_pos}{err_head}{$con_pos}, ${$counts_pos}{err_dep}{$con_pos}, 162 | ${$counts_pos}{err_dep}{$con_pos}+${$counts_pos}{err_head}{$con_pos}-${$counts_pos}{tot}{$con_pos} ; 163 | } 164 | else 165 | { 166 | printf OUT " %-*s | %4s | %4s | %4s | %4s", 167 | $max_con_pos_len, ' ', ' ', ' ', ' ', ' ' ; 168 | } 169 | 170 | printf OUT " ||" ; 171 | 172 | if (defined $v_con[$i]) 173 | { 174 | $con = $v_con[$i] ; 175 | printf OUT " %-*s | %4d | %4d | %4d | %4d", 176 | $max_con_len+length($con)-uni_len($con), $con, ${$counts}{tot}{$con}, 177 | ${$counts}{err_head}{$con}, ${$counts}{err_dep}{$con}, 178 | ${$counts}{err_dep}{$con}+${$counts}{err_head}{$con}-${$counts}{tot}{$con} ; 179 | } 180 | else 181 | { 182 | printf OUT " %-*s | %4s | %4s | %4s | %4s", 183 | $max_con_len, ' ', ' ', ' ', ' ', ' ' ; 184 | } 185 | 186 | printf OUT "\n" ; 187 | } 188 | 189 | printf OUT " %s-+------+------+------+-----", '-' x $max_con_pos_len; 190 | printf OUT "--++" ; 191 | printf OUT "--%s-+------+------+------+-----", '-' x $max_con_len; 192 | printf OUT "\n" ; 193 | 194 | printf OUT "\n\n" ; 195 | 196 | } # print_context 197 | 198 | sub num_as_word 199 | { 200 | my ($num) = @_ ; 201 | 202 | $num = abs($num) ; 203 | 204 | if ($num == 1) 205 | { 206 | return ('one word') ; 207 | } 208 | elsif ($num == 2) 209 | { 210 | return ('two words') ; 211 | } 212 | elsif ($num == 3) 213 | { 214 | return ('three words') ; 215 | } 216 | elsif ($num == 4) 217 | { 218 | return ('four words') ; 219 | } 220 | else 221 | { 222 | return ($num.' 
words') ; 223 | } 224 | } 225 | 226 | sub describe_err 227 | { # describe_err 228 | 229 | my ($head_err, $head_aft_bef, $dep_err) = @_ ; 230 | my ($dep_g, $dep_s, $desc) ; 231 | my ($head_aft_bef_g, $head_aft_bef_s) = split(//, $head_aft_bef) ; 232 | 233 | if ($head_err eq '-') 234 | { 235 | $desc = 'correct head' ; 236 | 237 | if ($head_aft_bef_s eq '0') 238 | { 239 | $desc .= ' (0)' ; 240 | } 241 | elsif ($head_aft_bef_s eq 'e') 242 | { 243 | $desc .= ' (the focus word)' ; 244 | } 245 | elsif ($head_aft_bef_s eq 'a') 246 | { 247 | $desc .= ' (after the focus word)' ; 248 | } 249 | elsif ($head_aft_bef_s eq 'b') 250 | { 251 | $desc .= ' (before the focus word)' ; 252 | } 253 | } 254 | elsif ($head_aft_bef_s eq '0') 255 | { 256 | $desc = 'head = 0 instead of ' ; 257 | if ($head_aft_bef_g eq 'a') 258 | { 259 | $desc.= 'after ' ; 260 | } 261 | if ($head_aft_bef_g eq 'b') 262 | { 263 | $desc.= 'before ' ; 264 | } 265 | $desc .= 'the focus word' ; 266 | } 267 | elsif ($head_aft_bef_g eq '0') 268 | { 269 | $desc = 'head is ' ; 270 | if ($head_aft_bef_g eq 'a') 271 | { 272 | $desc.= 'after ' ; 273 | } 274 | if ($head_aft_bef_g eq 'b') 275 | { 276 | $desc.= 'before ' ; 277 | } 278 | $desc .= 'the focus word instead of 0' ; 279 | } 280 | else 281 | { 282 | $desc = num_as_word($head_err) ; 283 | if ($head_err < 0) 284 | { 285 | $desc .= ' before' ; 286 | } 287 | else 288 | { 289 | $desc .= ' after' ; 290 | } 291 | 292 | $desc = 'head '.$desc.' the correct head ' ; 293 | 294 | if ($head_aft_bef_s eq '0') 295 | { 296 | $desc .= '(0' ; 297 | } 298 | elsif ($head_aft_bef_s eq 'e') 299 | { 300 | $desc .= '(the focus word' ; 301 | } 302 | elsif ($head_aft_bef_s eq 'a') 303 | { 304 | $desc .= '(after the focus word' ; 305 | } 306 | elsif ($head_aft_bef_s eq 'b') 307 | { 308 | $desc .= '(before the focus word' ; 309 | } 310 | 311 | if ($head_aft_bef_g ne $head_aft_bef_s) 312 | { 313 | $desc .= ' instead of' ; 314 | if ($head_aft_bef_s eq '0') 315 | { 316 | $desc .= '0' ; 317 | } 318 | elsif ($head_aft_bef_s eq 'e') 319 | { 320 | $desc .= 'the focus word' ; 321 | } 322 | elsif ($head_aft_bef_s eq 'a') 323 | { 324 | $desc .= 'after the focus word' ; 325 | } 326 | elsif ($head_aft_bef_s eq 'b') 327 | { 328 | $desc .= 'before the focus word' ; 329 | } 330 | } 331 | 332 | $desc .= ')' ; 333 | } 334 | 335 | $desc .= ', ' ; 336 | 337 | if ($dep_err eq '-') 338 | { 339 | $desc .= 'correct dependency' ; 340 | } 341 | else 342 | { 343 | ($dep_g, $dep_s) = ($dep_err =~ /^(.*)->(.*)$/) ; 344 | $desc .= sprintf('dependency "%s" instead of "%s"', $dep_s, $dep_g) ; 345 | } 346 | 347 | return($desc) ; 348 | 349 | } # describe_err 350 | 351 | sub get_context 352 | { # get_context 353 | 354 | my ($sent, $i_w) = @_ ; 355 | my ($w_2, $w_1, $w1, $w2) ; 356 | my ($p_2, $p_1, $p1, $p2) ; 357 | 358 | if ($i_w >= 2) 359 | { 360 | $w_2 = ${${$sent}[$i_w-2]}{word} ; 361 | $p_2 = ${${$sent}[$i_w-2]}{pos} ; 362 | } 363 | else 364 | { 365 | $w_2 = $START ; 366 | $p_2 = $START ; 367 | } 368 | 369 | if ($i_w >= 1) 370 | { 371 | $w_1 = ${${$sent}[$i_w-1]}{word} ; 372 | $p_1 = ${${$sent}[$i_w-1]}{pos} ; 373 | } 374 | else 375 | { 376 | $w_1 = $START ; 377 | $p_1 = $START ; 378 | } 379 | 380 | if ($i_w <= scalar @{$sent}-2) 381 | { 382 | $w1 = ${${$sent}[$i_w+1]}{word} ; 383 | $p1 = ${${$sent}[$i_w+1]}{pos} ; 384 | } 385 | else 386 | { 387 | $w1 = $END ; 388 | $p1 = $END ; 389 | } 390 | 391 | if ($i_w <= scalar @{$sent}-3) 392 | { 393 | $w2 = ${${$sent}[$i_w+2]}{word} ; 394 | $p2 = ${${$sent}[$i_w+2]}{pos} ; 395 | } 396 | else 397 | { 398 | 
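  # past the end of the sentence: pad with the .E end marker (mirrors the .S padding at the sentence start)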
$w2 = $END ; 399 | $p2 = $END ; 400 | } 401 | 402 | return ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) ; 403 | 404 | } # get_context 405 | 406 | sub read_sent 407 | { # read_sent 408 | 409 | my ($sent_gold, $sent_sys) = @_ ; 410 | my ($line_g, $line_s, $new_sent) ; 411 | my (%fields_g, %fields_s) ; 412 | 413 | $new_sent = 1 ; 414 | 415 | @{$sent_gold} = () ; 416 | @{$sent_sys} = () ; 417 | 418 | while (1) 419 | { # main reading loop 420 | 421 | $line_g = <GOLD> ; 422 | $line_s = <SYS> ; 423 | 424 | $line_num++ ; 425 | 426 | # system output has fewer lines than gold standard 427 | if ((defined $line_g) && (! defined $line_s)) 428 | { 429 | printf STDERR "line mismatch, line %d:\n", $line_num ; 430 | printf STDERR " gold: %s", $line_g ; 431 | printf STDERR " sys : past end of file\n" ; 432 | exit(1) ; 433 | } 434 | 435 | # system output has more lines than gold standard 436 | if ((! defined $line_g) && (defined $line_s)) 437 | { 438 | printf STDERR "line mismatch, line %d:\n", $line_num ; 439 | printf STDERR " gold: past end of file\n" ; 440 | printf STDERR " sys : %s", $line_s ; 441 | exit(1) ; 442 | } 443 | 444 | # end of file reached for both 445 | if ((! defined $line_g) && (! defined $line_s)) 446 | { 447 | return (1) ; 448 | } 449 | 450 | # one contains end of sentence but other one does not 451 | if (($line_g =~ /^\s+$/) != ($line_s =~ /^\s+$/)) 452 | { 453 | printf STDERR "line mismatch, line %d:\n", $line_num ; 454 | printf STDERR " gold: %s", $line_g ; 455 | printf STDERR " sys : %s", $line_s ; 456 | exit(1) ; 457 | } 458 | 459 | # end of sentence reached 460 | if ($line_g =~ /^\s+$/) 461 | { 462 | return(0) ; 463 | } 464 | 465 | # now both lines contain information 466 | 467 | if ($new_sent) 468 | { 469 | $new_sent = 0 ; 470 | } 471 | 472 | # 'official' column names 473 | # options.output = ['id','form','lemma','cpostag','postag', 474 | # 'feats','head','deprel','phead','pdeprel'] 475 | 476 | @fields_g{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_g))[1, 3, 6, 7] ; 477 | 478 | push @{$sent_gold}, { %fields_g } ; 479 | 480 | @fields_s{'word', 'pos', 'head', 'dep'} = (split (/\s+/, $line_s))[1, 3, 6, 7] ; 481 | 482 | if (($fields_g{word} ne $fields_s{word}) 483 | || 484 | ($fields_g{pos} ne $fields_s{pos})) 485 | { 486 | printf STDERR "Word/pos mismatch, line %d:\n", $line_num ; 487 | printf STDERR " gold: %s", $line_g ; 488 | printf STDERR " sys : %s", $line_s ; 489 | #exit(1) ; 490 | } 491 | 492 | push @{$sent_sys}, { %fields_s } ; 493 | 494 | } # main reading loop 495 | 496 | } # read_sent 497 | 498 | ################################################################################ 499 | ### main ### 500 | ################################################################################ 501 | 502 | our ($opt_g, $opt_s, $opt_o, $opt_h, $opt_v, $opt_q, $opt_p, $opt_b) ; 503 | 504 | my ($sent_num, $eof, $word_num, @err_sent) ; 505 | my (@sent_gold, @sent_sys, @starts) ; 506 | my ($word, $pos, $wp, $head_g, $dep_g, $head_s, $dep_s) ; 507 | my (%counts, $err_head, $err_dep, $con, $con1, $con_pos, $con_pos1, $thresh) ; 508 | my ($head_err, $dep_err, @cur_err, %err_counts, $err_counter, $err_desc) ; 509 | my ($loc_con, %loc_con_err_counts, %err_desc) ; 510 | my ($head_aft_bef_g, $head_aft_bef_s, $head_aft_bef) ; 511 | my ($con_bef, $con_aft, $con_bef_2, $con_aft_2, @bits, @e_bits, @v_con, @v_con_pos) ; 512 | my ($con_pos_bef, $con_pos_aft, $con_pos_bef_2, $con_pos_aft_2) ; 513 | my ($max_word_len, $max_pos_len, $max_con_len, $max_con_pos_len) ; 514 | my ($max_word_spec_len, $max_con_bef_len,
$max_con_aft_len) ; 515 | my (%freq_err, $err) ; 516 | 517 | my ($i, $j, $i_w, $l, $n_args) ; 518 | my ($w_2, $w_1, $w1, $w2) ; 519 | my ($wp_2, $wp_1, $wp1, $wp2) ; 520 | my ($p_2, $p_1, $p1, $p2) ; 521 | 522 | my ($short_output) ; 523 | my ($score_on_punct) ; 524 | $counts{punct} = 0; # initialize 525 | 526 | getopts("g:o:s:qvhpb") ; 527 | 528 | if (defined $opt_v) 529 | { 530 | my $id = '$Id: eval.pl,v 1.9 2006/05/09 20:30:01 yuval Exp $'; 531 | my @parts = split ' ',$id; 532 | print "Version $parts[2]\n"; 533 | exit(0); 534 | } 535 | 536 | if ((defined $opt_h) || ((! defined $opt_g) && (! defined $opt_s))) 537 | { 538 | die $usage ; 539 | } 540 | 541 | if (! defined $opt_g) 542 | { 543 | die "Gold standard file (-g) missing\n" ; 544 | } 545 | 546 | if (! defined $opt_s) 547 | { 548 | die "System output file (-s) missing\n" ; 549 | } 550 | 551 | if (! defined $opt_o) 552 | { 553 | $opt_o = '-' ; 554 | } 555 | 556 | if (defined $opt_q) 557 | { 558 | $short_output = 1 ; 559 | } else { 560 | $short_output = 0 ; 561 | } 562 | 563 | if (defined $opt_p) 564 | { 565 | $score_on_punct = 1 ; 566 | } else { 567 | $score_on_punct = 0 ; 568 | } 569 | 570 | $line_num = 0 ; 571 | $sent_num = 0 ; 572 | $eof = 0 ; 573 | 574 | @err_sent = () ; 575 | @starts = () ; 576 | 577 | %{$err_sent[0]} = () ; 578 | 579 | $max_pos_len = length('CPOS') ; 580 | 581 | ################################################################################ 582 | ### reading input ### 583 | ################################################################################ 584 | 585 | open (GOLD, "<$opt_g") || die "Could not open gold standard file $opt_g\n" ; 586 | open (SYS, "<$opt_s") || die "Could not open system output file $opt_s\n" ; 587 | open (OUT, ">$opt_o") || die "Could not open output file $opt_o\n" ; 588 | 589 | 590 | if (defined $opt_b) { # produce output similar to evalb 591 | print OUT " Sent. Attachment Correct Scoring \n"; 592 | print OUT " ID Tokens - Unlab. Lab. HEAD HEAD+DEPREL tokens - - - -\n"; 593 | print OUT " ============================================================================\n"; 594 | } 595 | 596 | 597 | while (! $eof) 598 | { # main reading loop 599 | 600 | $starts[$sent_num] = $line_num+1 ; 601 | $eof = read_sent(\@sent_gold, \@sent_sys) ; 602 | 603 | $sent_num++ ; 604 | 605 | %{$err_sent[$sent_num]} = () ; 606 | $word_num = scalar @sent_gold ; 607 | 608 | # for accuracy per sentence 609 | my %sent_counts = ( tot => 0, 610 | err_any => 0, 611 | err_head => 0 612 | ); 613 | 614 | # printf "$sent_num $word_num\n" ; 615 | 616 | my @frames_g = ('** '); # the initial frame for the virtual root 617 | my @frames_s = ('** '); # the initial frame for the virtual root 618 | foreach $i_w (0 .. $word_num-1) 619 | { # loop on words 620 | push @frames_g, ''; # initialize 621 | push @frames_s, ''; # initialize 622 | } 623 | 624 | foreach $i_w (0 .. $word_num-1) 625 | { # loop on words 626 | 627 | ($word, $pos, $head_g, $dep_g) 628 | = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ; 629 | $wp = $word.' / '.$pos ; 630 | 631 | # printf "%d: %s %s %s %s\n", $i_w, $word, $pos, $head_g, $dep_g ; 632 | 633 | if ((! 
$score_on_punct) && is_uni_punct($word)) 634 | { 635 | $counts{punct}++ ; 636 | # ignore punctuations 637 | next ; 638 | } 639 | 640 | if (length($pos) > $max_pos_len) 641 | { 642 | $max_pos_len = length($pos) ; 643 | } 644 | 645 | ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ; 646 | 647 | $counts{tot}++ ; 648 | $counts{word}{$wp}{tot}++ ; 649 | $counts{pos}{$pos}{tot}++ ; 650 | $counts{head}{$head_g-$i_w-1}{tot}++ ; 651 | 652 | # for frame confusions 653 | # add child to frame of parent 654 | $frames_g[$head_g] .= "$dep_g "; 655 | $frames_s[$head_s] .= "$dep_s "; 656 | # add to frame of token itself 657 | $frames_g[$i_w+1] .= "*$dep_g* "; # $i_w+1 because $i_w starts counting at zero 658 | $frames_s[$i_w+1] .= "*$dep_g* "; 659 | 660 | # for precision and recall of DEPREL 661 | $counts{dep}{$dep_g}{tot}++ ; # counts for gold standard deprels 662 | $counts{dep2}{$dep_g}{$dep_s}++ ; # counts for confusions 663 | $counts{dep_s}{$dep_s}{tot}++ ; # counts for system deprels 664 | $counts{all_dep}{$dep_g} = 1 ; # list of all deprels that occur ... 665 | $counts{all_dep}{$dep_s} = 1 ; # ... in either gold or system output 666 | 667 | # for precision and recall of HEAD direction 668 | my $dir_g; 669 | if ($head_g == 0) { 670 | $dir_g = 'to_root'; 671 | } elsif ($head_g < $i_w+1) { # $i_w+1 because $i_w starts counting at zero 672 | # also below 673 | $dir_g = 'left'; 674 | } elsif ($head_g > $i_w+1) { 675 | $dir_g = 'right'; 676 | } else { 677 | # token links to itself; should never happen in correct gold standard 678 | $dir_g = 'self'; 679 | } 680 | my $dir_s; 681 | if ($head_s == 0) { 682 | $dir_s = 'to_root'; 683 | } elsif ($head_s < $i_w+1) { 684 | $dir_s = 'left'; 685 | } elsif ($head_s > $i_w+1) { 686 | $dir_s = 'right'; 687 | } else { 688 | # token links to itself; should not happen in good system 689 | # (but not forbidden in shared task) 690 | $dir_s = 'self'; 691 | } 692 | $counts{dir_g}{$dir_g}{tot}++ ; # counts for gold standard head direction 693 | $counts{dir2}{$dir_g}{$dir_s}++ ; # counts for confusions 694 | $counts{dir_s}{$dir_s}{tot}++ ; # counts for system head direction 695 | 696 | # for precision and recall of HEAD distance 697 | my $dist_g; 698 | if ($head_g == 0) { 699 | $dist_g = 'to_root'; 700 | } elsif ( abs($head_g - ($i_w+1)) <= 1 ) { 701 | $dist_g = '1'; # includes the 'self' cases 702 | } elsif ( abs($head_g - ($i_w+1)) <= 2 ) { 703 | $dist_g = '2'; 704 | } elsif ( abs($head_g - ($i_w+1)) <= 6 ) { 705 | $dist_g = '3-6'; 706 | } else { 707 | $dist_g = '7-...'; 708 | } 709 | my $dist_s; 710 | if ($head_s == 0) { 711 | $dist_s = 'to_root'; 712 | } elsif ( abs($head_s - ($i_w+1)) <= 1 ) { 713 | $dist_s = '1'; # includes the 'self' cases 714 | } elsif ( abs($head_s - ($i_w+1)) <= 2 ) { 715 | $dist_s = '2'; 716 | } elsif ( abs($head_s - ($i_w+1)) <= 6 ) { 717 | $dist_s = '3-6'; 718 | } else { 719 | $dist_s = '7-...'; 720 | } 721 | $counts{dist_g}{$dist_g}{tot}++ ; # counts for gold standard head distance 722 | $counts{dist2}{$dist_g}{$dist_s}++ ; # counts for confusions 723 | $counts{dist_s}{$dist_s}{tot}++ ; # counts for system head distance 724 | 725 | 726 | $err_head = ($head_g ne $head_s) ; # error in head 727 | $err_dep = ($dep_g ne $dep_s) ; # error in deprel 728 | 729 | $head_err = '-' ; 730 | $dep_err = '-' ; 731 | 732 | # for accuracy per sentence 733 | $sent_counts{tot}++ ; 734 | if ($err_dep || $err_head) { 735 | $sent_counts{err_any}++ ; 736 | } 737 | if ($err_head) { 738 | $sent_counts{err_head}++ ; 739 | } 740 | 741 | # total counts and counts for 
CPOS involved in errors 742 | 743 | if ($head_g eq '0') 744 | { 745 | $head_aft_bef_g = '0' ; 746 | } 747 | elsif ($head_g eq $i_w+1) 748 | { 749 | $head_aft_bef_g = 'e' ; 750 | } 751 | else 752 | { 753 | $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ; 754 | } 755 | 756 | if ($head_s eq '0') 757 | { 758 | $head_aft_bef_s = '0' ; 759 | } 760 | elsif ($head_s eq $i_w+1) 761 | { 762 | $head_aft_bef_s = 'e' ; 763 | } 764 | else 765 | { 766 | $head_aft_bef_s = ($head_s <= $i_w+1 ? 'b' : 'a') ; 767 | } 768 | 769 | $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ; 770 | 771 | if ($err_head) 772 | { 773 | if ($head_aft_bef_s eq '0') 774 | { 775 | $head_err = 0 ; 776 | } 777 | else 778 | { 779 | $head_err = $head_s-$head_g ; 780 | } 781 | 782 | $err_sent[$sent_num]{head}++ ; 783 | $counts{err_head}{tot}++ ; 784 | $counts{err_head}{$head_err}++ ; 785 | 786 | $counts{word}{err_head}{$wp}++ ; 787 | $counts{pos}{$pos}{err_head}{tot}++ ; 788 | $counts{pos}{$pos}{err_head}{$head_err}++ ; 789 | } 790 | 791 | if ($err_dep) 792 | { 793 | $dep_err = $dep_g.'->'.$dep_s ; 794 | $err_sent[$sent_num]{dep}++ ; 795 | $counts{err_dep}{tot}++ ; 796 | $counts{err_dep}{$dep_err}++ ; 797 | 798 | $counts{word}{err_dep}{$wp}++ ; 799 | $counts{pos}{$pos}{err_dep}{tot}++ ; 800 | $counts{pos}{$pos}{err_dep}{$dep_err}++ ; 801 | 802 | if ($err_head) 803 | { 804 | $counts{err_both}++ ; 805 | $counts{pos}{$pos}{err_both}++ ; 806 | } 807 | } 808 | 809 | ### DEPREL + ATTACHMENT 810 | if ((!$err_dep) && ($err_head)) { 811 | $counts{err_head_corr_dep}{tot}++ ; 812 | $counts{err_head_corr_dep}{$dep_s}++ ; 813 | } 814 | ### DEPREL + ATTACHMENT 815 | 816 | # counts for words involved in errors 817 | 818 | if (! ($err_head || $err_dep)) 819 | { 820 | next ; 821 | } 822 | 823 | $err_sent[$sent_num]{word}++ ; 824 | $counts{err_any}++ ; 825 | $counts{word}{err_any}{$wp}++ ; 826 | $counts{pos}{$pos}{err_any}++ ; 827 | 828 | ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ; 829 | 830 | if ($w_2 ne $START) 831 | { 832 | $wp_2 = $w_2.' / '.$p_2 ; 833 | } 834 | else 835 | { 836 | $wp_2 = $w_2 ; 837 | } 838 | 839 | if ($w_1 ne $START) 840 | { 841 | $wp_1 = $w_1.' / '.$p_1 ; 842 | } 843 | else 844 | { 845 | $wp_1 = $w_1 ; 846 | } 847 | 848 | if ($w1 ne $END) 849 | { 850 | $wp1 = $w1.' / '.$p1 ; 851 | } 852 | else 853 | { 854 | $wp1 = $w1 ; 855 | } 856 | 857 | if ($w2 ne $END) 858 | { 859 | $wp2 = $w2.' / '.$p2 ; 860 | } 861 | else 862 | { 863 | $wp2 = $w2 ; 864 | } 865 | 866 | $con_bef = $wp_1 ; 867 | $con_bef_2 = $wp_2.' + '.$wp_1 ; 868 | $con_aft = $wp1 ; 869 | $con_aft_2 = $wp1.' 
+ '.$wp2 ; 870 | 871 | $con_pos_bef = $p_1 ; 872 | $con_pos_bef_2 = $p_2.'+'.$p_1 ; 873 | $con_pos_aft = $p1 ; 874 | $con_pos_aft_2 = $p1.'+'.$p2 ; 875 | 876 | if ($w_1 ne $START) 877 | { 878 | # do not count '.S' as a word context 879 | $counts{con_bef_2}{tot}{$con_bef_2}++ ; 880 | $counts{con_bef_2}{err_head}{$con_bef_2} += $err_head ; 881 | $counts{con_bef_2}{err_dep}{$con_bef_2} += $err_dep ; 882 | $counts{con_bef}{tot}{$con_bef}++ ; 883 | $counts{con_bef}{err_head}{$con_bef} += $err_head ; 884 | $counts{con_bef}{err_dep}{$con_bef} += $err_dep ; 885 | } 886 | 887 | if ($w1 ne $END) 888 | { 889 | # do not count '.E' as a word context 890 | $counts{con_aft_2}{tot}{$con_aft_2}++ ; 891 | $counts{con_aft_2}{err_head}{$con_aft_2} += $err_head ; 892 | $counts{con_aft_2}{err_dep}{$con_aft_2} += $err_dep ; 893 | $counts{con_aft}{tot}{$con_aft}++ ; 894 | $counts{con_aft}{err_head}{$con_aft} += $err_head ; 895 | $counts{con_aft}{err_dep}{$con_aft} += $err_dep ; 896 | } 897 | 898 | $counts{con_pos_bef_2}{tot}{$con_pos_bef_2}++ ; 899 | $counts{con_pos_bef_2}{err_head}{$con_pos_bef_2} += $err_head ; 900 | $counts{con_pos_bef_2}{err_dep}{$con_pos_bef_2} += $err_dep ; 901 | $counts{con_pos_bef}{tot}{$con_pos_bef}++ ; 902 | $counts{con_pos_bef}{err_head}{$con_pos_bef} += $err_head ; 903 | $counts{con_pos_bef}{err_dep}{$con_pos_bef} += $err_dep ; 904 | 905 | $counts{con_pos_aft_2}{tot}{$con_pos_aft_2}++ ; 906 | $counts{con_pos_aft_2}{err_head}{$con_pos_aft_2} += $err_head ; 907 | $counts{con_pos_aft_2}{err_dep}{$con_pos_aft_2} += $err_dep ; 908 | $counts{con_pos_aft}{tot}{$con_pos_aft}++ ; 909 | $counts{con_pos_aft}{err_head}{$con_pos_aft} += $err_head ; 910 | $counts{con_pos_aft}{err_dep}{$con_pos_aft} += $err_dep ; 911 | 912 | $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ; 913 | $freq_err{$err}++ ; 914 | 915 | } # loop on words 916 | 917 | foreach $i_w (0 .. 
$word_num) # including one for the virtual root 918 | { # loop on words 919 | if ($frames_g[$i_w] ne $frames_s[$i_w]) { 920 | $counts{frame2}{"$frames_g[$i_w]/ $frames_s[$i_w]"}++ ; 921 | } 922 | } 923 | 924 | if (defined $opt_b) { # produce output similar to evalb 925 | if ($word_num > 0) { 926 | my ($unlabeled,$labeled) = ('NaN', 'NaN'); 927 | if ($sent_counts{tot} > 0) { # there are scoring tokens 928 | $unlabeled = 100-$sent_counts{err_head}*100.0/$sent_counts{tot}; 929 | $labeled = 100-$sent_counts{err_any} *100.0/$sent_counts{tot}; 930 | } 931 | printf OUT " %4d %4d 0 %6.2f %6.2f %4d %4d %4d 0 0 0 0\n", 932 | $sent_num, $word_num, 933 | $unlabeled, $labeled, 934 | $sent_counts{tot}-$sent_counts{err_head}, 935 | $sent_counts{tot}-$sent_counts{err_any}, 936 | $sent_counts{tot},; 937 | } 938 | } 939 | 940 | } # main reading loop 941 | 942 | ################################################################################ 943 | ### printing output ### 944 | ################################################################################ 945 | 946 | if (defined $opt_b) { # produce output similar to evalb 947 | print OUT "\n\n"; 948 | } 949 | printf OUT " Labeled attachment score: %d / %d * 100 = %.2f %%\n", 950 | $counts{tot}-$counts{err_any}, $counts{tot}, 100-$counts{err_any}*100.0/$counts{tot} ; 951 | printf OUT " Unlabeled attachment score: %d / %d * 100 = %.2f %%\n", 952 | $counts{tot}-$counts{err_head}{tot}, $counts{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot} ; 953 | printf OUT " Label accuracy score: %d / %d * 100 = %.2f %%\n", 954 | $counts{tot}-$counts{err_dep}{tot}, $counts{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot} ; 955 | 956 | if ($short_output) 957 | { 958 | exit(0) ; 959 | } 960 | printf OUT "\n %s\n\n", '=' x 80 ; 961 | printf OUT " Evaluation of the results in %s\n vs. gold standard %s:\n\n", $opt_s, $opt_g ; 962 | 963 | printf OUT " Legend: '%s' - the beginning of a sentence, '%s' - the end of a sentence\n\n", $START, $END ; 964 | 965 | printf OUT " Number of non-scoring tokens: $counts{punct}\n\n"; 966 | 967 | printf OUT " The overall accuracy and its distribution over CPOSTAGs\n\n" ; 968 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 969 | 970 | printf OUT " %-10s | %-5s | %-5s | %% | %-5s | %% | %-5s | %%\n", 971 | 'Accuracy', 'words', 'right', 'right', 'both' ; 972 | printf OUT " %-10s | %-5s | %-5s | | %-5s | | %-5s |\n", 973 | ' ', ' ', 'head', ' dep', 'right' ; 974 | 975 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 976 | 977 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 978 | 'total', $counts{tot}, 979 | $counts{tot}-$counts{err_head}{tot}, 100-$counts{err_head}{tot}*100.0/$counts{tot}, 980 | $counts{tot}-$counts{err_dep}{tot}, 100-$counts{err_dep}{tot}*100.0/$counts{tot}, 981 | $counts{tot}-$counts{err_any}, 100-$counts{err_any}*100.0/$counts{tot} ; 982 | 983 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 984 | 985 | foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}}) 986 | { 987 | if (! defined($counts{pos}{$pos}{err_head}{tot})) 988 | { 989 | $counts{pos}{$pos}{err_head}{tot} = 0 ; 990 | } 991 | if (! defined($counts{pos}{$pos}{err_dep}{tot})) 992 | { 993 | $counts{pos}{$pos}{err_dep}{tot} = 0 ; 994 | } 995 | if (! 
defined($counts{pos}{$pos}{err_any})) 996 | { 997 | $counts{pos}{$pos}{err_any} = 0 ; 998 | } 999 | 1000 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 1001 | $pos, $counts{pos}{$pos}{tot}, 1002 | $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_head}{tot}, 100-$counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot}, 1003 | $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_dep}{tot}, 100-$counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot}, 1004 | $counts{pos}{$pos}{tot}-$counts{pos}{$pos}{err_any}, 100-$counts{pos}{$pos}{err_any}*100.0/$counts{pos}{$pos}{tot} ; 1005 | } 1006 | 1007 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1008 | 1009 | printf OUT "\n\n" ; 1010 | 1011 | printf OUT " The overall error rate and its distribution over CPOSTAGs\n\n" ; 1012 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1013 | 1014 | printf OUT " %-10s | %-5s | %-5s | %% | %-5s | %% | %-5s | %%\n", 1015 | 'Error', 'words', 'head', ' dep', 'both' ; 1016 | printf OUT " %-10s | %-5s | %-5s | | %-5s | | %-5s |\n", 1017 | 1018 | 'Rate', ' ', 'err', ' err', 'wrong' ; 1019 | 1020 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1021 | 1022 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 1023 | 'total', $counts{tot}, 1024 | $counts{err_head}{tot}, $counts{err_head}{tot}*100.0/$counts{tot}, 1025 | $counts{err_dep}{tot}, $counts{err_dep}{tot}*100.0/$counts{tot}, 1026 | $counts{err_both}, $counts{err_both}*100.0/$counts{tot} ; 1027 | 1028 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1029 | 1030 | foreach $pos (sort {$counts{pos}{$b}{tot} <=> $counts{pos}{$a}{tot}} keys %{$counts{pos}}) 1031 | { 1032 | if (! 
defined($counts{pos}{$pos}{err_both})) 1033 | { 1034 | $counts{pos}{$pos}{err_both} = 0 ; 1035 | } 1036 | 1037 | printf OUT " %-10s | %5d | %5d | %3.0f%% | %5d | %3.0f%% | %5d | %3.0f%%\n", 1038 | $pos, $counts{pos}{$pos}{tot}, 1039 | $counts{pos}{$pos}{err_head}{tot}, $counts{pos}{$pos}{err_head}{tot}*100.0/$counts{pos}{$pos}{tot}, 1040 | $counts{pos}{$pos}{err_dep}{tot}, $counts{pos}{$pos}{err_dep}{tot}*100.0/$counts{pos}{$pos}{tot}, 1041 | $counts{pos}{$pos}{err_both}, $counts{pos}{$pos}{err_both}*100.0/$counts{pos}{$pos}{tot} ; 1042 | 1043 | } 1044 | 1045 | printf OUT "%s\n", " -----------+-------+-------+------+-------+------+-------+-------" ; 1046 | 1047 | ### added by Sabine Buchholz 1048 | printf OUT "\n\n"; 1049 | printf OUT " Precision and recall of DEPREL\n\n"; 1050 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1051 | printf OUT " deprel | gold | correct | system | recall (%%) | precision (%%) \n"; 1052 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1053 | foreach my $dep (sort keys %{$counts{all_dep}}) { 1054 | # initialize 1055 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1056 | 1057 | if (defined($counts{dep2}{$dep}{$dep})) { 1058 | $tot_corr = $counts{dep2}{$dep}{$dep}; 1059 | } 1060 | if (defined($counts{dep}{$dep}{tot})) { 1061 | $tot_g = $counts{dep}{$dep}{tot}; 1062 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1063 | } 1064 | if (defined($counts{dep_s}{$dep}{tot})) { 1065 | $tot_s = $counts{dep_s}{$dep}{tot}; 1066 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1067 | } 1068 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1069 | $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1070 | } 1071 | 1072 | ### DEPREL + ATTACHMENT: 1073 | ### Same as Sabine's DEPREL apart from $tot_corr calculation 1074 | printf OUT "\n\n"; 1075 | printf OUT " Precision and recall of DEPREL + ATTACHMENT\n\n"; 1076 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1077 | printf OUT " deprel | gold | correct | system | recall (%%) | precision (%%) \n"; 1078 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1079 | foreach my $dep (sort keys %{$counts{all_dep}}) { 1080 | # initialize 1081 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1082 | 1083 | if (defined($counts{dep2}{$dep}{$dep})) { 1084 | if (defined($counts{err_head_corr_dep}{$dep})) { 1085 | $tot_corr = $counts{dep2}{$dep}{$dep} - $counts{err_head_corr_dep}{$dep}; 1086 | } else { 1087 | $tot_corr = $counts{dep2}{$dep}{$dep}; 1088 | } 1089 | } 1090 | if (defined($counts{dep}{$dep}{tot})) { 1091 | $tot_g = $counts{dep}{$dep}{tot}; 1092 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1093 | } 1094 | if (defined($counts{dep_s}{$dep}{tot})) { 1095 | $tot_s = $counts{dep_s}{$dep}{tot}; 1096 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1097 | } 1098 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1099 | $dep, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1100 | } 1101 | ### DEPREL + ATTACHMENT 1102 | 1103 | printf OUT "\n\n"; 1104 | printf OUT " Precision and recall of binned HEAD direction\n\n"; 1105 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1106 | printf OUT " direction | gold | correct | system | recall (%%) | precision (%%) \n"; 1107 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1108 | foreach my 
$dir ('to_root', 'left', 'right', 'self') { 1109 | # initialize 1110 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1111 | 1112 | if (defined($counts{dir2}{$dir}{$dir})) { 1113 | $tot_corr = $counts{dir2}{$dir}{$dir}; 1114 | } 1115 | if (defined($counts{dir_g}{$dir}{tot})) { 1116 | $tot_g = $counts{dir_g}{$dir}{tot}; 1117 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1118 | } 1119 | if (defined($counts{dir_s}{$dir}{tot})) { 1120 | $tot_s = $counts{dir_s}{$dir}{tot}; 1121 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1122 | } 1123 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1124 | $dir, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1125 | } 1126 | 1127 | printf OUT "\n\n"; 1128 | printf OUT " Precision and recall of binned HEAD distance\n\n"; 1129 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1130 | printf OUT " distance | gold | correct | system | recall (%%) | precision (%%) \n"; 1131 | printf OUT " ----------------+------+---------+--------+------------+---------------\n"; 1132 | foreach my $dist ('to_root', '1', '2', '3-6', '7-...') { 1133 | # initialize 1134 | my ($tot_corr, $tot_g, $tot_s, $prec, $rec) = (0, 0, 0, 'NaN', 'NaN'); 1135 | 1136 | if (defined($counts{dist2}{$dist}{$dist})) { 1137 | $tot_corr = $counts{dist2}{$dist}{$dist}; 1138 | } 1139 | if (defined($counts{dist_g}{$dist}{tot})) { 1140 | $tot_g = $counts{dist_g}{$dist}{tot}; 1141 | $rec = sprintf("%.2f",$tot_corr / $tot_g * 100); 1142 | } 1143 | if (defined($counts{dist_s}{$dist}{tot})) { 1144 | $tot_s = $counts{dist_s}{$dist}{tot}; 1145 | $prec = sprintf("%.2f",$tot_corr / $tot_s * 100); 1146 | } 1147 | printf OUT " %-15s | %4d | %7d | %6d | %10s | %13s\n", 1148 | $dist, $tot_g, $tot_corr, $tot_s, $rec, $prec; 1149 | } 1150 | 1151 | printf OUT "\n\n"; 1152 | printf OUT " Frame confusions (gold versus system; *...* marks the head token)\n\n"; 1153 | foreach my $frame (sort {$counts{frame2}{$b} <=> $counts{frame2}{$a}} keys %{$counts{frame2}}) 1154 | { 1155 | if ($counts{frame2}{$frame} >= 5) # (make 5 a changeable threshold later) 1156 | { 1157 | printf OUT " %3d %s\n", $counts{frame2}{$frame}, $frame; 1158 | } 1159 | } 1160 | ### end of: added by Sabine Buchholz 1161 | 1162 | 1163 | # 1164 | # Leave only the 5 words mostly involved in errors 1165 | # 1166 | 1167 | 1168 | $thresh = (sort {$b <=> $a} values %{$counts{word}{err_any}})[4] ; 1169 | 1170 | # ensure enough space for title 1171 | $max_word_len = length('word') ; 1172 | 1173 | foreach $word (keys %{$counts{word}{err_any}}) 1174 | { 1175 | if ($counts{word}{err_any}{$word} < $thresh) 1176 | { 1177 | delete $counts{word}{err_any}{$word} ; 1178 | next ; 1179 | } 1180 | 1181 | $l = uni_len($word) ; 1182 | if ($l > $max_word_len) 1183 | { 1184 | $max_word_len = $l ; 1185 | } 1186 | } 1187 | 1188 | # filter a case when the difference between the error counts 1189 | # for 2-word and 1-word contexts is small 1190 | # (leave the 2-word context) 1191 | 1192 | foreach $con (keys %{$counts{con_aft_2}{tot}}) 1193 | { 1194 | ($w1) = split(/\+/, $con) ; 1195 | 1196 | if (defined $counts{con_aft}{tot}{$w1} && 1197 | $counts{con_aft}{tot}{$w1}-$counts{con_aft_2}{tot}{$con} <= 1) 1198 | { 1199 | delete $counts{con_aft}{tot}{$w1} ; 1200 | } 1201 | } 1202 | 1203 | foreach $con (keys %{$counts{con_bef_2}{tot}}) 1204 | { 1205 | ($w_2, $w_1) = split(/\+/, $con) ; 1206 | 1207 | if (defined $counts{con_bef}{tot}{$w_1} && 1208 | $counts{con_bef}{tot}{$w_1}-$counts{con_bef_2}{tot}{$con} <= 1) 1209 | { 
1210 | delete $counts{con_bef}{tot}{$w_1} ; 1211 | } 1212 | } 1213 | 1214 | foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}}) 1215 | { 1216 | ($p1) = split(/\+/, $con_pos) ; 1217 | 1218 | if (defined($counts{con_pos_aft}{tot}{$p1}) && 1219 | $counts{con_pos_aft}{tot}{$p1}-$counts{con_pos_aft_2}{tot}{$con_pos} <= 1) 1220 | { 1221 | delete $counts{con_pos_aft}{tot}{$p1} ; 1222 | } 1223 | } 1224 | 1225 | foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}}) 1226 | { 1227 | ($p_2, $p_1) = split(/\+/, $con_pos) ; 1228 | 1229 | if (defined($counts{con_pos_bef}{tot}{$p_1}) && 1230 | $counts{con_pos_bef}{tot}{$p_1}-$counts{con_pos_bef_2}{tot}{$con_pos} <= 1) 1231 | { 1232 | delete $counts{con_pos_bef}{tot}{$p_1} ; 1233 | } 1234 | } 1235 | 1236 | # for each context type, take the three contexts most involved in errors 1237 | 1238 | $max_con_len = 0 ; 1239 | 1240 | filter_context_counts($counts{con_bef_2}{tot}, $con_err_num, \$max_con_len) ; 1241 | 1242 | filter_context_counts($counts{con_bef}{tot}, $con_err_num, \$max_con_len) ; 1243 | 1244 | filter_context_counts($counts{con_aft}{tot}, $con_err_num, \$max_con_len) ; 1245 | 1246 | filter_context_counts($counts{con_aft_2}{tot}, $con_err_num, \$max_con_len) ; 1247 | 1248 | # for each CPOS context type, take the three CPOS contexts most involved in errors 1249 | 1250 | $max_con_pos_len = 0 ; 1251 | 1252 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef_2}{tot}})[$con_err_num-1] ; 1253 | 1254 | foreach $con_pos (keys %{$counts{con_pos_bef_2}{tot}}) 1255 | { 1256 | if ($counts{con_pos_bef_2}{tot}{$con_pos} < $thresh) 1257 | { 1258 | delete $counts{con_pos_bef_2}{tot}{$con_pos} ; 1259 | next ; 1260 | } 1261 | if (length($con_pos) > $max_con_pos_len) 1262 | { 1263 | $max_con_pos_len = length($con_pos) ; 1264 | } 1265 | } 1266 | 1267 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_bef}{tot}})[$con_err_num-1] ; 1268 | 1269 | foreach $con_pos (keys %{$counts{con_pos_bef}{tot}}) 1270 | { 1271 | if ($counts{con_pos_bef}{tot}{$con_pos} < $thresh) 1272 | { 1273 | delete $counts{con_pos_bef}{tot}{$con_pos} ; 1274 | next ; 1275 | } 1276 | if (length($con_pos) > $max_con_pos_len) 1277 | { 1278 | $max_con_pos_len = length($con_pos) ; 1279 | } 1280 | } 1281 | 1282 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft}{tot}})[$con_err_num-1] ; 1283 | 1284 | foreach $con_pos (keys %{$counts{con_pos_aft}{tot}}) 1285 | { 1286 | if ($counts{con_pos_aft}{tot}{$con_pos} < $thresh) 1287 | { 1288 | delete $counts{con_pos_aft}{tot}{$con_pos} ; 1289 | next ; 1290 | } 1291 | if (length($con_pos) > $max_con_pos_len) 1292 | { 1293 | $max_con_pos_len = length($con_pos) ; 1294 | } 1295 | } 1296 | 1297 | $thresh = (sort {$b <=> $a} values %{$counts{con_pos_aft_2}{tot}})[$con_err_num-1] ; 1298 | 1299 | foreach $con_pos (keys %{$counts{con_pos_aft_2}{tot}}) 1300 | { 1301 | if ($counts{con_pos_aft_2}{tot}{$con_pos} < $thresh) 1302 | { 1303 | delete $counts{con_pos_aft_2}{tot}{$con_pos} ; 1304 | next ; 1305 | } 1306 | if (length($con_pos) > $max_con_pos_len) 1307 | { 1308 | $max_con_pos_len = length($con_pos) ; 1309 | } 1310 | } 1311 | 1312 | # printing 1313 | 1314 | # ------------- focus words 1315 | 1316 | printf OUT "\n\n" ; 1317 | printf OUT " %d focus words where most of the errors occur:\n\n", scalar keys %{$counts{word}{err_any}} ; 1318 | 1319 | printf OUT " %-*s | %-4s | %-4s | %-4s | %-4s\n", $max_word_len, ' ', 'any', 'head', 'dep', 'both' ; 1320 | printf OUT " %s-+------+------+------+------\n", '-' x $max_word_len; 1321 | 1322 | foreach $word (sort 
{$counts{word}{err_any}{$b} <=> $counts{word}{err_any}{$a}} keys %{$counts{word}{err_any}}) 1323 | { 1324 | if (!defined($counts{word}{err_head}{$word})) 1325 | { 1326 | $counts{word}{err_head}{$word} = 0 ; 1327 | } 1328 | if (! defined($counts{word}{err_dep}{$word})) 1329 | { 1330 | $counts{word}{err_dep}{$word} = 0 ; 1331 | } 1332 | if (! defined($counts{word}{err_any}{$word})) 1333 | { 1334 | $counts{word}{err_any}{$word} = 0; 1335 | } 1336 | printf OUT " %-*s | %4d | %4d | %4d | %4d\n", 1337 | $max_word_len+length($word)-uni_len($word), $word, $counts{word}{err_any}{$word}, 1338 | $counts{word}{err_head}{$word}, 1339 | $counts{word}{err_dep}{$word}, 1340 | $counts{word}{err_dep}{$word}+$counts{word}{err_head}{$word}-$counts{word}{err_any}{$word} ; 1341 | } 1342 | 1343 | printf OUT " %s-+------+------+------+------\n", '-' x $max_word_len; 1344 | 1345 | # ------------- contexts 1346 | 1347 | printf OUT "\n\n" ; 1348 | 1349 | printf OUT " one-token preceeding contexts where most of the errors occur:\n\n" ; 1350 | 1351 | print_context($counts{con_bef}, $counts{con_pos_bef}, $max_con_len, $max_con_pos_len) ; 1352 | 1353 | printf OUT " two-token preceeding contexts where most of the errors occur:\n\n" ; 1354 | 1355 | print_context($counts{con_bef_2}, $counts{con_pos_bef_2}, $max_con_len, $max_con_pos_len) ; 1356 | 1357 | printf OUT " one-token following contexts where most of the errors occur:\n\n" ; 1358 | 1359 | print_context($counts{con_aft}, $counts{con_pos_aft}, $max_con_len, $max_con_pos_len) ; 1360 | 1361 | printf OUT " two-token following contexts where most of the errors occur:\n\n" ; 1362 | 1363 | print_context($counts{con_aft_2}, $counts{con_pos_aft_2}, $max_con_len, $max_con_pos_len) ; 1364 | 1365 | # ------------- Sentences 1366 | 1367 | printf OUT " Sentence with the highest number of word errors:\n" ; 1368 | $i = (sort { (defined($err_sent[$b]{word}) && $err_sent[$b]{word}) 1369 | <=> (defined($err_sent[$a]{word}) && $err_sent[$a]{word}) } 1 .. $sent_num)[0] ; 1370 | printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; 1371 | printf OUT "%d head errors, %d dependency errors, %d word errors\n", 1372 | $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; 1373 | 1374 | printf OUT "\n\n" ; 1375 | 1376 | printf OUT " Sentence with the highest number of head errors:\n" ; 1377 | $i = (sort { (defined($err_sent[$b]{head}) && $err_sent[$b]{head}) 1378 | <=> (defined($err_sent[$a]{head}) && $err_sent[$a]{head}) } 1 .. $sent_num)[0] ; 1379 | printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; 1380 | printf OUT "%d head errors, %d dependency errors, %d word errors\n", 1381 | $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; 1382 | 1383 | printf OUT "\n\n" ; 1384 | 1385 | printf OUT " Sentence with the highest number of dependency errors:\n" ; 1386 | $i = (sort { (defined($err_sent[$b]{dep}) && $err_sent[$b]{dep}) 1387 | <=> (defined($err_sent[$a]{dep}) && $err_sent[$a]{dep}) } 1 .. 
$sent_num)[0] ; 1388 | printf OUT " Sentence %d line %d, ", $i, $starts[$i-1] ; 1389 | printf OUT "%d head errors, %d dependency errors, %d word errors\n", 1390 | $err_sent[$i]{head}, $err_sent[$i]{dep}, $err_sent[$i]{word} ; 1391 | 1392 | # 1393 | # Second pass, collect statistics of the frequent errors 1394 | # 1395 | 1396 | # filter the errors, leave the most frequent $freq_err_num errors 1397 | 1398 | $i = 0 ; 1399 | 1400 | $thresh = (sort {$b <=> $a} values %freq_err)[$freq_err_num-1] ; 1401 | 1402 | foreach $err (keys %freq_err) 1403 | { 1404 | if ($freq_err{$err} < $thresh) 1405 | { 1406 | delete $freq_err{$err} ; 1407 | } 1408 | } 1409 | 1410 | # in case there are several errors with the threshold count 1411 | 1412 | $freq_err_num = scalar keys %freq_err ; 1413 | 1414 | %err_counts = () ; 1415 | 1416 | $eof = 0 ; 1417 | 1418 | seek (GOLD, 0, 0) ; 1419 | seek (SYS, 0, 0) ; 1420 | 1421 | while (! $eof) 1422 | { # second reading loop 1423 | 1424 | $eof = read_sent(\@sent_gold, \@sent_sys) ; 1425 | $sent_num++ ; 1426 | 1427 | $word_num = scalar @sent_gold ; 1428 | 1429 | # printf "$sent_num $word_num\n" ; 1430 | 1431 | foreach $i_w (0 .. $word_num-1) 1432 | { # loop on words 1433 | ($word, $pos, $head_g, $dep_g) 1434 | = @{$sent_gold[$i_w]}{'word', 'pos', 'head', 'dep'} ; 1435 | 1436 | # printf "%d: %s %s %s %s\n", $i_w, $word, $pos, $head_g, $dep_g ; 1437 | 1438 | if ((! $score_on_punct) && is_uni_punct($word)) 1439 | { 1440 | # ignore punctuations 1441 | next ; 1442 | } 1443 | 1444 | ($head_s, $dep_s) = @{$sent_sys[$i_w]}{'head', 'dep'} ; 1445 | 1446 | $err_head = ($head_g ne $head_s) ; 1447 | $err_dep = ($dep_g ne $dep_s) ; 1448 | 1449 | $head_err = '-' ; 1450 | $dep_err = '-' ; 1451 | 1452 | if ($head_g eq '0') 1453 | { 1454 | $head_aft_bef_g = '0' ; 1455 | } 1456 | elsif ($head_g eq $i_w+1) 1457 | { 1458 | $head_aft_bef_g = 'e' ; 1459 | } 1460 | else 1461 | { 1462 | $head_aft_bef_g = ($head_g <= $i_w+1 ? 'b' : 'a') ; 1463 | } 1464 | 1465 | if ($head_s eq '0') 1466 | { 1467 | $head_aft_bef_s = '0' ; 1468 | } 1469 | elsif ($head_s eq $i_w+1) 1470 | { 1471 | $head_aft_bef_s = 'e' ; 1472 | } 1473 | else 1474 | { 1475 | $head_aft_bef_s = ($head_s <= $i_w+1 ? 'b' : 'a') ; 1476 | } 1477 | 1478 | $head_aft_bef = $head_aft_bef_g.$head_aft_bef_s ; 1479 | 1480 | if ($err_head) 1481 | { 1482 | if ($head_aft_bef_s eq '0') 1483 | { 1484 | $head_err = 0 ; 1485 | } 1486 | else 1487 | { 1488 | $head_err = $head_s-$head_g ; 1489 | } 1490 | } 1491 | 1492 | if ($err_dep) 1493 | { 1494 | $dep_err = $dep_g.'->'.$dep_s ; 1495 | } 1496 | 1497 | if (! ($err_head || $err_dep)) 1498 | { 1499 | next ; 1500 | } 1501 | 1502 | # handle only the most frequent errors 1503 | 1504 | $err = $head_err.$sep.$head_aft_bef.$sep.$dep_err ; 1505 | 1506 | if (! exists $freq_err{$err}) 1507 | { 1508 | next ; 1509 | } 1510 | 1511 | ($w_2, $w_1, $w1, $w2, $p_2, $p_1, $p1, $p2) = get_context(\@sent_gold, $i_w) ; 1512 | 1513 | $con_bef = $w_1 ; 1514 | $con_bef_2 = $w_2.' + '.$w_1 ; 1515 | $con_aft = $w1 ; 1516 | $con_aft_2 = $w1.' 
+ '.$w2 ; 1517 | 1518 | $con_pos_bef = $p_1 ; 1519 | $con_pos_bef_2 = $p_2.'+'.$p_1 ; 1520 | $con_pos_aft = $p1 ; 1521 | $con_pos_aft_2 = $p1.'+'.$p2 ; 1522 | 1523 | @cur_err = ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) ; 1524 | 1525 | # printf "# %-25s %-15s %-10s %-25s %-3s %-30s\n", 1526 | # $con_bef, $word, $pos, $con_aft, $head_err, $dep_err ; 1527 | 1528 | @bits = (0, 0, 0, 0, 0, 0) ; 1529 | $j = 0 ; 1530 | 1531 | while ($j == 0) 1532 | { 1533 | for ($i = 0; $i <= $#bits; $i++) 1534 | { 1535 | if ($bits[$i] == 0) 1536 | { 1537 | $bits[$i] = 1 ; 1538 | $j = 0 ; 1539 | last ; 1540 | } 1541 | else 1542 | { 1543 | $bits[$i] = 0 ; 1544 | $j = 1 ; 1545 | } 1546 | } 1547 | 1548 | @e_bits = @cur_err ; 1549 | 1550 | for ($i = 0; $i <= $#bits; $i++) 1551 | { 1552 | if (! $bits[$i]) 1553 | { 1554 | $e_bits[$i] = '*' ; 1555 | } 1556 | } 1557 | 1558 | # include also the last case which is the most general 1559 | # (wildcards for everything) 1560 | $err_counts{$err}{join($sep, @e_bits)}++ ; 1561 | 1562 | } 1563 | 1564 | } # loop on words 1565 | } # second reading loop 1566 | 1567 | printf OUT "\n\n" ; 1568 | printf OUT " Specific errors, %d most frequent errors:", $freq_err_num ; 1569 | printf OUT "\n %s\n", '=' x 41 ; 1570 | 1571 | 1572 | # deleting local contexts which are too general 1573 | 1574 | foreach $err (keys %err_counts) 1575 | { 1576 | foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}} 1577 | keys %{$err_counts{$err}}) 1578 | { 1579 | @cur_err = split(/\Q$sep\E/, $loc_con) ; 1580 | 1581 | # In this loop, one or two elements of the local context are 1582 | # replaced with '*' to make it more general. If the entry for 1583 | # the general context has the same count it is removed. 1584 | 1585 | foreach $i (0 .. 
$#cur_err) 1586 | { 1587 | $w1 = $cur_err[$i] ; 1588 | if ($cur_err[$i] eq '*') 1589 | { 1590 | next ; 1591 | } 1592 | $cur_err[$i] = '*' ; 1593 | $con1 = join($sep, @cur_err) ; 1594 | if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con}) 1595 | && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con})) 1596 | { 1597 | delete $err_counts{$err}{$con1} ; 1598 | } 1599 | for ($j = $i+1; $j <=$#cur_err; $j++) 1600 | { 1601 | if ($cur_err[$j] eq '*') 1602 | { 1603 | next ; 1604 | } 1605 | $w2 = $cur_err[$j] ; 1606 | $cur_err[$j] = '*' ; 1607 | $con1 = join($sep, @cur_err) ; 1608 | if ( defined($err_counts{$err}{$con1}) && defined($err_counts{$err}{$loc_con}) 1609 | && ($err_counts{$err}{$con1} == $err_counts{$err}{$loc_con})) 1610 | { 1611 | delete $err_counts{$err}{$con1} ; 1612 | } 1613 | $cur_err[$j] = $w2 ; 1614 | } 1615 | $cur_err[$i] = $w1 ; 1616 | } 1617 | } 1618 | } 1619 | 1620 | # Leaving only the topmost local contexts for each error 1621 | 1622 | foreach $err (keys %err_counts) 1623 | { 1624 | $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[$spec_err_loc_con-1] || 0 ; 1625 | 1626 | # of the threshold is too low, take the 2nd highest count 1627 | # (the highest may be the total which is the generic case 1628 | # and not relevant for printing) 1629 | 1630 | if ($thresh < 5) 1631 | { 1632 | $thresh = (sort {$b <=> $a} values %{$err_counts{$err}})[1] ; 1633 | } 1634 | 1635 | foreach $loc_con (keys %{$err_counts{$err}}) 1636 | { 1637 | if ($err_counts{$err}{$loc_con} < $thresh) 1638 | { 1639 | delete $err_counts{$err}{$loc_con} ; 1640 | } 1641 | else 1642 | { 1643 | if ($loc_con ne join($sep, ('*', '*', '*', '*', '*', '*'))) 1644 | { 1645 | $loc_con_err_counts{$loc_con}{$err} = $err_counts{$err}{$loc_con} ; 1646 | } 1647 | } 1648 | } 1649 | } 1650 | 1651 | # printing an error summary 1652 | 1653 | # calculating the context field length 1654 | 1655 | $max_word_spec_len= length('word') ; 1656 | $max_con_aft_len = length('word') ; 1657 | $max_con_bef_len = length('word') ; 1658 | $max_con_pos_len = length('CPOS') ; 1659 | 1660 | foreach $err (keys %err_counts) 1661 | { 1662 | foreach $loc_con (sort keys %{$err_counts{$err}}) 1663 | { 1664 | ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = 1665 | split(/\Q$sep\E/, $loc_con) ; 1666 | 1667 | $l = uni_len($word) ; 1668 | if ($l > $max_word_spec_len) 1669 | { 1670 | $max_word_spec_len = $l ; 1671 | } 1672 | 1673 | $l = uni_len($con_bef) ; 1674 | if ($l > $max_con_bef_len) 1675 | { 1676 | $max_con_bef_len = $l ; 1677 | } 1678 | 1679 | $l = uni_len($con_aft) ; 1680 | if ($l > $max_con_aft_len) 1681 | { 1682 | $max_con_aft_len = $l ; 1683 | } 1684 | 1685 | if (length($con_pos_aft) > $max_con_pos_len) 1686 | { 1687 | $max_con_pos_len = length($con_pos_aft) ; 1688 | } 1689 | 1690 | if (length($con_pos_bef) > $max_con_pos_len) 1691 | { 1692 | $max_con_pos_len = length($con_pos_bef) ; 1693 | } 1694 | } 1695 | } 1696 | 1697 | $err_counter = 0 ; 1698 | 1699 | foreach $err (sort {$freq_err{$b} <=> $freq_err{$a}} keys %freq_err) 1700 | { 1701 | 1702 | ($head_err, $head_aft_bef, $dep_err) = split(/\Q$sep\E/, $err) ; 1703 | 1704 | $err_counter++ ; 1705 | $err_desc{$err} = sprintf("%2d. ", $err_counter). 
1706 | describe_err($head_err, $head_aft_bef, $dep_err) ; 1707 | 1708 | # printf OUT " %-3s %-30s %d\n", $head_err, $dep_err, $freq_err{$err} ; 1709 | printf OUT "\n" ; 1710 | printf OUT " %s : %d times\n", $err_desc{$err}, $freq_err{$err} ; 1711 | 1712 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", 1713 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1714 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1715 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1716 | 1717 | printf OUT " %-*s | %-*s | %-*s | %s\n", 1718 | $max_con_pos_len+$max_con_bef_len+3, ' Before', 1719 | $max_word_spec_len+$max_pos_len+3, ' Focus', 1720 | $max_con_pos_len+$max_con_aft_len+3, ' After', 1721 | 'Count' ; 1722 | 1723 | printf OUT " %-*s %-*s | %-*s %-*s | %-*s %-*s |\n", 1724 | $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word', 1725 | $max_pos_len, 'CPOS', $max_word_spec_len, 'word', 1726 | $max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ; 1727 | 1728 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", 1729 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1730 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1731 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1732 | 1733 | foreach $loc_con (sort {$err_counts{$err}{$b} <=> $err_counts{$err}{$a}} 1734 | keys %{$err_counts{$err}}) 1735 | { 1736 | if ($loc_con eq join($sep, ('*', '*', '*', '*', '*', '*'))) 1737 | { 1738 | next ; 1739 | } 1740 | 1741 | $con1 = $loc_con ; 1742 | $con1 =~ s/\*/ /g ; 1743 | 1744 | ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = 1745 | split(/\Q$sep\E/, $con1) ; 1746 | 1747 | printf OUT " %-*s | %-*s | %-*s | %-*s | %-*s | %-*s | %3d\n", 1748 | $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef, 1749 | $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word, 1750 | $max_con_pos_len, $con_pos_aft, $max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft, 1751 | $err_counts{$err}{$loc_con} ; 1752 | } 1753 | 1754 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-+------\n", 1755 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1756 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1757 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1758 | 1759 | } 1760 | 1761 | printf OUT "\n\n" ; 1762 | printf OUT " Local contexts involved in several frequent errors:" ; 1763 | printf OUT "\n %s\n", '=' x 51 ; 1764 | printf OUT "\n\n" ; 1765 | 1766 | foreach $loc_con (sort {scalar keys %{$loc_con_err_counts{$b}} <=> 1767 | scalar keys %{$loc_con_err_counts{$a}}} 1768 | keys %loc_con_err_counts) 1769 | { 1770 | 1771 | if (scalar keys %{$loc_con_err_counts{$loc_con}} == 1) 1772 | { 1773 | next ; 1774 | } 1775 | 1776 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", 1777 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1778 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1779 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1780 | 1781 | printf OUT " %-*s | %-*s | %-*s \n", 1782 | $max_con_pos_len+$max_con_bef_len+3, ' Before', 1783 | $max_word_spec_len+$max_pos_len+3, ' Focus', 1784 | $max_con_pos_len+$max_con_aft_len+3, ' After' ; 1785 | 1786 | printf OUT " %-*s %-*s | %-*s %-*s | %-*s %-*s \n", 1787 | $max_con_pos_len, 'CPOS', $max_con_bef_len, 'word', 1788 | $max_pos_len, 'CPOS', $max_word_spec_len, 'word', 1789 | $max_con_pos_len, 'CPOS', $max_con_aft_len, 'word' ; 1790 | 1791 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", 1792 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1793 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1794 | '-' x 
$max_con_pos_len, '-' x $max_con_aft_len ; 1795 | 1796 | $con1 = $loc_con ; 1797 | $con1 =~ s/\*/ /g ; 1798 | 1799 | ($con_pos_bef, $con_bef, $word, $pos, $con_pos_aft, $con_aft) = 1800 | split(/\Q$sep\E/, $con1) ; 1801 | 1802 | printf OUT " %-*s | %-*s | %-*s | %-*s | %-*s | %-*s \n", 1803 | $max_con_pos_len, $con_pos_bef, $max_con_bef_len+length($con_bef)-uni_len($con_bef), $con_bef, 1804 | $max_pos_len, $pos, $max_word_spec_len+length($word)-uni_len($word), $word, 1805 | $max_con_pos_len, $con_pos_aft, $max_con_aft_len+length($con_aft)-uni_len($con_aft), $con_aft ; 1806 | 1807 | printf OUT " %s-+-%s-+-%s-+-%s-+-%s-+-%s-\n", 1808 | '-' x $max_con_pos_len, '-' x $max_con_bef_len, 1809 | '-' x $max_pos_len, '-' x $max_word_spec_len, 1810 | '-' x $max_con_pos_len, '-' x $max_con_aft_len ; 1811 | 1812 | foreach $err (sort {$loc_con_err_counts{$loc_con}{$b} <=> 1813 | $loc_con_err_counts{$loc_con}{$a}} 1814 | keys %{$loc_con_err_counts{$loc_con}}) 1815 | { 1816 | printf OUT " %s : %d times\n", $err_desc{$err}, 1817 | $loc_con_err_counts{$loc_con}{$err} ; 1818 | } 1819 | 1820 | printf OUT "\n" ; 1821 | } 1822 | 1823 | close GOLD ; 1824 | close SYS ; 1825 | 1826 | close OUT ; 1827 | --------------------------------------------------------------------------------
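A minimal usage sketch for conll17_ud_eval.py (added for illustration; it is not part of the repository). File names such as gold.conllu, system.conllu and weights.clas are hypothetical placeholders, and the snippet assumes the script's directory is on the Python path; the Perl script above is run separately, e.g. perl eval.pl -g <gold standard> -s <system output>, optionally with -p to also score punctuation, per its usage text.

    from conll17_ud_eval import load_conllu_file, load_deprel_weights, evaluate

    gold_ud = load_conllu_file("gold.conllu")      # gold-standard CoNLL-U file (placeholder name)
    system_ud = load_conllu_file("system.conllu")  # parser output (placeholder name)

    # Each metric is a score object with precision, recall, f1 (and aligned_accuracy where defined).
    scores = evaluate(gold_ud, system_ud)
    print("UAS = {:.2f}, LAS = {:.2f}".format(100 * scores["UAS"].f1, 100 * scores["LAS"].f1))

    # WeightedLAS, using a deprel-weights file in the two-column format read by load_deprel_weights.
    with open("weights.clas") as weights_file:
        deprel_weights = load_deprel_weights(weights_file)
    weighted = evaluate(gold_ud, system_ud, deprel_weights)
    print("WeightedLAS = {:.2f}".format(100 * weighted["WeightedLAS"].f1))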