├── README.md ├── __init__.py ├── __pycache__ ├── __init__.cpython-36.pyc ├── alignments.cpython-36.pyc ├── amr.cpython-36.pyc ├── amrdata.cpython-36.pyc ├── utils.cpython-36.pyc └── utils.cpython-37.pyc ├── alignments.py ├── amr.py ├── amrdata.py ├── evaluation.sh ├── extract_np.py ├── preprocessing_np.sh ├── scores.py ├── smatch ├── LICENSE.txt ├── README.md ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── amr.cpython-36.pyc │ ├── amr.cpython-37.pyc │ ├── smatch_fromlists.cpython-36.pyc │ └── smatch_fromlists.cpython-37.pyc ├── amr.py ├── amr.pyc ├── sample_file_list ├── setup.py ├── smatch-table.py ├── smatch.py ├── smatch_fromlists.py ├── smatch_fromlists.pyc ├── smatch_tool_guideline.txt ├── test_input1.txt ├── test_input2.txt └── update_log ├── smatch_old ├── .filt ├── .output_jamr.txt.swp ├── .smatch.py.swp ├── .smatch_fromlists.py.swp ├── LICENSE.txt ├── README.txt ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── amr.cpython-36.pyc │ ├── amr.cpython-37.pyc │ ├── amr_edited.cpython-36.pyc │ ├── smatch_fromlists.cpython-36.pyc │ └── smatch_fromlists.cpython-37.pyc ├── amr.py ├── amr.pyc ├── amr_edited.py ├── amr_edited.pyc ├── sample_file_list ├── smatch-table.py ├── smatch.py ├── smatch_edited.py ├── smatch_edited.pyc ├── smatch_fromannot.py ├── smatch_fromannot.pyc ├── smatch_fromlists.py ├── smatch_fromlists.pyc ├── smatch_fromsubgraphs.py ├── smatch_fromsubgraphs.pyc └── update_log ├── test_input1.txt ├── test_input2.txt ├── utils.py └── utils.pyc /README.md: -------------------------------------------------------------------------------- 1 | # amr-evaluation 2 | 3 | Evaluation metrics to compare AMR graphs based on Smatch (http://amr.isi.edu/evaluation.html). The script computes a set of metrics between AMR graphs in addition to the traditional Smatch code: 4 | 5 | * Unlabeled: Smatch score computed on the predicted graphs after removing all edge labels 6 | * No WSD. Smatch score while ignoring Propbank senses (e.g., duck-01 vs duck-02) 7 | * Named Ent. F-score on the named entity recognition (:name roles) 8 | * Wikification. F-score on the wikification (:wiki roles) 9 | * Negations. F-score on the negation detection (:polarity roles) 10 | * Concepts. F-score on the concept identification task 11 | * Reentrancy. Smatch computed on reentrant edges only 12 | * SRL. Smatch computed on :ARG-i roles only 13 | 14 | The different metrics were introduced in the paper below, which also uses them to evaluate several AMR parsers: 15 | 16 | "An Incremental Parser for Abstract Meaning Representation", Marco Damonte, Shay B. Cohen and Giorgio Satta. In arXiv:1608.06111 (2016). URL: https://arxiv.org/abs/1608.06111 17 | 18 | **Usage:** ```./evaluation.sh ```, 19 | where and are two files which contain multiple AMRs. A blank line is used to separate two AMRs (same format required by Smatch). 20 | 21 | In the paper we also discuss a metric for noun phrase analysis. To compute this metric: 22 | 23 | - ```./preprocessing.sh ``` and ```python extract_np.py ``` to extract the noun phrases from your gold dataset. This will create two files: ```np_sents.txt``` and ```np_graphs.txt```. 24 | - Parse ```np_sents.txt``` with the AMR parser and evaluate with Smatch ```python smatch/smatch.py --pr -f np_graphs.txt``` 25 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | -------------------------------------------------------------------------------- /__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/alignments.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/__pycache__/alignments.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/amr.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/__pycache__/amr.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/amrdata.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/__pycache__/amrdata.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /alignments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding=utf-8 3 | 4 | ''' 5 | Definition of Alignments class. For each sentence, computes the list of node variables that are aligned 6 | to each index in the sentence, assuming alignments in the format returned by JAMR 7 | 8 | @author: Marco Damonte (m.damonte@sms.ed.ac.uk) 9 | @since: 03-10-16 10 | ''' 11 | 12 | import amr as amr_annot 13 | from collections import defaultdict 14 | 15 | class Alignments: 16 | 17 | def _traverse(self, parsed_amr, amr): 18 | triples = parsed_amr.get_triples3() 19 | triples2 = [] 20 | root = None 21 | for i in range (0, len(triples)): 22 | rel = triples[i] 23 | if rel[1] == "TOP": 24 | triples2.append(("TOP",":top",rel[0])) 25 | root = rel[0] 26 | elif rel not in [r for r in parsed_amr.reent if r[2] in parsed_amr.nodes]: 27 | triples2.append((rel[0],":" + rel[1],rel[2])) 28 | indexes = {} 29 | queue = [] 30 | visited = [] 31 | queue.append((root, "0")) 32 | while len(queue) > 0: 33 | (node, prefix) = queue.pop(0) 34 | if node in visited: 35 | continue 36 | indexes[prefix] = node 37 | if node in parsed_amr.nodes: 38 | visited.append(node) 39 | children = [t for t in triples2 if str(t[0]) == node] 40 | i = 0 41 | for c in children: 42 | v = str(c[2]) 43 | queue.append((v, prefix + "." + str(i))) 44 | i += 1 45 | return indexes 46 | 47 | 48 | def __init__(self, alignments_filename, graphs): 49 | self.alignments = [] 50 | for g, line in zip(graphs,open(alignments_filename)): 51 | amr = g.strip() 52 | parsed_amr = amr_annot.AMR.parse_AMR_line(amr.replace("\n",""), False) 53 | line = line.strip() 54 | indexes = self._traverse(parsed_amr, amr) 55 | al = defaultdict(list) 56 | if line != "": 57 | for a in line.split(" "): 58 | if a.strip() == "": 59 | continue 60 | start = a.split("|")[0].split("-")[0] 61 | if start[0] == "*": 62 | start = start[1:] 63 | end = a.split("|")[0].split("-")[1] 64 | for i in range(int(start),int(end)): 65 | for segment in a.split("|")[1].split("+"): 66 | al[i].append(indexes[segment]) 67 | self.alignments.append(al) 68 | -------------------------------------------------------------------------------- /amr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | """ 5 | AMR (Abstract Meaning Representation) structure 6 | For detailed description of AMR, see http://www.isi.edu/natural-language/amr/a.pdf 7 | 8 | """ 9 | import collections 10 | from collections import defaultdict 11 | import sys 12 | 13 | # change this if needed 14 | ERROR_LOG = sys.stderr 15 | 16 | # change this if needed 17 | DEBUG_LOG = sys.stderr 18 | 19 | 20 | class AMR(object): 21 | """ 22 | AMR is a rooted, labeled graph to represent semantics. 23 | This class has the following members: 24 | nodes: list of node in the graph. Its ith element is the name of the ith node. For example, a node name 25 | could be "a1", "b", "g2", .etc 26 | node_values: list of node labels (values) of the graph. Its ith element is the value associated with node i in 27 | nodes list. In AMR, such value is usually a semantic concept (e.g. "boy", "want-01") 28 | root: root node name 29 | relations: list of edges connecting two nodes in the graph. Each entry is a link between two nodes, i.e. a triple 30 | . In AMR, such link denotes the relation between two semantic 31 | concepts. For example, "arg0" means that one of the concepts is the 0th argument of the other. 32 | attributes: list of edges connecting a node to an attribute name and its value. For example, if the polarity of 33 | some node is negative, there should be an edge connecting this node and "-". A triple < attribute name, 34 | node name, attribute value> is used to represent such attribute. It can also be viewed as a relation. 35 | 36 | """ 37 | def __init__(self, node_list=None, node_value_list=None, relation_list=None, attribute_list=None, reent=None, allrelations=None): 38 | """ 39 | node_list: names of nodes in AMR graph, e.g. "a11", "n" 40 | node_value_list: values of nodes in AMR graph, e.g. "group" for a node named "g" 41 | relation_list: list of relations between two nodes 42 | attribute_list: list of attributes (links between one node and one constant value) 43 | 44 | """ 45 | # initialize AMR graph nodes using list of nodes name 46 | # root, by default, is the first in var_list 47 | attribute_list2 = [] 48 | for dct in attribute_list: 49 | dct2 = collections.OrderedDict() 50 | for item in dct: 51 | if len(dct[item]) > 1 and dct[item].endswith("_"): 52 | dct[item] = '"' + dct[item][0:-1] + '"' 53 | dct2[item] = dct[item] 54 | attribute_list2.append(dct2) 55 | reent2 = [] 56 | for r in reent: 57 | if len(r[2]) > 1 and r[2].endswith("_"): 58 | reent2.append((r[0], r[1], '"' + r[2][0:-1] + '"')) 59 | else: 60 | reent2.append(r) 61 | allrelations2 = [] 62 | for r in allrelations: 63 | if len(r[2]) > 1 and r[2].endswith("_"): 64 | allrelations2.append((r[0], r[1], '"' + r[2][0:-1] + '"')) 65 | else: 66 | allrelations2.append(r) 67 | if node_list is None: 68 | self.nodes = [] 69 | self.root = None 70 | else: 71 | self.nodes = node_list[:] 72 | if len(node_list) != 0: 73 | self.root = node_list[0] 74 | else: 75 | self.root = None 76 | if node_value_list is None: 77 | self.node_values = [] 78 | else: 79 | self.node_values = node_value_list[:] 80 | if relation_list is None: 81 | self.relations = [] 82 | else: 83 | self.relations = relation_list[:] 84 | if attribute_list2 is None: 85 | self.attributes = [] 86 | else: 87 | self.attributes = attribute_list2[:] 88 | 89 | self.reent= reent2 90 | self.allrelations = allrelations2 91 | 92 | def rename_node(self, prefix): 93 | """ 94 | Rename AMR graph nodes to prefix + node_index to avoid nodes with the same name in two different AMRs. 95 | 96 | """ 97 | node_map_dict = {} 98 | # map each node to its new name (e.g. "a1") 99 | for i in range(0, len(self.nodes)): 100 | node_map_dict[self.nodes[i]] = prefix + str(i) 101 | # update node name 102 | for i, v in enumerate(self.nodes): 103 | self.nodes[i] = node_map_dict[v] 104 | # update node name in relations 105 | for i, d in enumerate(self.relations): 106 | new_dict = {} 107 | for k, v_lst in d.items(): 108 | for v in v_lst: 109 | if node_map_dict[k] not in new_dict: 110 | new_dict[node_map_dict[k]] = [v] 111 | else: 112 | new_dict[node_map_dict[k]].append(v) 113 | self.relations[i] = new_dict 114 | 115 | def get_triples(self): 116 | """ 117 | Get the triples in three lists. 118 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 119 | attribute triple: relation of attributes, e.g. polarity(w, - ) 120 | and relation triple, e.g. arg0 (w, b) 121 | 122 | """ 123 | instance_triple = [] 124 | relation_triple = [] 125 | attribute_triple = [] 126 | for i in range(len(self.nodes)): 127 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 128 | # k is the other node this node has relation with 129 | # v is relation name 130 | for k, v_lst in self.relations[i].items(): 131 | for v in v_lst: 132 | relation_triple.append((v, self.nodes[i], k)) 133 | # k2 is the attribute name 134 | # v2 is the attribute value 135 | for k2, v2 in self.attributes[i].items(): 136 | attribute_triple.append((k2, self.nodes[i], v2)) 137 | return instance_triple, attribute_triple, relation_triple 138 | 139 | 140 | def get_triples2(self): 141 | """ 142 | Get the triples in two lists: 143 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 144 | relation_triple: a triple representing all relations. E.g arg0 (w, b) or E.g. polarity(w, - ) 145 | Note that we do not differentiate between attribute triple and relation triple. Both are considered as relation 146 | triples. 147 | All triples are represented by (triple_type, argument 1 of the triple, argument 2 of the triple) 148 | 149 | """ 150 | instance_triple = [] 151 | relation_triple = [] 152 | for i in range(len(self.nodes)): 153 | # an instance triple is instance(node name, node value). 154 | # For example, instance(b, boy). 155 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 156 | # k is the other node this node has relation with 157 | # v is relation name 158 | for k, v_lst in self.relations[i].items(): 159 | for v in v_lst: 160 | relation_triple.append((v, self.nodes[i], k)) 161 | # k2 is the attribute name 162 | # v2 is the attribute value 163 | for k2, v2 in self.attributes[i].items(): 164 | relation_triple.append((k2, self.nodes[i], v2)) 165 | return instance_triple, relation_triple 166 | 167 | def get_triples3(self): 168 | relation_triple = [(self.nodes[0],"TOP",self.node_values[0])] 169 | relation_triple.extend(self.allrelations) 170 | return relation_triple 171 | 172 | def __str__(self): 173 | """ 174 | Generate AMR string for better readability 175 | 176 | """ 177 | lines = [] 178 | for i in range(len(self.nodes)): 179 | lines.append("Node "+ str(i) + " " + self.nodes[i]) 180 | lines.append("Value: " + self.node_values[i]) 181 | lines.append("Relations:") 182 | for k, v_lst in self.relations[i].items(): 183 | for v in v_lst: 184 | lines.append("Node " + k + " via " + v) 185 | for k2, v2 in self.attributes[i].items(): 186 | lines.append("Attribute: " + k2 + " value " + v2) 187 | return "\n".join(lines) 188 | 189 | def __repr__(self): 190 | return self.__str__() 191 | 192 | def output_amr(self): 193 | """ 194 | Output AMR string 195 | 196 | """ 197 | print >> DEBUG_LOG, self.__str__() 198 | 199 | 200 | @staticmethod 201 | def parse_AMR_line(line, normalize_inv = True): 202 | """ 203 | Parse a AMR from line representation to an AMR object. 204 | This parsing algorithm scans the line once and process each character, in a shift-reduce style. 205 | 206 | """ 207 | # Current state. It denotes the last significant symbol encountered. 1 for (, 2 for :, 3 for /, 208 | # and 0 for start state or ')' 209 | # Last significant symbol is ( --- start processing node name 210 | # Last significant symbol is : --- start processing relation name 211 | # Last significant symbol is / --- start processing node value (concept name) 212 | # Last significant symbol is ) --- current node processing is complete 213 | # Note that if these symbols are inside parenthesis, they are not significant symbols. 214 | state = 0 215 | stack = [] 216 | # current not-yet-reduced character sequence 217 | cur_charseq = [] 218 | # key: node name value: node value 219 | node_dict = collections.OrderedDict() 220 | # node name list (order: occurrence of the node) 221 | node_name_list = [] 222 | # key: node name: value: list of (relation name, the other node name) 223 | node_relation_dict1 = defaultdict(list) 224 | # key: node name, value: list of (attribute name, const value) or (relation name, unseen node name) 225 | node_relation_dict2 = defaultdict(list) 226 | allrelations = [] 227 | reent = [] 228 | # current relation name 229 | cur_relation_name = "" 230 | # having unmatched quote string 231 | in_quote = False 232 | for i, c in enumerate(line.strip()): 233 | if c == " ": 234 | # allow space in relation name 235 | if state == 2: 236 | cur_charseq.append(c) 237 | continue 238 | if c == "\"": 239 | # flip in_quote value when a quote symbol is encountered 240 | # insert placeholder if in_quote from last symbol 241 | if in_quote: 242 | cur_charseq.append('_') 243 | in_quote = not in_quote 244 | elif c == "(": 245 | # not significant symbol if inside quote 246 | if in_quote: 247 | cur_charseq.append(c) 248 | continue 249 | # get the attribute name 250 | # e.g :arg0 (x ... 251 | # at this point we get "arg0" 252 | if state == 2: 253 | # in this state, current relation name should be empty 254 | if cur_relation_name != "": 255 | print >> ERROR_LOG, "Format error when processing ", line[0:i+1] 256 | return None 257 | # update current relation name for future use 258 | cur_relation_name = "".join(cur_charseq).strip() 259 | cur_charseq[:] = [] 260 | state = 1 261 | elif c == ":": 262 | # not significant symbol if inside quote 263 | if in_quote: 264 | cur_charseq.append(c) 265 | continue 266 | # Last significant symbol is "/". Now we encounter ":" 267 | # Example: 268 | # :OR (o2 / *OR* 269 | # :mod (o3 / official) 270 | # gets node value "*OR*" at this point 271 | if state == 3: 272 | node_value = "".join(cur_charseq) 273 | # clear current char sequence 274 | cur_charseq[:] = [] 275 | # pop node name ("o2" in the above example) 276 | cur_node_name = stack[-1] 277 | # update node name/value map 278 | node_dict[cur_node_name] = node_value 279 | # Last significant symbol is ":". Now we encounter ":" 280 | # Example: 281 | # :op1 w :quant 30 282 | # or :day 14 :month 3 283 | # the problem is that we cannot decide if node value is attribute value (constant) 284 | # or node value (variable) at this moment 285 | elif state == 2: 286 | temp_attr_value = "".join(cur_charseq) 287 | cur_charseq[:] = [] 288 | parts = temp_attr_value.split() 289 | if len(parts) < 2: 290 | print >> ERROR_LOG, "Error in processing; part len < 2", line[0:i+1] 291 | return None 292 | # For the above example, node name is "op1", and node value is "w" 293 | # Note that this node name might not be encountered before 294 | relation_name = parts[0].strip() 295 | relation_value = parts[1].strip() 296 | # We need to link upper level node to the current 297 | # top of stack is upper level node 298 | if len(stack) == 0: 299 | print >> ERROR_LOG, "Error in processing", line[:i], relation_name, relation_value 300 | return None 301 | # if we have not seen this node name before 302 | if relation_name.endswith("-of") and normalize_inv: 303 | allrelations.append((relation_value,relation_name[:-3],stack[-1])) 304 | if relation_value not in node_dict: 305 | node_relation_dict2[relation_value].append((relation_name[:-3], stack[-1])) 306 | else: 307 | node_relation_dict1[relation_value].append((relation_name[:-3], stack[-1])) 308 | reent.append((relation_value,relation_name[:-3],stack[-1])) 309 | else: 310 | allrelations.append((stack[-1],relation_name,relation_value)) 311 | if relation_value not in node_dict: 312 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 313 | else: 314 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 315 | reent.append((stack[-1],relation_name,relation_value)) 316 | state = 2 317 | elif c == "/": 318 | if in_quote: 319 | cur_charseq.append(c) 320 | continue 321 | # Last significant symbol is "(". Now we encounter "/" 322 | # Example: 323 | # (d / default-01 324 | # get "d" here 325 | if state == 1: 326 | node_name = "".join(cur_charseq) 327 | cur_charseq[:] = [] 328 | # if this node name is already in node_dict, it is duplicate 329 | if node_name in node_dict: 330 | print >> ERROR_LOG, "Duplicate node name ", node_name, " in parsing AMR" 331 | return None 332 | # push the node name to stack 333 | stack.append(node_name) 334 | # add it to node name list 335 | node_name_list.append(node_name) 336 | # if this node is part of the relation 337 | # Example: 338 | # :arg1 (n / nation) 339 | # cur_relation_name is arg1 340 | # node name is n 341 | # we have a relation arg1(upper level node, n) 342 | if cur_relation_name != "": 343 | # if relation name ends with "-of", e.g."arg0-of", 344 | # it is reverse of some relation. For example, if a is "arg0-of" b, 345 | # we can also say b is "arg0" a. 346 | # If the relation name ends with "-of", we store the reverse relation. 347 | if (not cur_relation_name.endswith("-of")) or normalize_inv == False: 348 | # stack[-2] is upper_level node we encountered, as we just add node_name to stack 349 | node_relation_dict1[stack[-2]].append((cur_relation_name, node_name)) 350 | allrelations.append((stack[-2],cur_relation_name, node_name)) 351 | #if node_name in node_name_list: 352 | # reent.append((stack[-2],cur_relation_name,node_name)) 353 | else: 354 | # cur_relation_name[:-3] is to delete "-of" 355 | node_relation_dict1[node_name].append((cur_relation_name[:-3], stack[-2])) 356 | allrelations.append((node_name,cur_relation_name[:-3], stack[-2])) 357 | # clear current_relation_name 358 | cur_relation_name = "" 359 | else: 360 | # error if in other state 361 | print >> ERROR_LOG, "Error in parsing AMR", line[0:i+1] 362 | return None 363 | state = 3 364 | elif c == ")": 365 | if in_quote: 366 | cur_charseq.append(c) 367 | continue 368 | # stack should be non-empty to find upper level node 369 | if len(stack) == 0: 370 | print >> ERROR_LOG, "Unmatched parenthesis at position", i, "in processing", line[0:i+1] 371 | return None 372 | # Last significant symbol is ":". Now we encounter ")" 373 | # Example: 374 | # :op2 "Brown") or :op2 w) 375 | # get \"Brown\" or w here 376 | if state == 2: 377 | temp_attr_value = "".join(cur_charseq) 378 | cur_charseq[:] = [] 379 | parts = temp_attr_value.split() 380 | if len(parts) < 2: 381 | print >> ERROR_LOG, "Error processing", line[:i+1], temp_attr_value 382 | return None 383 | relation_name = parts[0].strip() 384 | relation_value = parts[1].strip() 385 | # store reverse of the relation 386 | # we are sure relation_value is a node here, as "-of" relation is only between two nodes 387 | if relation_name.endswith("-of") and normalize_inv: 388 | allrelations.append((relation_value,relation_name[:-3], stack[-1])) 389 | node_relation_dict1[relation_value].append((relation_name[:-3], stack[-1])) 390 | # attribute value not seen before 391 | # Note that it might be a constant attribute value, or an unseen node 392 | # process this after we have seen all the node names 393 | else: 394 | allrelations.append((stack[-1],relation_name, relation_value)) 395 | if relation_value not in node_dict: 396 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 397 | else: 398 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 399 | #if relation_value in node_name_list: 400 | reent.append((stack[-1],relation_name,relation_value)) 401 | # Last significant symbol is "/". Now we encounter ")" 402 | # Example: 403 | # :arg1 (n / nation) 404 | # we get "nation" here 405 | elif state == 3: 406 | node_value = "".join(cur_charseq) 407 | cur_charseq[:] = [] 408 | cur_node_name = stack[-1] 409 | # map node name to its value 410 | node_dict[cur_node_name] = node_value 411 | #if node_value in node_name_list: 412 | # reent.append((stack[-1],relation_name,relation_value)) 413 | # pop from stack, as the current node has been processed 414 | stack.pop() 415 | cur_relation_name = "" 416 | state = 0 417 | else: 418 | # not significant symbols, so we just shift. 419 | cur_charseq.append(c) 420 | #create data structures to initialize an AMR 421 | node_value_list = [] 422 | relation_list = [] 423 | attribute_list = [] 424 | for v in node_name_list: 425 | if v not in node_dict: 426 | print >> ERROR_LOG, "Error: Node name not found", v 427 | return None 428 | else: 429 | node_value_list.append(node_dict[v]) 430 | # build relation map and attribute map for this node 431 | relation_dict = collections.OrderedDict() 432 | attribute_dict = collections.OrderedDict() 433 | if v in node_relation_dict1: 434 | for v1 in node_relation_dict1[v]: 435 | if v1[1] not in relation_dict: 436 | relation_dict[v1[1]] = [v1[0]] 437 | else: 438 | relation_dict[v1[1]].append(v1[0]) 439 | if v in node_relation_dict2: 440 | for v2 in node_relation_dict2[v]: 441 | # if value is in quote, it is a constant value 442 | # strip the quote and put it in attribute map 443 | if v2[1][0] == "\"" and v2[1][-1] == "\"": 444 | attribute_dict[v2[0]] = v2[1][1:-1] 445 | # if value is a node name 446 | elif v2[1] in node_dict: 447 | if v2[1] not in relation_dict: 448 | relation_dict[v2[1]] = [v2[0]] 449 | else: 450 | relation_dict[v2[1]].append(v2[0]) 451 | else: 452 | attribute_dict[v2[0]] = v2[1] 453 | # each node has a relation map and attribute map 454 | relation_list.append(relation_dict) 455 | attribute_list.append(attribute_dict) 456 | # add TOP as an attribute. The attribute value is the top node value 457 | attribute_list[0]["TOP"] = node_value_list[0] 458 | result_amr = AMR(node_name_list, node_value_list, relation_list, attribute_list, reent, allrelations) 459 | return result_amr 460 | 461 | # test AMR parsing 462 | # a unittest can also be used. 463 | if __name__ == "__main__": 464 | if len(sys.argv) < 2: 465 | print >> ERROR_LOG, "No file given" 466 | exit(1) 467 | amr_count = 1 468 | for line in open(sys.argv[1]): 469 | cur_line = line.strip() 470 | if cur_line == "" or cur_line.startswith("#"): 471 | continue 472 | print >> DEBUG_LOG, "AMR", amr_count 473 | current = AMR.parse_AMR_line(cur_line) 474 | current.output_amr() 475 | amr_count += 1 476 | -------------------------------------------------------------------------------- /amrdata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding=utf-8 3 | 4 | ''' 5 | AMRDataset reads the file generated by preprocessing.sh and it generates a AMRSentence instance for each sentence, 6 | containing all information necessary to the parser. 7 | @author: Marco Damonte (m.damonte@sms.ed.ac.uk) 8 | @since: 3-10-16 9 | ''' 10 | 11 | import re 12 | from alignments import Alignments as Alignments 13 | import sys 14 | import amr as amrannot 15 | 16 | class AMRSentence: 17 | def __init__(self, tokens, pos, lemmas, nes, dependencies, variables = None, relations = None, graph = None, alignments = None): 18 | self.tokens = tokens 19 | self.pos = pos 20 | self.lemmas = lemmas 21 | self.nes = nes 22 | self.dependencies = dependencies 23 | if variables is not None: 24 | self.variables = [(str(k),str(variables[k])) for k in variables] 25 | if relations is not None: 26 | self.relations = [r for r in relations if r[0] != r[2]] 27 | self.graph = graph 28 | self.alignments = alignments 29 | 30 | class AMRDataset: 31 | def _var2concept(self, amr): 32 | v2c = {} 33 | for n, v in zip(amr.nodes, amr.node_values): 34 | v2c[n] = v 35 | return v2c 36 | 37 | def __init__(self, prefix, amrs, normalize = True): 38 | self.normalize = normalize 39 | self.sentences = [] 40 | 41 | alltokens, allpos, alllemmas, allnes, alldepslines = self._loadFromFile(prefix + ".out") 42 | if amrs: 43 | allgraphs = open(prefix + ".graphs").read().split("\n\n") 44 | a = Alignments(prefix + ".alignments", allgraphs) 45 | allalignments = a.alignments 46 | 47 | for graph, alignments, depslines, tokens, pos, lemmas, nes in zip(allgraphs, allalignments, alldepslines, alltokens, allpos, alllemmas, allnes): 48 | graph = graph.strip() 49 | amr = amrannot.AMR.parse_AMR_line(graph.replace("\n",""), False) 50 | variables = {} 51 | for n, v in zip(amr.nodes, amr.node_values): 52 | variables[n] = v 53 | role_triples = amr.get_triples3() 54 | relations = [] 55 | for (var1,label,var2) in role_triples: 56 | if label == "TOP": 57 | relations.append(("TOP",":top",var1)) 58 | else: 59 | relations.append((str(var1),":" + str(label),str(var2))) 60 | dependencies = [] 61 | for line in depslines.split("\n"): 62 | pattern = "^(.+)$.+-([0-9]+), .+-([0-9]+)$" 63 | regex = re.match(pattern, line) 64 | if regex is not None: 65 | label = regex.group(1) 66 | a = int(regex.group(2)) - 1 67 | b = int(regex.group(3)) - 1 68 | if a == -1: 69 | dependencies.append((b, 'ROOT', b)) 70 | elif a != b: 71 | dependencies.append((a, label, b)) 72 | self.sentences.append(AMRSentence(tokens, pos, lemmas, nes, dependencies, variables, relations, graph, alignments)) 73 | else: 74 | for depslines, tokens, pos, lemmas, nes in zip(alldepslines, alltokens, allpos, alllemmas, allnes): 75 | dependencies = [] 76 | for line in depslines.split("\n"): 77 | pattern = "^(.+)$.+-([0-9]+), .+-([0-9]+)$" 78 | regex = re.match(pattern, line) 79 | if regex is not None: 80 | label = regex.group(1) 81 | a = int(regex.group(2)) - 1 82 | b = int(regex.group(3)) - 1 83 | if a == -1: 84 | dependencies.append((b, 'ROOT', b)) 85 | elif a != b: 86 | dependencies.append((a, label, b)) 87 | self.sentences.append(AMRSentence(tokens, pos, lemmas, nes, dependencies)) 88 | 89 | 90 | def getSent(self, index): 91 | return self.sentences[index] 92 | 93 | def getAllSents(self): 94 | return self.sentences 95 | 96 | def _loadFromFile(self, stanfordOutput, norm = True): 97 | alltokens = [] 98 | allpos = [] 99 | alllemmas = [] 100 | allnes = [] 101 | alldepslines = [] 102 | blocks = open(stanfordOutput, 'r').read().split("\n\n") 103 | while True: 104 | if len(blocks) == 1: 105 | break 106 | block = blocks.pop(0).strip().split("\n") 107 | tokens = [] 108 | lemmas = [] 109 | nes = [] 110 | pos = [] 111 | i = 2 112 | while block[i].startswith("[Text"): 113 | tokens.extend([t[5:-1] for t in re.findall('Text=[^\s]* ', block[i])]) 114 | pos.extend([t[13:-1] for t in re.findall('PartOfSpeech=[^\s]* ', block[i])]) 115 | lemmas.extend([t[6:-1] for t in re.findall('Lemma=[^\s]* ', block[i])]) 116 | nes.extend([t[15:] for t in re.findall('NamedEntityTag=[^\]]*', block[i])]) 117 | i += 1 118 | allpos.append(pos) 119 | if blocks[0].startswith("\n"): 120 | b = "" 121 | else: 122 | b = blocks.pop(0) 123 | depslines = b 124 | alltokens.append(tokens) 125 | alllemmas.append(lemmas) 126 | allnes.append(nes) 127 | alldepslines.append(depslines) 128 | continue # don't need this part of evaluation.... 129 | 130 | #very messy piece of code to handle corenlp normalization (for dates, currencies, etc) 131 | tokens2 = [] 132 | lemmas2 = [] 133 | nes2 = [] 134 | lastnorm = None 135 | for token, lemma, ne in zip(tokens, lemmas, nes): 136 | nesplit = ne.split() 137 | if len(nesplit) > 1: 138 | mne = re.match("^([a-zA-Z\%\>\<\$\~\=]*)([0-9\.]*.*)", nesplit[1][25:].encode('ascii', 'ignore')) 139 | else: 140 | mne = None 141 | 142 | if nesplit[0] == "DATE" and re.match("^(\d{4}|XXXX)(-\d{2})?(-\d{2})?$",nesplit[1][25:]) is not None: 143 | norm = nesplit[1][25:] 144 | lastnorm = norm 145 | tokens2.append(norm) 146 | lemmas2.append(norm) 147 | nes2.append(nesplit[0]) 148 | 149 | elif (nesplit[0] == "MONEY" or nesplit[0] == "PERCENT") and self.normalize and len(nesplit) == 2 and mne is not None: 150 | [name, norm] = nesplit 151 | curr = mne.groups()[0] 152 | norm = mne.groups()[1] 153 | curr = curr.replace("<","").replace(">","").replace("~","").replace("=","") 154 | if curr == "$": 155 | curr = "dollar" 156 | if curr == "": 157 | w = nesplit[1][25:].replace("<","").replace(">","").replace("~","").replace("=","") 158 | if w.startswith(u"\u00A5"): 159 | curr = "yen" 160 | elif w.startswith(u"\u5143"): 161 | curr = "yuan" 162 | elif w.startswith(u"\u00A3"): 163 | curr = "pound" 164 | elif w.startswith(u"\u20AC"): 165 | curr = "euro" 166 | else: 167 | curr = "NULL" 168 | 169 | m = re.match("([0-9\.][0-9\.]*)E([0-9][0-9]*)$",norm) 170 | if m is not None: 171 | n = m.groups()[0] 172 | z = "".join(["0"]*int(m.groups()[1])) 173 | norm = format(float(n)*float("1"+z), ".32f") 174 | norm = re.sub("\.00*$","",norm) 175 | if token.endswith(".0") == False: 176 | norm = re.sub("\.0$","",norm) 177 | if token.replace(",","").replace(".","").isdigit() == False and lastnorm is not None: 178 | norm = "," 179 | token = "," 180 | name = "O" 181 | lastnorm = norm 182 | if norm == ",": 183 | tokens2.append(norm) 184 | else: 185 | tokens2.append(norm + "_" + curr) 186 | lemmas2.append(token) 187 | nes2.append(name) 188 | elif self.normalize and len(nesplit) == 2 and re.match("^[0-9].*", nesplit[1][25:]) is not None: #numbers 189 | [name, norm] = nesplit 190 | norm = norm[25:] 191 | m = re.match("([0-9\.][0-9\.]*)E([0-9][0-9]*)$",norm) 192 | if m is not None: 193 | n = m.groups()[0] 194 | z = "".join(["0"]*int(m.groups()[1])) 195 | norm = str(float(n)*int("1"+z)) 196 | if token.endswith(".0") == False: 197 | norm = re.sub("\.0$","",norm) 198 | if token.replace(",","").replace(".","").isdigit() == False and lastnorm is not None: 199 | norm = "," 200 | token = "," 201 | name = "O" 202 | lastnorm = norm 203 | tokens2.append(norm) 204 | lemmas2.append(token) 205 | nes2.append(name) 206 | else: 207 | lastnorm = None 208 | tokens2.append(token) 209 | lemmas2.append(lemma) 210 | nes2.append(nesplit[0]) 211 | alltokens.append(tokens2) 212 | alllemmas.append(lemmas2) 213 | allnes.append(nes2) 214 | alldepslines.append(depslines) 215 | return (alltokens, allpos, alllemmas, allnes, alldepslines) 216 | -------------------------------------------------------------------------------- /evaluation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Evaluation script. Run as: ./evaluation.sh 4 | 5 | out=`python smatch/smatch.py --pr -f "$1" "$2"` 6 | pr=`echo $out | cut -d' ' -f2` 7 | rc=`echo $out | cut -d' ' -f4` 8 | fs=`echo $out | cut -d' ' -f6` 9 | echo 'Smatch -> P: '$pr', R: '$rc', F: '$fs 10 | 11 | sed 's/:[a-zA-Z0-9-]*/:label/g' "$1" > 1.tmp 12 | sed 's/:[a-zA-Z0-9-]*/:label/g' "$2" > 2.tmp 13 | out=`python smatch/smatch.py --pr -f 1.tmp 2.tmp` 14 | pr=`echo $out | cut -d' ' -f2` 15 | rc=`echo $out | cut -d' ' -f4` 16 | fs=`echo $out | cut -d' ' -f6` 17 | echo 'Unlabeled -> P: '$pr', R: '$rc', F: '$fs 18 | 19 | cat "$1" | perl -ne 's/(\/ [a-zA-Z0-9\-][a-zA-Z0-9\-]*)-[0-9][0-9]*/\1-01/g; print;' > 1.tmp 20 | cat "$2" | perl -ne 's/(\/ [a-zA-Z0-9\-][a-zA-Z0-9\-]*)-[0-9][0-9]*/\1-01/g; print;' > 2.tmp 21 | out=`python smatch/smatch.py --pr -f 1.tmp 2.tmp` 22 | pr=`echo $out | cut -d' ' -f2` 23 | rc=`echo $out | cut -d' ' -f4` 24 | fs=`echo $out | cut -d' ' -f6` 25 | echo 'No WSD -> -> P: '$pr', R: '$rc', F: '$fs 26 | 27 | cat "$1" | perl -ne 's/^#.*\n//g; print;' | tr '\t' ' ' | tr -s ' ' > 1.tmp 28 | cat "$2" | perl -ne 's/^#.*\n//g; print;' | tr '\t' ' ' | tr -s ' ' > 2.tmp 29 | python scores.py "1.tmp" "2.tmp" 30 | 31 | rm 1.tmp 32 | rm 2.tmp 33 | -------------------------------------------------------------------------------- /extract_np.py: -------------------------------------------------------------------------------- 1 | import smatch.amr as amr 2 | import sys 3 | import re 4 | from amrdata import * 5 | from collections import defaultdict 6 | import copy 7 | 8 | def _to_string(triples, root, level, last_child, seen, prefix, indexes): 9 | children = [t for t in triples if str(t[0]) == root.split()[0]] 10 | if root in seen: 11 | root = root.split()[0] 12 | children = [] 13 | else: 14 | var = root 15 | if " / " in root: 16 | var = root.split()[0] 17 | indexes[var].append(prefix) 18 | if " / " in root: 19 | seen.append(root) 20 | graph = "(" + root 21 | if len(children) > 0: 22 | graph += "\n" 23 | else: 24 | graph += ")" 25 | else: 26 | graph = root 27 | j = 0 28 | for k, t in enumerate(children): 29 | if str(t[0]) == root.split()[0]: 30 | next_r = t[3] 31 | if t[4] != "": 32 | next_r += " / " + t[4] 33 | for i in range(0, level): 34 | graph += " " 35 | seen2 = copy.deepcopy(seen) 36 | graph += t[2] + " " + _to_string(triples, next_r, level + 1, k == len(children) - 1, seen, prefix + "." + str(j), indexes)[0] 37 | if next_r not in seen2 or " / " not in next_r: 38 | j += 1 39 | if len(children) > 0: 40 | graph += ")" 41 | if not last_child: 42 | graph += "\n" 43 | 44 | return graph, indexes 45 | 46 | def to_string(triples, root): 47 | children = [t for t in triples if str(t[0]) == root] 48 | if len(children) > 1: 49 | counter = 1 50 | triples2 = [("TOP","",":top","mu","multi-sentence")] 51 | for t in triples: 52 | if t[0] == "TOP": 53 | triples2.append(("mu", "multi-sentence", ":snt" + str(counter), t[3], t[4])) 54 | counter += 1 55 | else: 56 | triples2.append(t) 57 | else: 58 | triples2 = triples 59 | children = [t for t in triples2 if str(t[0]) == root] 60 | assert(len(children) == 1) 61 | if children[0][4] == "": 62 | return "(e / emptygraph)\n", defaultdict(list) 63 | return _to_string(triples2, children[0][3] + " / " + children[0][4], 1, False, [], "0", defaultdict(list)) 64 | 65 | def var2concept(amr): 66 | v2c = {} 67 | for n, v in zip(amr.nodes, amr.node_values): 68 | v2c[n] = v 69 | return v2c 70 | 71 | def preprocess_constituency_tree(snt, syntax): 72 | for idx, word in enumerate(snt.split()): 73 | new_syntax = [] 74 | done = False 75 | for tok in syntax.split(): 76 | if not done and word == tok and not tok.startswith('<<'): 77 | new_syntax.append('<<' + str(idx) + '>>' + tok) 78 | done = True 79 | else: 80 | new_syntax.append(tok) 81 | syntax = ' '.join(new_syntax) 82 | return syntax 83 | 84 | def run(prefix): 85 | blocks = open(prefix + ".sentences.nopars.out").read().split("\n\n") 86 | nps = [] 87 | npstart = False 88 | par = 0 89 | k = -1 90 | sents = AMRDataset(prefix, True, False).getAllSents() 91 | famr = open("np_graphs.txt","w") 92 | fsent = open("np_sents.txt","w") 93 | while True: 94 | k += 1 95 | if len(blocks) == 1: 96 | break 97 | block_txt = blocks.pop(0).strip() 98 | block = block_txt.split("\n") 99 | const = "".join(block[3:]) 100 | if blocks[0].startswith("\n"): 101 | b = "" 102 | else: 103 | b = blocks.pop(0) 104 | 105 | snt = ' '.join(sents[k].tokens) 106 | snt = snt.replace('(', '') 107 | snt = snt.replace(')', '') 108 | 109 | syntax = " ".join(const.split(']')[-1].replace(')',' )').split()) 110 | syntax = preprocess_constituency_tree(snt, syntax) 111 | 112 | nps = [] 113 | nps_idxs = [] 114 | np_flag = False 115 | new_np = "" 116 | new_np_idxs = [] 117 | pars = 0 118 | 119 | # find all NPs 120 | for tok in syntax.split(): 121 | fields = tok.split('>>') 122 | if len(fields) > 1: 123 | i = tok.split('>>')[0][2:] 124 | tok = tok.split('>>')[1] 125 | else: 126 | i = -1 127 | if '(' in tok: 128 | pars += 1 129 | elif ')' in tok: 130 | pars -= 1 131 | if np_flag: 132 | if tok == ')' and pars == 0: 133 | np_flag = False 134 | new_np += tok 135 | new_np_idxs.append(i) 136 | nouns = [x for x in new_np.split() if x.startswith('(N')] 137 | if len(nouns) > 1: 138 | nps.append(re.sub(r'$[A-Z:\-\,\.\$\'\`][A-Z:\-\,\.\$\'\`]*|$', '', new_np).split()) 139 | nps_idxs.append(new_np_idxs[0:-1]) 140 | assert(len(nps[-1]) == len(nps_idxs[-1])) 141 | else: 142 | new_np += ' ' + tok 143 | if i != -1: 144 | new_np_idxs.append(i) 145 | else: 146 | if tok == '(NP': 147 | pars = 1 148 | np_flag = True 149 | new_np = tok 150 | new_np_idxs = [] 151 | 152 | # align NPs with tokens in text and write to file 153 | for n, i in zip(nps, nps_idxs): 154 | nodes = [] 155 | if n == []: 156 | continue 157 | a = int(i[0]) 158 | b = int(i[-1]) 159 | for index in range(a, b + 1): 160 | nodes.extend(sents[k].alignments[index]) 161 | if nodes == []: 162 | continue 163 | 164 | v2c = defaultdict(str) 165 | amr_annot = amr.AMR.parse_AMR_line(sents[k].graph.replace("\n","")) 166 | for key in var2concept(amr_annot): 167 | v2c[str(key)] = str(var2concept(amr_annot)[key]) 168 | 169 | rels = [r for r in sents[k].relations if r[0] in nodes and r[2] in nodes] 170 | rels2 = [(r[0], v2c[r[0]], r[1], r[2], v2c[r[2]]) for r in rels] 171 | if len(rels2) > 0: 172 | rels2.insert(0, ("TOP", "", ":top", rels2[0][0], v2c[rels2[0][0]])) 173 | for node in nodes: 174 | if node not in [r[0] for r in rels2] and node not in [r[3] for r in rels2]: 175 | rels2.insert(0, ("TOP", "", ":top", node, v2c[node])) 176 | amr_str = to_string(rels2, rels2[0][0])[0] 177 | 178 | famr.write(amr_str + "\n") 179 | fsent.write(" ".join(n).replace('', '(').replace('', ')') + "\n") 180 | 181 | if __name__ == "__main__": 182 | run(sys.argv[1]) 183 | -------------------------------------------------------------------------------- /preprocessing_np.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Preprocessing script for AMR data 4 | # For preocessing unaligned amr annotations, use: ./preprocessing.sh 5 | # For preprocessing amr annotations aligned with JAMR (or other aligner that generate similar output), use: ./preprocessing.sh -a 6 | # For preprocessing English sentences (parsing only), use: ./preprocessing.sh -s 7 | 8 | JAMR="/disk/ocean/public/tools/jamr2016" 9 | TOKENIZER="../cdec-master/corpus/tokenize-anything.sh" 10 | CORENLP="../stanford-corenlp-full-2015-12-09/" 11 | 12 | if [[ "$JAMR" != "" ]]; 13 | then 14 | source $JAMR/scripts/config.sh 15 | fi 16 | 17 | ALIGNED="0" 18 | SENTS="0" 19 | while [[ $# -gt 1 ]] 20 | do 21 | key="$1" 22 | case $key in 23 | -a|--aligned) 24 | ALIGNED="1" 25 | ;; 26 | -s|--sents) 27 | SENTS="1" 28 | ;; 29 | *) 30 | # unknown option 31 | ;; 32 | esac 33 | shift # past argument or value 34 | done 35 | 36 | if [ "$#" -ne 1 ]; then 37 | echo "Usage: preprocessing.sh AMR_annotation_file" 38 | exit 39 | fi 40 | workdir=$(dirname $1) 41 | 42 | if [[ $SENTS -eq "1" ]]; 43 | then 44 | "${TOKENIZER}" < "$1" | sed -E 's/(^# ::.*)cannot/\1can not/g' > "$1.sentences" 45 | 46 | else 47 | echo "Extracting AMR graphs.." 48 | cat $1 | grep -v '^#' > "$1.graphs" 49 | 50 | if [[ $ALIGNED -eq "0" ]]; 51 | then 52 | if [[ $JAMR != "" ]]; 53 | then 54 | echo "Running JAMR aligner.." 55 | source $JAMR/scripts/config.sh 56 | sed -E 's/(^# ::.*)cannot/\1can not/g' "$1" > "$1.jamr" 57 | $JAMR/scripts/ALIGN.sh < "$1.jamr" > "$1.tmp" 58 | rm "$1.jamr" 59 | else 60 | echo "JAMR path not specified" 61 | fi 62 | 63 | echo "Extracting tokenized sentences and alignments.." 64 | cat "$1.tmp" | grep '# ::alignments ' | grep '::annotator Aligner' | sed 's/^# ::alignments //' | cut -d":" -f1 > "$1.alignments" 65 | cat "$1.tmp" | grep '# ::tok ' | sed 's/^# ::tok //' > "$1.sentences" 66 | rm "$1.tmp" 67 | else 68 | echo "Extracting tokenized sentences and alignments.." 69 | cat $1 | grep '# ::alignments ' | sed 's/^# ::alignments //' | cut -d":" -f1 > "$1.alignments" 70 | cat $1 | grep '# ::tok ' | sed 's/^# ::tok //' > "$1.sentences" 71 | fi 72 | 73 | fi 74 | 75 | sed 's/ ( / /g' $1.sentences | sed 's/ ) / /g' | sed 's/( / /g' | sed 's/ )/ /g' > $1.sentences.nopars 76 | echo "Running CoreNLP.." 77 | java -mx6g -cp "$CORENLP/*" edu.stanford.nlp.pipeline.StanfordCoreNLP -props "../corenlp.properties" -file "$1.sentences.nopars" --outputFormat text --outputDirectory "$workdir" 78 | 79 | echo "Done!" 80 | 81 | -------------------------------------------------------------------------------- /scores.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding=utf-8 3 | 4 | ''' 5 | Computes AMR scores for concept identification, named entity recognition, wikification, 6 | negation detection, reentrancy detection and SRL. 7 | 8 | @author: Marco Damonte (m.damonte@sms.ed.ac.uk) 9 | @since: 03-10-16 10 | ''' 11 | 12 | import sys 13 | import smatch.amr as amr 14 | import smatch.smatch_fromlists as smatch 15 | from collections import defaultdict 16 | from utils import * 17 | 18 | pred = open(sys.argv[1]).read().strip().split("\n\n") 19 | gold = open(sys.argv[2]).read().strip().split("\n\n") 20 | 21 | inters = defaultdict(int) 22 | golds = defaultdict(int) 23 | preds = defaultdict(int) 24 | reentrancies_pred = [] 25 | reentrancies_gold = [] 26 | srl_pred = [] 27 | srl_gold = [] 28 | 29 | k = 0 30 | tot = 0 31 | correct = 0 32 | for amr_pred, amr_gold in zip(pred, gold): 33 | amr_pred = amr.AMR.parse_AMR_line(amr_pred.replace("\n","")) 34 | dict_pred = var2concept(amr_pred) 35 | triples_pred = [] 36 | for t in amr_pred.get_triples()[1] + amr_pred.get_triples()[2]: 37 | if t[0].endswith('-of'): 38 | triples_pred.append((t[0][:-3], t[2], t[1])) 39 | else: 40 | triples_pred.append((t[0], t[1], t[2])) 41 | 42 | amr_gold = amr.AMR.parse_AMR_line(amr_gold.replace("\n","")) 43 | dict_gold = var2concept(amr_gold) 44 | triples_gold = [] 45 | for t in amr_gold.get_triples()[1] + amr_gold.get_triples()[2]: 46 | if t[0].endswith('-of'): 47 | triples_gold.append((t[0][:-3], t[2], t[1])) 48 | else: 49 | triples_gold.append((t[0], t[1], t[2])) 50 | 51 | list_pred = disambig(concepts(dict_pred)) 52 | list_gold = disambig(concepts(dict_gold)) 53 | inters["Concepts"] += len(list(set(list_pred) & set(list_gold))) 54 | preds["Concepts"] += len(set(list_pred)) 55 | golds["Concepts"] += len(set(list_gold)) 56 | list_pred = disambig(namedent(dict_pred, triples_pred)) 57 | list_gold = disambig(namedent(dict_gold, triples_gold)) 58 | inters["Named Ent."] += len(list(set(list_pred) & set(list_gold))) 59 | preds["Named Ent."] += len(set(list_pred)) 60 | golds["Named Ent."] += len(set(list_gold)) 61 | list_pred = disambig(negations(dict_pred, triples_pred)) 62 | list_gold = disambig(negations(dict_gold, triples_gold)) 63 | inters["Negations"] += len(list(set(list_pred) & set(list_gold))) 64 | preds["Negations"] += len(set(list_pred)) 65 | golds["Negations"] += len(set(list_gold)) 66 | 67 | list_pred = disambig(wikification(triples_pred)) 68 | list_gold = disambig(wikification(triples_gold)) 69 | inters["Wikification"] += len(list(set(list_pred) & set(list_gold))) 70 | preds["Wikification"] += len(set(list_pred)) 71 | golds["Wikification"] += len(set(list_gold)) 72 | 73 | reentrancies_pred.append(reentrancies(dict_pred, triples_pred)) 74 | reentrancies_gold.append(reentrancies(dict_gold, triples_gold)) 75 | 76 | srl_pred.append(srl(dict_pred, triples_pred)) 77 | srl_gold.append(srl(dict_gold, triples_gold)) 78 | 79 | for score in preds: 80 | if preds[score] > 0: 81 | pr = inters[score]/float(preds[score]) 82 | else: 83 | pr = 0 84 | if golds[score] > 0: 85 | rc = inters[score]/float(golds[score]) 86 | else: 87 | rc = 0 88 | if pr + rc > 0: 89 | f = 2*(pr*rc)/(pr+rc) 90 | print (score, '-> P:', "{0:.2f}".format(pr), ', R:', "{0:.2f}".format(rc), ', F:', "{0:.2f}".format(f)) 91 | else: 92 | print (score, '-> P:', "{0:.2f}".format(pr), ', R:', "{0:.2f}".format(rc), ', F: 0.00') 93 | 94 | pr, rc, f = smatch.main(reentrancies_pred, reentrancies_gold, True) 95 | print ('Reentrancies -> P:', "{0:.2f}".format(float(pr)), ', R:', "{0:.2f}".format(float(rc)), ', F:', "{0:.2f}".format(float(f))) 96 | pr, rc, f = smatch.main(srl_pred, srl_gold, True) 97 | print ('SRL -> P:', "{0:.2f}".format(float(pr)), ', R:', "{0:.2f}".format(float(rc)), ', F:', "{0:.2f}".format(float(f))) 98 | -------------------------------------------------------------------------------- /smatch/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2015 Shu Cai and Kevin Knight 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /smatch/README.md: -------------------------------------------------------------------------------- 1 | # Smatch (semantic match) tool 2 | 3 | This is source code of [smatch](http://amr.isi.edu/evaluation.html), an evaluation tool for AMR (Abstract Meaning Representation). 4 | 5 | The code was mostly developed during 2012-2013, and has undergone many fixes and updates. It is now hosted on github for better collaboration. 6 | 7 | More details and updates about AMR and smatch can be found in USC/ISI's AMR site: http://amr.isi.edu/index.html 8 | -------------------------------------------------------------------------------- /smatch/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | -------------------------------------------------------------------------------- /smatch/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/__init__.pyc -------------------------------------------------------------------------------- /smatch/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /smatch/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /smatch/__pycache__/amr.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/__pycache__/amr.cpython-36.pyc -------------------------------------------------------------------------------- /smatch/__pycache__/amr.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/__pycache__/amr.cpython-37.pyc -------------------------------------------------------------------------------- /smatch/__pycache__/smatch_fromlists.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/__pycache__/smatch_fromlists.cpython-36.pyc -------------------------------------------------------------------------------- /smatch/__pycache__/smatch_fromlists.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/__pycache__/smatch_fromlists.cpython-37.pyc -------------------------------------------------------------------------------- /smatch/amr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | AMR (Abstract Meaning Representation) structure 6 | For detailed description of AMR, see http://www.isi.edu/natural-language/amr/a.pdf 7 | 8 | """ 9 | 10 | from __future__ import print_function 11 | from collections import defaultdict 12 | import sys 13 | 14 | # change this if needed 15 | ERROR_LOG = sys.stderr 16 | 17 | # change this if needed 18 | DEBUG_LOG = sys.stderr 19 | 20 | 21 | class AMR(object): 22 | """ 23 | AMR is a rooted, labeled graph to represent semantics. 24 | This class has the following members: 25 | nodes: list of node in the graph. Its ith element is the name of the ith node. For example, a node name 26 | could be "a1", "b", "g2", .etc 27 | node_values: list of node labels (values) of the graph. Its ith element is the value associated with node i in 28 | nodes list. In AMR, such value is usually a semantic concept (e.g. "boy", "want-01") 29 | root: root node name 30 | relations: list of edges connecting two nodes in the graph. Each entry is a link between two nodes, i.e. a triple 31 | . In AMR, such link denotes the relation between two semantic 32 | concepts. For example, "arg0" means that one of the concepts is the 0th argument of the other. 33 | attributes: list of edges connecting a node to an attribute name and its value. For example, if the polarity of 34 | some node is negative, there should be an edge connecting this node and "-". A triple < attribute name, 35 | node name, attribute value> is used to represent such attribute. It can also be viewed as a relation. 36 | 37 | """ 38 | def __init__(self, node_list=None, node_value_list=None, relation_list=None, attribute_list=None): 39 | """ 40 | node_list: names of nodes in AMR graph, e.g. "a11", "n" 41 | node_value_list: values of nodes in AMR graph, e.g. "group" for a node named "g" 42 | relation_list: list of relations between two nodes 43 | attribute_list: list of attributes (links between one node and one constant value) 44 | 45 | """ 46 | # initialize AMR graph nodes using list of nodes name 47 | # root, by default, is the first in var_list 48 | 49 | if node_list is None: 50 | self.nodes = [] 51 | self.root = None 52 | else: 53 | self.nodes = node_list[:] 54 | if len(node_list) != 0: 55 | self.root = node_list[0] 56 | else: 57 | self.root = None 58 | if node_value_list is None: 59 | self.node_values = [] 60 | else: 61 | self.node_values = node_value_list[:] 62 | if relation_list is None: 63 | self.relations = [] 64 | else: 65 | self.relations = relation_list[:] 66 | if attribute_list is None: 67 | self.attributes = [] 68 | else: 69 | self.attributes = attribute_list[:] 70 | 71 | def rename_node(self, prefix): 72 | """ 73 | Rename AMR graph nodes to prefix + node_index to avoid nodes with the same name in two different AMRs. 74 | 75 | """ 76 | node_map_dict = {} 77 | # map each node to its new name (e.g. "a1") 78 | for i in range(0, len(self.nodes)): 79 | node_map_dict[self.nodes[i]] = prefix + str(i) 80 | # update node name 81 | for i, v in enumerate(self.nodes): 82 | self.nodes[i] = node_map_dict[v] 83 | # update node name in relations 84 | for node_relations in self.relations: 85 | for i, l in enumerate(node_relations): 86 | node_relations[i][1] = node_map_dict[l[1]] 87 | 88 | def get_triples(self): 89 | """ 90 | Get the triples in three lists. 91 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 92 | attribute triple: relation of attributes, e.g. polarity(w, - ) 93 | and relation triple, e.g. arg0 (w, b) 94 | 95 | """ 96 | instance_triple = [] 97 | relation_triple = [] 98 | attribute_triple = [] 99 | for i in range(len(self.nodes)): 100 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 101 | # l[0] is relation name 102 | # l[1] is the other node this node has relation with 103 | for l in self.relations[i]: 104 | relation_triple.append((l[0], self.nodes[i], l[1])) 105 | # l[0] is the attribute name 106 | # l[1] is the attribute value 107 | for l in self.attributes[i]: 108 | attribute_triple.append((l[0], self.nodes[i], l[1])) 109 | return instance_triple, attribute_triple, relation_triple 110 | 111 | 112 | def get_triples2(self): 113 | """ 114 | Get the triples in two lists: 115 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 116 | relation_triple: a triple representing all relations. E.g arg0 (w, b) or E.g. polarity(w, - ) 117 | Note that we do not differentiate between attribute triple and relation triple. Both are considered as relation 118 | triples. 119 | All triples are represented by (triple_type, argument 1 of the triple, argument 2 of the triple) 120 | 121 | """ 122 | instance_triple = [] 123 | relation_triple = [] 124 | for i in range(len(self.nodes)): 125 | # an instance triple is instance(node name, node value). 126 | # For example, instance(b, boy). 127 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 128 | # l[0] is relation name 129 | # l[1] is the other node this node has relation with 130 | for l in self.relations[i]: 131 | relation_triple.append((l[0], self.nodes[i], l[1])) 132 | # l[0] is the attribute name 133 | # l[1] is the attribute value 134 | for l in self.attributes[i]: 135 | relation_triple.append((l[0], self.nodes[i], l[1])) 136 | return instance_triple, relation_triple 137 | 138 | 139 | def __str__(self): 140 | """ 141 | Generate AMR string for better readability 142 | 143 | """ 144 | lines = [] 145 | for i in range(len(self.nodes)): 146 | lines.append("Node "+ str(i) + " " + self.nodes[i]) 147 | lines.append("Value: " + self.node_values[i]) 148 | lines.append("Relations:") 149 | for relation in self.relations[i]: 150 | lines.append("Node " + relation[1] + " via " + relation[0]) 151 | for attribute in self.attributes[i]: 152 | lines.append("Attribute: " + attribute[0] + " value " + attribute[1]) 153 | return "\n".join(lines) 154 | 155 | def __repr__(self): 156 | return self.__str__() 157 | 158 | def output_amr(self): 159 | """ 160 | Output AMR string 161 | 162 | """ 163 | print(self.__str__(), file=DEBUG_LOG) 164 | 165 | @staticmethod 166 | def get_amr_line(input_f): 167 | """ 168 | Read the file containing AMRs. AMRs are separated by a blank line. 169 | Each call of get_amr_line() returns the next available AMR (in one-line form). 170 | Note: this function does not verify if the AMR is valid 171 | 172 | """ 173 | cur_amr = [] 174 | has_content = False 175 | for line in input_f: 176 | line = line.strip() 177 | if line == "": 178 | if not has_content: 179 | # empty lines before current AMR 180 | continue 181 | else: 182 | # end of current AMR 183 | break 184 | if line.strip().startswith("#"): 185 | # ignore the comment line (starting with "#") in the AMR file 186 | continue 187 | else: 188 | has_content = True 189 | cur_amr.append(line.strip()) 190 | return "".join(cur_amr) 191 | 192 | @staticmethod 193 | def parse_AMR_line(line): 194 | """ 195 | Parse a AMR from line representation to an AMR object. 196 | This parsing algorithm scans the line once and process each character, in a shift-reduce style. 197 | 198 | """ 199 | # Current state. It denotes the last significant symbol encountered. 1 for (, 2 for :, 3 for /, 200 | # and 0 for start state or ')' 201 | # Last significant symbol is ( --- start processing node name 202 | # Last significant symbol is : --- start processing relation name 203 | # Last significant symbol is / --- start processing node value (concept name) 204 | # Last significant symbol is ) --- current node processing is complete 205 | # Note that if these symbols are inside parenthesis, they are not significant symbols. 206 | state = 0 207 | # node stack for parsing 208 | stack = [] 209 | # current not-yet-reduced character sequence 210 | cur_charseq = [] 211 | # key: node name value: node value 212 | node_dict = {} 213 | # node name list (order: occurrence of the node) 214 | node_name_list = [] 215 | # key: node name: value: list of (relation name, the other node name) 216 | node_relation_dict1 = defaultdict(list) 217 | # key: node name, value: list of (attribute name, const value) or (relation name, unseen node name) 218 | node_relation_dict2 = defaultdict(list) 219 | # current relation name 220 | cur_relation_name = "" 221 | # having unmatched quote string 222 | in_quote = False 223 | for i, c in enumerate(line.strip()): 224 | if c == " ": 225 | # allow space in relation name 226 | if state == 2: 227 | cur_charseq.append(c) 228 | continue 229 | if c == "\"": 230 | # flip in_quote value when a quote symbol is encountered 231 | # insert placeholder if in_quote from last symbol 232 | if in_quote: 233 | cur_charseq.append('_') 234 | in_quote = not in_quote 235 | elif c == "(": 236 | # not significant symbol if inside quote 237 | if in_quote: 238 | cur_charseq.append(c) 239 | continue 240 | # get the attribute name 241 | # e.g :arg0 (x ... 242 | # at this point we get "arg0" 243 | if state == 2: 244 | # in this state, current relation name should be empty 245 | if cur_relation_name != "": 246 | print("Format error when processing ", line[0:i + 1], file=ERROR_LOG) 247 | return None 248 | # update current relation name for future use 249 | cur_relation_name = "".join(cur_charseq).strip() 250 | cur_charseq[:] = [] 251 | state = 1 252 | elif c == ":": 253 | # not significant symbol if inside quote 254 | if in_quote: 255 | cur_charseq.append(c) 256 | continue 257 | # Last significant symbol is "/". Now we encounter ":" 258 | # Example: 259 | # :OR (o2 / *OR* 260 | # :mod (o3 / official) 261 | # gets node value "*OR*" at this point 262 | if state == 3: 263 | node_value = "".join(cur_charseq) 264 | # clear current char sequence 265 | cur_charseq[:] = [] 266 | # pop node name ("o2" in the above example) 267 | cur_node_name = stack[-1] 268 | # update node name/value map 269 | node_dict[cur_node_name] = node_value 270 | # Last significant symbol is ":". Now we encounter ":" 271 | # Example: 272 | # :op1 w :quant 30 273 | # or :day 14 :month 3 274 | # the problem is that we cannot decide if node value is attribute value (constant) 275 | # or node value (variable) at this moment 276 | elif state == 2: 277 | temp_attr_value = "".join(cur_charseq) 278 | cur_charseq[:] = [] 279 | parts = temp_attr_value.split() 280 | if len(parts) < 2: 281 | print("Error in processing; part len < 2", line[0:i + 1], file=ERROR_LOG) 282 | return None 283 | # For the above example, node name is "op1", and node value is "w" 284 | # Note that this node name might not be encountered before 285 | relation_name = parts[0].strip() 286 | relation_value = parts[1].strip() 287 | # We need to link upper level node to the current 288 | # top of stack is upper level node 289 | if len(stack) == 0: 290 | print("Error in processing", line[:i], relation_name, relation_value, file=ERROR_LOG) 291 | return None 292 | # if we have not seen this node name before 293 | if relation_value not in node_dict: 294 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 295 | else: 296 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 297 | state = 2 298 | elif c == "/": 299 | if in_quote: 300 | cur_charseq.append(c) 301 | continue 302 | # Last significant symbol is "(". Now we encounter "/" 303 | # Example: 304 | # (d / default-01 305 | # get "d" here 306 | if state == 1: 307 | node_name = "".join(cur_charseq) 308 | cur_charseq[:] = [] 309 | # if this node name is already in node_dict, it is duplicate 310 | if node_name in node_dict: 311 | print("Duplicate node name ", node_name, " in parsing AMR", file=ERROR_LOG) 312 | return None 313 | # push the node name to stack 314 | stack.append(node_name) 315 | # add it to node name list 316 | node_name_list.append(node_name) 317 | # if this node is part of the relation 318 | # Example: 319 | # :arg1 (n / nation) 320 | # cur_relation_name is arg1 321 | # node name is n 322 | # we have a relation arg1(upper level node, n) 323 | if cur_relation_name != "": 324 | # if relation name ends with "-of", e.g."arg0-of", 325 | # it is reverse of some relation. For example, if a is "arg0-of" b, 326 | # we can also say b is "arg0" a. 327 | # If the relation name ends with "-of", we store the reverse relation. 328 | if not cur_relation_name.endswith("-of"): 329 | # stack[-2] is upper_level node we encountered, as we just add node_name to stack 330 | node_relation_dict1[stack[-2]].append((cur_relation_name, node_name)) 331 | else: 332 | # cur_relation_name[:-3] is to delete "-of" 333 | node_relation_dict1[node_name].append((cur_relation_name[:-3], stack[-2])) 334 | # clear current_relation_name 335 | cur_relation_name = "" 336 | else: 337 | # error if in other state 338 | print("Error in parsing AMR", line[0:i + 1], file=ERROR_LOG) 339 | return None 340 | state = 3 341 | elif c == ")": 342 | if in_quote: 343 | cur_charseq.append(c) 344 | continue 345 | # stack should be non-empty to find upper level node 346 | if len(stack) == 0: 347 | print("Unmatched parenthesis at position", i, "in processing", line[0:i + 1], file=ERROR_LOG) 348 | return None 349 | # Last significant symbol is ":". Now we encounter ")" 350 | # Example: 351 | # :op2 "Brown") or :op2 w) 352 | # get \"Brown\" or w here 353 | if state == 2: 354 | temp_attr_value = "".join(cur_charseq) 355 | cur_charseq[:] = [] 356 | parts = temp_attr_value.split() 357 | if len(parts) < 2: 358 | print("Error processing", line[:i + 1], temp_attr_value, file=ERROR_LOG) 359 | return None 360 | relation_name = parts[0].strip() 361 | relation_value = parts[1].strip() 362 | # store reverse of the relation 363 | # we are sure relation_value is a node here, as "-of" relation is only between two nodes 364 | if relation_name.endswith("-of"): 365 | node_relation_dict1[relation_value].append((relation_name[:-3], stack[-1])) 366 | # attribute value not seen before 367 | # Note that it might be a constant attribute value, or an unseen node 368 | # process this after we have seen all the node names 369 | elif relation_value not in node_dict: 370 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 371 | else: 372 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 373 | # Last significant symbol is "/". Now we encounter ")" 374 | # Example: 375 | # :arg1 (n / nation) 376 | # we get "nation" here 377 | elif state == 3: 378 | node_value = "".join(cur_charseq) 379 | cur_charseq[:] = [] 380 | cur_node_name = stack[-1] 381 | # map node name to its value 382 | node_dict[cur_node_name] = node_value 383 | # pop from stack, as the current node has been processed 384 | stack.pop() 385 | cur_relation_name = "" 386 | state = 0 387 | else: 388 | # not significant symbols, so we just shift. 389 | cur_charseq.append(c) 390 | #create data structures to initialize an AMR 391 | node_value_list = [] 392 | relation_list = [] 393 | attribute_list = [] 394 | for v in node_name_list: 395 | if v not in node_dict: 396 | print("Error: Node name not found", v, file=ERROR_LOG) 397 | return None 398 | else: 399 | node_value_list.append(node_dict[v]) 400 | # build relation list and attribute list for this node 401 | node_rel_list = [] 402 | node_attr_list = [] 403 | if v in node_relation_dict1: 404 | for v1 in node_relation_dict1[v]: 405 | node_rel_list.append([v1[0], v1[1]]) 406 | if v in node_relation_dict2: 407 | for v2 in node_relation_dict2[v]: 408 | # if value is in quote, it is a constant value 409 | # strip the quote and put it in attribute map 410 | if v2[1][0] == "\"" and v2[1][-1] == "\"": 411 | node_attr_list.append([[v2[0]], v2[1][1:-1]]) 412 | # if value is a node name 413 | elif v2[1] in node_dict: 414 | node_rel_list.append([v2[0], v2[1]]) 415 | else: 416 | node_attr_list.append([v2[0], v2[1]]) 417 | # each node has a relation list and attribute list 418 | relation_list.append(node_rel_list) 419 | attribute_list.append(node_attr_list) 420 | # add TOP as an attribute. The attribute value is the top node value 421 | attribute_list[0].append(["TOP", node_value_list[0]]) 422 | result_amr = AMR(node_name_list, node_value_list, relation_list, attribute_list) 423 | return result_amr 424 | 425 | # test AMR parsing 426 | # run by amr.py [file containing AMR] 427 | # a unittest can also be used. 428 | if __name__ == "__main__": 429 | if len(sys.argv) < 2: 430 | print("No file given", file=ERROR_LOG) 431 | exit(1) 432 | amr_count = 1 433 | for line in open(sys.argv[1]): 434 | cur_line = line.strip() 435 | if cur_line == "" or cur_line.startswith("#"): 436 | continue 437 | print("AMR", amr_count, file=DEBUG_LOG) 438 | current = AMR.parse_AMR_line(cur_line) 439 | current.output_amr() 440 | amr_count += 1 441 | -------------------------------------------------------------------------------- /smatch/amr.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/amr.pyc -------------------------------------------------------------------------------- /smatch/sample_file_list: -------------------------------------------------------------------------------- 1 | nw_wsj_0001_1 nw_wsj_0001_2 nw_wsj_0002_1 nw_wsj_0003_1 2 | -------------------------------------------------------------------------------- /smatch/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | try: 6 | from setuptools import setup, find_packages 7 | except ImportError: 8 | from disutils.core import setup 9 | 10 | 11 | here = os.path.abspath(os.path.dirname(__file__)) 12 | with open(os.path.join(here, "README.md")) as f: 13 | README = f.read() 14 | 15 | 16 | setup(name="smatch", 17 | version="1.0", 18 | description="Smatch (semantic match) tool", 19 | long_description=README, 20 | author="Shu Cai", 21 | author_email="shucai.work@gmail.com", 22 | url="https://github.com/snowblink14/smatch", 23 | license="MIT", 24 | py_modules=["smatch", "amr"], 25 | scripts=["smatch.py"], 26 | ) 27 | 28 | -------------------------------------------------------------------------------- /smatch/smatch-table.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | import amr 6 | import sys 7 | import smatch 8 | import os 9 | import time 10 | 11 | ERROR_LOG = sys.stderr 12 | 13 | DEBUG_LOG = sys.stderr 14 | 15 | verbose = False 16 | 17 | # directory on isi machine 18 | # change if needed 19 | isi_dir_pre = "/nfs/web/isi.edu/cgi-bin/div3/mt/save-amr" 20 | 21 | 22 | def get_names(file_dir, files): 23 | """ 24 | Get the annotator name list based on a list of files 25 | Args: 26 | file_dir: AMR file folder 27 | files: a list of AMR names, e.g. nw_wsj_0001_1 28 | 29 | Returns: 30 | a list of user names who annotate all the files 31 | """ 32 | # for each user, check if they have files available 33 | # return user name list 34 | total_list = [] 35 | name_list = [] 36 | get_sub = False 37 | for path, subdir, dir_files in os.walk(file_dir): 38 | if not get_sub: 39 | total_list = subdir[:] 40 | get_sub = True 41 | else: 42 | break 43 | for user in total_list: 44 | has_file = True 45 | for f in files: 46 | file_path = file_dir + user + "/" + f + ".txt" 47 | if not os.path.exists(file_path): 48 | has_file = False 49 | break 50 | if has_file: 51 | name_list.append(user) 52 | if len(name_list) == 0: 53 | print("********Error: Cannot find any user who completes the files*************", file=ERROR_LOG) 54 | return name_list 55 | 56 | 57 | def compute_files(user1, user2, file_list, dir_pre, start_num): 58 | 59 | """ 60 | Compute the smatch scores for a file list between two users 61 | Args: 62 | user1: user 1 name 63 | user2: user 2 name 64 | file_list: file list 65 | dir_pre: the file location prefix 66 | start_num: the number of restarts in smatch 67 | Returns: 68 | smatch f score. 69 | 70 | """ 71 | match_total = 0 72 | test_total = 0 73 | gold_total = 0 74 | for fi in file_list: 75 | file1 = dir_pre + user1 + "/" + fi + ".txt" 76 | file2 = dir_pre + user2 + "/" + fi + ".txt" 77 | if not os.path.exists(file1): 78 | print("*********Error: ", file1, "does not exist*********", file=ERROR_LOG) 79 | return -1.00 80 | if not os.path.exists(file2): 81 | print("*********Error: ", file2, "does not exist*********", file=ERROR_LOG) 82 | return -1.00 83 | try: 84 | file1_h = open(file1, "r") 85 | file2_h = open(file2, "r") 86 | except IOError: 87 | print("Cannot open the files", file1, file2, file=ERROR_LOG) 88 | break 89 | cur_amr1 = smatch.get_amr_line(file1_h) 90 | cur_amr2 = smatch.get_amr_line(file2_h) 91 | if cur_amr1 == "": 92 | print("AMR 1 is empty", file=ERROR_LOG) 93 | continue 94 | if cur_amr2 == "": 95 | print("AMR 2 is empty", file=ERROR_LOG) 96 | continue 97 | amr1 = amr.AMR.parse_AMR_line(cur_amr1) 98 | amr2 = amr.AMR.parse_AMR_line(cur_amr2) 99 | test_label = "a" 100 | gold_label = "b" 101 | amr1.rename_node(test_label) 102 | amr2.rename_node(gold_label) 103 | (test_inst, test_rel1, test_rel2) = amr1.get_triples() 104 | (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples() 105 | if verbose: 106 | print("Instance triples of file 1:", len(test_inst), file=DEBUG_LOG) 107 | print(test_inst, file=DEBUG_LOG) 108 | print("Attribute triples of file 1:", len(test_rel1), file=DEBUG_LOG) 109 | print(test_rel1, file=DEBUG_LOG) 110 | print("Relation triples of file 1:", len(test_rel2), file=DEBUG_LOG) 111 | print(test_rel2, file=DEBUG_LOG) 112 | print("Instance triples of file 2:", len(gold_inst), file=DEBUG_LOG) 113 | print(gold_inst, file=DEBUG_LOG) 114 | print("Attribute triples of file 2:", len(gold_rel1), file=DEBUG_LOG) 115 | print(gold_rel1, file=DEBUG_LOG) 116 | print("Relation triples of file 2:", len(gold_rel2), file=DEBUG_LOG) 117 | print(gold_rel2, file=DEBUG_LOG) 118 | (best_match, best_match_num) = smatch.get_best_match(test_inst, test_rel1, test_rel2, 119 | gold_inst, gold_rel1, gold_rel2, 120 | test_label, gold_label) 121 | if verbose: 122 | print("best match number", best_match_num, file=DEBUG_LOG) 123 | print("Best Match:", smatch.print_alignment(best_match, test_inst, gold_inst), file=DEBUG_LOG) 124 | match_total += best_match_num 125 | test_total += (len(test_inst) + len(test_rel1) + len(test_rel2)) 126 | gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2)) 127 | smatch.match_triple_dict.clear() 128 | (precision, recall, f_score) = smatch.compute_f(match_total, test_total, gold_total) 129 | return "%.2f" % f_score 130 | 131 | 132 | def get_max_width(table, index): 133 | return max([len(str(row[index])) for row in table]) 134 | 135 | 136 | def pprint_table(table): 137 | """ 138 | Print a table in pretty format 139 | 140 | """ 141 | col_paddings = [] 142 | for i in range(len(table[0])): 143 | col_paddings.append(get_max_width(table,i)) 144 | for row in table: 145 | print(row[0].ljust(col_paddings[0] + 1), end="") 146 | for i in range(1, len(row)): 147 | col = str(row[i]).rjust(col_paddings[i]+2) 148 | print(col, end='') 149 | print("\n") 150 | 151 | 152 | def build_arg_parser(): 153 | """ 154 | Build an argument parser using argparse. Use it when python version is 2.7 or later. 155 | 156 | """ 157 | parser = argparse.ArgumentParser(description="Smatch table calculator -- arguments") 158 | parser.add_argument("--fl", type=argparse.FileType('r'), help='AMR ID list file') 159 | parser.add_argument('-f', nargs='+', help='AMR IDs (at least one)') 160 | parser.add_argument("-p", nargs='*', help="User list (can be none)") 161 | parser.add_argument("--fd", default=isi_dir_pre, help="AMR File directory. Default=location on isi machine") 162 | parser.add_argument('-r', type=int, default=4, help='Restart number (Default:4)') 163 | parser.add_argument('-v', action='store_true', help='Verbose output (Default:False)') 164 | return parser 165 | 166 | 167 | def build_arg_parser2(): 168 | """ 169 | Build an argument parser using optparse. Use it when python version is 2.5 or 2.6. 170 | 171 | """ 172 | usage_str = "Smatch table calculator -- arguments" 173 | parser = optparse.OptionParser(usage=usage_str) 174 | parser.add_option("--fl", dest="fl", type="string", help='AMR ID list file') 175 | parser.add_option("-f", dest="f", type="string", action="callback", callback=cb, help="AMR IDs (at least one)") 176 | parser.add_option("-p", dest="p", type="string", action="callback", callback=cb, help="User list") 177 | parser.add_option("--fd", dest="fd", type="string", help="file directory") 178 | parser.add_option("-r", "--restart", dest="r", type="int", help='Restart number (Default: 4)') 179 | parser.add_option("-v", "--verbose", action='store_true', dest="v", help='Verbose output (Default:False)') 180 | parser.set_defaults(r=4, v=False, ms=False, fd=isi_dir_pre) 181 | return parser 182 | 183 | 184 | def cb(option, value, parser): 185 | """ 186 | Callback function to handle variable number of arguments in optparse 187 | 188 | """ 189 | arguments = [value] 190 | for arg in parser.rargs: 191 | if arg[0] != "-": 192 | arguments.append(arg) 193 | else: 194 | del parser.rargs[:len(arguments)] 195 | break 196 | if getattr(parser.values, option.dest): 197 | arguments.extend(getattr(parser.values, option.dest)) 198 | setattr(parser.values, option.dest, arguments) 199 | 200 | 201 | def check_args(args): 202 | """ 203 | Parse arguments and check if the arguments are valid 204 | 205 | """ 206 | if not os.path.exists(args.fd): 207 | print("Not a valid path", args.fd, file=ERROR_LOG) 208 | return [], [], False 209 | if args.fl is not None: 210 | # we already ensure the file can be opened and opened the file 211 | file_line = args.fl.readline() 212 | amr_ids = file_line.strip().split() 213 | elif args.f is None: 214 | print("No AMR ID was given", file=ERROR_LOG) 215 | return [], [], False 216 | else: 217 | amr_ids = args.f 218 | names = [] 219 | check_name = True 220 | if args.p is None: 221 | names = get_names(args.fd, amr_ids) 222 | # no need to check names 223 | check_name = False 224 | if len(names) == 0: 225 | print("Cannot find any user who tagged these AMR", file=ERROR_LOG) 226 | return [], [], False 227 | else: 228 | names = args.p 229 | if len(names) == 0: 230 | print("No user was given", file=ERROR_LOG) 231 | return [], [], False 232 | if len(names) == 1: 233 | print("Only one user is given. Smatch calculation requires at least two users.", file=ERROR_LOG) 234 | return [], [], False 235 | if "consensus" in names: 236 | con_index = names.index("consensus") 237 | names.pop(con_index) 238 | names.append("consensus") 239 | # check if all the AMR_id and user combinations are valid 240 | if check_name: 241 | pop_name = [] 242 | for i, name in enumerate(names): 243 | for amr in amr_ids: 244 | amr_path = args.fd + name + "/" + amr + ".txt" 245 | if not os.path.exists(amr_path): 246 | print("User", name, "fails to tag AMR", amr, file=ERROR_LOG) 247 | pop_name.append(i) 248 | break 249 | if len(pop_name) != 0: 250 | pop_num = 0 251 | for p in pop_name: 252 | print("Deleting user", names[p - pop_num], "from the name list", file=ERROR_LOG) 253 | names.pop(p - pop_num) 254 | pop_num += 1 255 | if len(names) < 2: 256 | print("Not enough users to evaluate. Smatch requires >2 users who tag all the AMRs", file=ERROR_LOG) 257 | return "", "", False 258 | return amr_ids, names, True 259 | 260 | 261 | def main(arguments): 262 | global verbose 263 | (ids, names, result) = check_args(arguments) 264 | if arguments.v: 265 | verbose = True 266 | if not result: 267 | return 0 268 | acc_time = 0 269 | len_name = len(names) 270 | table = [] 271 | for i in range(0, len_name + 1): 272 | table.append([]) 273 | table[0].append("") 274 | for i in range(0, len_name): 275 | table[0].append(names[i]) 276 | for i in range(0, len_name): 277 | table[i+1].append(names[i]) 278 | for j in range(0, len_name): 279 | if i != j: 280 | start = time.clock() 281 | table[i+1].append(compute_files(names[i], names[j], ids, args.fd, args.r)) 282 | end = time.clock() 283 | if table[i+1][-1] != -1.0: 284 | acc_time += end-start 285 | else: 286 | table[i+1].append("") 287 | # check table 288 | for i in range(0, len_name + 1): 289 | for j in range(0, len_name + 1): 290 | if i != j: 291 | if table[i][j] != table[j][i]: 292 | if table[i][j] > table[j][i]: 293 | table[j][i] = table[i][j] 294 | else: 295 | table[i][j] = table[j][i] 296 | pprint_table(table) 297 | return acc_time 298 | 299 | 300 | if __name__ == "__main__": 301 | whole_start = time.clock() 302 | parser = None 303 | args = None 304 | if sys.version_info[:2] != (2, 7): 305 | # requires python version >= 2.5 306 | if sys.version_info[0] != 2 or sys.version_info[1] < 5: 307 | print("This program requires python 2.5 or later to run. ", file=ERROR_LOG) 308 | exit(1) 309 | import optparse 310 | parser = build_arg_parser2() 311 | (args, opts) = parser.parse_args() 312 | file_handle = None 313 | if args.fl is not None: 314 | try: 315 | file_handle = open(args.fl, "r") 316 | args.fl = file_handle 317 | except IOError: 318 | print("The ID list file", args.fl, "does not exist", file=ERROR_LOG) 319 | args.fl = None 320 | # python version 2.7 321 | else: 322 | import argparse 323 | parser = build_arg_parser() 324 | args = parser.parse_args() 325 | # Regularize fd, add "/" at the end if needed 326 | if args.fd[-1] != "/": 327 | args.fd += "/" 328 | # acc_time is the smatch calculation time 329 | acc_time = main(args) 330 | whole_end = time.clock() 331 | # time of the whole running process 332 | whole_time = whole_end - whole_start 333 | # print if needed 334 | # print("Accumulated computation time", acc_time, file=ERROR_LOG) 335 | # print("Total time", whole_time, file=ERROR_LOG) 336 | # print("Percentage", float(acc_time)/float(whole_time), file=ERROR_LOG) 337 | 338 | -------------------------------------------------------------------------------- /smatch/smatch_fromlists.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch/smatch_fromlists.pyc -------------------------------------------------------------------------------- /smatch/smatch_tool_guideline.txt: -------------------------------------------------------------------------------- 1 | Smatch Tool Guideline 2 | 3 | Smatch is a tool to evaluate the semantic overlap between AMR (abstract meaning representation). It can be used to compute the inter agreements of AMRs, and the agreement between an automatic-generated AMR and a gold AMR. For multiple AMR pairs, the smatch tool can provide an overall score for all the AMR pairs. 4 | 5 | I. Content and web demo pages 6 | 7 | This directory contains the Smatch source code and documentation. 8 | 9 | Smatch Webpages 10 | 11 | Smatch tool webpage: http://amr.isi.edu/eval/smatch/compare.html (A quick tutorial can be found on the page) 12 | - input: two AMRs. 13 | - output: the smatch score and the matching/unmatching triples. 14 | 15 | Smatch table tool webpage: http://amr.isi.edu/eval/smatch/table.html 16 | - input: AMR IDs and users. 17 | - output: a table which consists of the smatch scores of every pair of users. 18 | 19 | II. Installation 20 | 21 | Python (version 2.5 or later) is required to run smatch tool. Python 2.7 is recommended. No compilation is necessary. 22 | 23 | III. Usage 24 | 25 | Smatch tool consists of three files written in python. 26 | 27 | 1. smatch.py: for computing the smatch score(s) for multiple AMRs in two files. 28 | 29 | Input: two files which contain AMRs. Each file may contain multiple AMRs, and every two AMRs are separated by a blank line. AMRs can be one-per-line or have multiple lines, as long as there is no blank line in one AMR. 30 | 31 | Input file format: see test_input1.txt, test_input2.txt in the smatch tool folder. AMRs are separated by one or more blank lines, so no blank lines are allowed inside an AMR. Lines starting with a hash (#) will be ignored. 32 | 33 | Output: Smatch score(s) computed 34 | 35 | Usage: python smatch.py [-h] -f F F [-r R] [-v] [-ms] 36 | 37 | arguments: 38 | 39 | -h: help 40 | 41 | -f: two files which contain multiple AMRs. A blank line is used to separate two AMRs. Required arguments. 42 | 43 | -r: restart numer of the heuristic search during computation, optional. Default value: 4. This argument must be a positive integer. Large restart number will reduce the chance of search error, but also increase the running time. Small restart number will reduce the running time as well as increase the change of search error. The default value is by far the best trade-off. User can set a large number if the AMR length is long (search space is large) and user does not need very high calculation speed. 44 | 45 | -v: verbose output, optional. Default value: false. The verbose information includes the triples of each AMR, the matching triple number found for each iterations, and the best matching triple number. It is useful when you try to understand how the program works. User will not need this option most of the time. 46 | 47 | --ms: multiple score, optional. Adding this option will result in a single smatch score for each AMR pair. Otherwise it will output one single weighted score based on all pairs of AMRs. AMRs are weighted according to their number of triples. 48 | Default value: false 49 | 50 | --pr: Output precision and recall as well as the f-score. Default:false 51 | 52 | A typical (and most common) example of running smatch.py: 53 | 54 | python smatch.py -f test_input1.txt test_input2.txt 55 | 56 | This folder includes sample files test_input1.txt and test_input2.txt, so you should be able to run the above command as is. The above command should get the following line: 57 | Document F-score: 0.81 58 | 59 | 2. amr.py: a class to represent AMR structure. It contains a function to parse lines to AMR structure. smatch.py calls it to parse AMRs. 60 | 61 | 3. smatch-table.py: it calls the smatch library to compute the smatch scores for a group of users and multiple AMR IDs, and output a table to show the AMR score between each pair of users. 62 | 63 | Input: AMR ID list and User list. AMR ID list can be stored in a file (-fl file) or given by the command line (-f AMR_ID1, AMR_ID2,...). User list are given by the command line (-p user1,user2,..). If no users are given, the program searches for all the users who annotates all AMRs we require. The user number should be at least 2. 64 | 65 | Input file format: AMR ID list (see sample_file_list the smatch tool folder) 66 | 67 | Output: A table which shows the overall AMR score between every pair of users. 68 | 69 | Usage: python smatch-table.py [-h] [--fl FL] [-f F [F ...]] [-p [P [P ...]]] 70 | [--fd FD] [-r R] [-v] 71 | 72 | optional arguments: 73 | 74 | -h, --help show this help message and exit 75 | 76 | --fl FL AMR ID list file (a file which contains one line of AMR IDs, separated by blank space) 77 | 78 | -f F [F ...] AMR IDs (at least one). If we already have valid AMR ID list file, this option will be ignored. 79 | 80 | -p [P [P ...]] User list (It can be unspecified. When the list is none, the program searches for all the users who annotates all AMRs we require) It is meaningless to give only one user since smatch-table computes agreement between each pair of users. So the number of P is at least 2. 81 | 82 | --fd FD AMR File directory. Default=location on isi file system 83 | 84 | -r R Restart number (Default:4), same as the -r option in smatch.py 85 | 86 | -v Verbose output (Default:False), same as the -v option in smatch.py 87 | 88 | 89 | A typical example of running smatch-table.py: 90 | 91 | python smatch-table.py --fd $amr_root_dir --fl sample_file_list -p ulf knight 92 | 93 | which will compare files 94 | $amr_root_dir/ulf/nw_wsj_0001_1.txt $amr_root_dir/knight/nw_wsj_0001_1.txt 95 | $amr_root_dir/ulf/nw_wsj_0001_2.txt $amr_root_dir/knight/nw_wsj_0001_2.txt 96 | etc. 97 | 98 | Note: smatch-table.py computes smatch scores for every pair of users, so its speed can be slow when the number of user is large or when -P option is not set (in this case we compute smatch scores for all users who annotates the AMRs we require). 99 | -------------------------------------------------------------------------------- /smatch/test_input1.txt: -------------------------------------------------------------------------------- 1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29 2 | # ::snt The boy wants the girl to believe him. 3 | (w / want-01 4 | :ARG0 (b / boy) 5 | :ARG1 (b2 / believe-01 6 | :ARG0 (g / girl) 7 | :ARG1 b)) 8 | 9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17 10 | # ::snt The boy is a hard worker. 11 | (p / person 12 | :domain (b / boy) 13 | :ARG0-of (w / work-01 14 | :manner (h / hard))) 15 | 16 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07 17 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon. 18 | (b / bear-02 19 | :ARG1 (p / poet :name (n / name :op1 "William" :op2 "Shakespeare")) 20 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon"))) 21 | 22 | -------------------------------------------------------------------------------- /smatch/test_input2.txt: -------------------------------------------------------------------------------- 1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29 2 | # ::snt The boy wants the girl to believe him. 3 | (w / want-01 4 | :ARG0 (b / boy) 5 | :ARG1 (b2 / believe-01 6 | :ARG0 (g / girl) 7 | :ARG1 (h / he))) 8 | 9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17 10 | # ::snt The boy is a hard worker. 11 | (w / worker 12 | :mod (h / hard) 13 | :domain (b / boy)) 14 | 15 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07 16 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon. 17 | (b / bear-02 18 | :ARG1 (p / poet :name (n / name :op1 william :op2 "shakespeare")) 19 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon"))) 20 | 21 | -------------------------------------------------------------------------------- /smatch/update_log: -------------------------------------------------------------------------------- 1 | Update: 08/22/2012 2 | Person involved: Shu Cai 3 | 4 | Minor bug fix of smatch.py. smatch-v2.py was created. 5 | 6 | smatch.py-> smatch-v1.py 7 | smatch-v2.py-> smatch.py 8 | 9 | No change of interface 10 | 11 | Update: 09/14/2012 12 | Person involved: Shu Cai 13 | 14 | Bug fix of smatch.py and smatch-table.py. smatch-v0.1.py smatch-v0.2.py smatch-v0.3.py smatch-v0.4.py smatch-table-v0.1.py smatch-table-v0.2.py was created. 15 | 16 | smatch.py now equals to smatch-v0.4.py 17 | smatch-table.py now equals to smatch-table-v0.2.py 18 | 19 | smatch.py runs with a smart initialization, which matches words with the same value first, then randomly select other variable mappings. 4 restarts is applied. 20 | 21 | Update: 03/17/2013 22 | Person involved: Shu Cai 23 | 24 | Interface change of smatch.py and smatch-table.py. Using this version does not require esem-format-check.pl. (All versions before v0.5 require esem-format-check.pl to check the format of AMR) Instead it needs amr.py. 25 | 26 | It now accepts one-AMR-per-line format as well as other formats of AMR. 27 | 28 | smatch.py now equals to smatch-v0.5.py 29 | smatch-table.py now equals to smatch-table-v0.3.py 30 | 31 | Update:03/19/2013 32 | Person involved: Shu Cai 33 | 34 | Document update. The latest documents are smatch_guide.txt and smatch_guide.pdf (same content) 35 | Add some sample files to the directory: sample_file_list, test_input1, test_input2 36 | 37 | Update: 03/20/2013 38 | Person involved: Shu Cai 39 | 40 | Minor changes to the documents: smatch_guide.txt and smatch_guide.pdf 41 | 42 | Update: 04/04/2013 43 | Person involved: Shu Cai 44 | 45 | Add Software_architecture.pdf. Minor changes to the smatch.py and smatch-table.py (comments and add --pr option) 46 | Minor changes to the README.txt and smatch_guide.pdf 47 | 48 | Update: 01/18/2015 49 | Person involved: Shu Cai 50 | Code cleanup and bug fix. Add detailed comment to the code. 51 | Thanks Yoav Artzi (yoav@cs.washington.edu) for finding a bug and fixing it. 52 | 53 | Update: 12/21/2015 54 | Person involved: Jon May 55 | Fixed treatment of quoted strings to allow special characters to be actually part of the string. 56 | Empty double quoted strings also allowed 57 | 58 | Update: 1/9/2016 59 | Person involved: Guntis Barzdins and Didzis Gosko 60 | Fixed small crash bug 61 | 62 | Update: 11/06/2016 63 | Person involved: Shu Cai 64 | Fix a bug: not supporting multiple relationships between two (same) nodes 65 | Thanks Marco Damonte (s1333293@sms.ed.ac.uk) for finding this bug! 66 | 67 | Update: 11/14/2016 68 | Person involved: Jon May 69 | Fix a bug: quoted and unquoted strings match (propagation of old bug to github) 70 | Thanks William Dolan 71 | 72 | Update: 12/14/2016 73 | Person involved: Shu Cai 74 | Fix a bug introduced in 11/06/2016: not supporting multiple same-name relationships 75 | Thanks Miguel Ballesteros (miguel.ballesteros@ibm.com) to bring this up. 76 | 77 | Update: 12/18/2016 78 | Person involved: Shu Cai 79 | Add an error message for AMR parsing error, and fix a bug introduced by a typo in the previous commit. 80 | 81 | Update: 01/08/2017 82 | Person involved: Shu Cai 83 | A bit refactoring and cleanup for easier debugging and better code quality. 84 | This change does not affect the functionality of smatch. -------------------------------------------------------------------------------- /smatch_old/.filt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/.filt -------------------------------------------------------------------------------- /smatch_old/.output_jamr.txt.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/.output_jamr.txt.swp -------------------------------------------------------------------------------- /smatch_old/.smatch.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/.smatch.py.swp -------------------------------------------------------------------------------- /smatch_old/.smatch_fromlists.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/.smatch_fromlists.py.swp -------------------------------------------------------------------------------- /smatch_old/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2015 Shu Cai and Kevin Knight 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /smatch_old/README.txt: -------------------------------------------------------------------------------- 1 | Smatch Tool Guideline 2 | 3 | Smatch is a tool to evaluate the semantic overlap between AMR (abstract meaning representation). It can be used to compute the inter agreements of AMRs, and the agreement between an automatic-generated AMR and a gold AMR. For multiple AMR pairs, the smatch tool can provide an overall score for all the AMR pairs. 4 | 5 | I. Content and web demo pages 6 | 7 | This directory contains the Smatch source code and documentation. 8 | 9 | Smatch Webpages 10 | 11 | Smatch tool webpage: http://amr.isi.edu/eval/smatch/compare.html (A quick tutorial can be found on the page) 12 | - input: two AMRs. 13 | - output: the smatch score and the matching/unmatching triples. 14 | 15 | Smatch table tool webpage: http://amr.isi.edu/eval/smatch/table.html 16 | - input: AMR IDs and users. 17 | - output: a table which consists of the smatch scores of every pair of users. 18 | 19 | II. Installation 20 | 21 | Python (version 2.5 or later) is required to run smatch tool. Python 2.7 is recommended. No compilation is necessary. 22 | 23 | III. Usage 24 | 25 | Smatch tool consists of three files written in python. 26 | 27 | 1. smatch.py: for computing the smatch score(s) for multiple AMRs in two files. 28 | 29 | Input: two files which contain AMRs. Each file may contain multiple AMRs, and every two AMRs are separated by a blank line. AMRs can be one-per-line or have multiple lines, as long as there is no blank line in one AMR. 30 | 31 | Input file format: see test_input1.txt, test_input2.txt in the smatch tool folder. AMRs are separated by one or more blank lines, so no blank lines are allowed inside an AMR. Lines starting with a hash (#) will be ignored. 32 | 33 | Output: Smatch score(s) computed 34 | 35 | Usage: python smatch.py [-h] -f F F [-r R] [-v] [-ms] 36 | 37 | arguments: 38 | 39 | -h: help 40 | 41 | -f: two files which contain multiple AMRs. A blank line is used to separate two AMRs. Required arguments. 42 | 43 | -r: restart numer of the heuristic search during computation, optional. Default value: 4. This argument must be a positive integer. Large restart number will reduce the chance of search error, but also increase the running time. Small restart number will reduce the running time as well as increase the change of search error. The default value is by far the best trade-off. User can set a large number if the AMR length is long (search space is large) and user does not need very high calculation speed. 44 | 45 | -v: verbose output, optional. Default value: false. The verbose information includes the triples of each AMR, the matching triple number found for each iterations, and the best matching triple number. It is useful when you try to understand how the program works. User will not need this option most of the time. 46 | 47 | --ms: multiple score, optional. Adding this option will result in a single smatch score for each AMR pair. Otherwise it will output one single weighted score based on all pairs of AMRs. AMRs are weighted according to their number of triples. 48 | Default value: false 49 | 50 | --pr: Output precision and recall as well as the f-score. Default:false 51 | 52 | A typical (and most common) example of running smatch.py: 53 | 54 | python smatch.py -f test_input1.txt test_input2.txt 55 | 56 | This folder includes sample files test_input1.txt and test_input2.txt, so you should be able to run the above command as is. The above command should get the following line: 57 | Document F-score: 0.81 58 | 59 | 2. amr.py: a class to represent AMR structure. It contains a function to parse lines to AMR structure. smatch.py calls it to parse AMRs. 60 | 61 | 3. smatch-table.py: it calls the smatch library to compute the smatch scores for a group of users and multiple AMR IDs, and output a table to show the AMR score between each pair of users. 62 | 63 | Input: AMR ID list and User list. AMR ID list can be stored in a file (-fl file) or given by the command line (-f AMR_ID1, AMR_ID2,...). User list are given by the command line (-p user1,user2,..). If no users are given, the program searches for all the users who annotates all AMRs we require. The user number should be at least 2. 64 | 65 | Input file format: AMR ID list (see sample_file_list the smatch tool folder) 66 | 67 | Output: A table which shows the overall AMR score between every pair of users. 68 | 69 | Usage: python smatch-table.py [-h] [--fl FL] [-f F [F ...]] [-p [P [P ...]]] 70 | [--fd FD] [-r R] [-v] 71 | 72 | optional arguments: 73 | 74 | -h, --help show this help message and exit 75 | 76 | --fl FL AMR ID list file (a file which contains one line of AMR IDs, separated by blank space) 77 | 78 | -f F [F ...] AMR IDs (at least one). If we already have valid AMR ID list file, this option will be ignored. 79 | 80 | -p [P [P ...]] User list (It can be unspecified. When the list is none, the program searches for all the users who annotates all AMRs we require) It is meaningless to give only one user since smatch-table computes agreement between each pair of users. So the number of P is at least 2. 81 | 82 | --fd FD AMR File directory. Default=location on isi file system 83 | 84 | -r R Restart number (Default:4), same as the -r option in smatch.py 85 | 86 | -v Verbose output (Default:False), same as the -v option in smatch.py 87 | 88 | 89 | A typical example of running smatch-table.py: 90 | 91 | python smatch-table.py --fd $amr_root_dir --fl sample_file_list -p ulf knight 92 | 93 | which will compare files 94 | $amr_root_dir/ulf/nw_wsj_0001_1.txt $amr_root_dir/knight/nw_wsj_0001_1.txt 95 | $amr_root_dir/ulf/nw_wsj_0001_2.txt $amr_root_dir/knight/nw_wsj_0001_2.txt 96 | etc. 97 | 98 | Note: smatch-table.py computes smatch scores for every pair of users, so its speed can be slow when the number of user is large or when -P option is not set (in this case we compute smatch scores for all users who annotates the AMRs we require). 99 | -------------------------------------------------------------------------------- /smatch_old/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * 2 | -------------------------------------------------------------------------------- /smatch_old/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__init__.pyc -------------------------------------------------------------------------------- /smatch_old/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /smatch_old/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /smatch_old/__pycache__/amr.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__pycache__/amr.cpython-36.pyc -------------------------------------------------------------------------------- /smatch_old/__pycache__/amr.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__pycache__/amr.cpython-37.pyc -------------------------------------------------------------------------------- /smatch_old/__pycache__/amr_edited.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__pycache__/amr_edited.cpython-36.pyc -------------------------------------------------------------------------------- /smatch_old/__pycache__/smatch_fromlists.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__pycache__/smatch_fromlists.cpython-36.pyc -------------------------------------------------------------------------------- /smatch_old/__pycache__/smatch_fromlists.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/__pycache__/smatch_fromlists.cpython-37.pyc -------------------------------------------------------------------------------- /smatch_old/amr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | """ 5 | AMR (Abstract Meaning Representation) structure 6 | For detailed description of AMR, see http://www.isi.edu/natural-language/amr/a.pdf 7 | 8 | """ 9 | 10 | from collections import defaultdict 11 | import sys 12 | 13 | # change this if needed 14 | ERROR_LOG = sys.stderr 15 | 16 | # change this if needed 17 | DEBUG_LOG = sys.stderr 18 | 19 | 20 | class AMR(object): 21 | """ 22 | AMR is a rooted, labeled graph to represent semantics. 23 | This class has the following members: 24 | nodes: list of node in the graph. Its ith element is the name of the ith node. For example, a node name 25 | could be "a1", "b", "g2", .etc 26 | node_values: list of node labels (values) of the graph. Its ith element is the value associated with node i in 27 | nodes list. In AMR, such value is usually a semantic concept (e.g. "boy", "want-01") 28 | root: root node name 29 | relations: list of edges connecting two nodes in the graph. Each entry is a link between two nodes, i.e. a triple 30 | . In AMR, such link denotes the relation between two semantic 31 | concepts. For example, "arg0" means that one of the concepts is the 0th argument of the other. 32 | attributes: list of edges connecting a node to an attribute name and its value. For example, if the polarity of 33 | some node is negative, there should be an edge connecting this node and "-". A triple < attribute name, 34 | node name, attribute value> is used to represent such attribute. It can also be viewed as a relation. 35 | 36 | """ 37 | def __init__(self, node_list=None, node_value_list=None, relation_list=None, attribute_list=None): 38 | """ 39 | node_list: names of nodes in AMR graph, e.g. "a11", "n" 40 | node_value_list: values of nodes in AMR graph, e.g. "group" for a node named "g" 41 | relation_list: list of relations between two nodes 42 | attribute_list: list of attributes (links between one node and one constant value) 43 | 44 | """ 45 | # initialize AMR graph nodes using list of nodes name 46 | # root, by default, is the first in var_list 47 | 48 | if node_list is None: 49 | self.nodes = [] 50 | self.root = None 51 | else: 52 | self.nodes = node_list[:] 53 | if len(node_list) != 0: 54 | self.root = node_list[0] 55 | else: 56 | self.root = None 57 | if node_value_list is None: 58 | self.node_values = [] 59 | else: 60 | self.node_values = node_value_list[:] 61 | if relation_list is None: 62 | self.relations = [] 63 | else: 64 | self.relations = relation_list[:] 65 | if attribute_list is None: 66 | self.attributes = [] 67 | else: 68 | self.attributes = attribute_list[:] 69 | 70 | def rename_node(self, prefix): 71 | """ 72 | Rename AMR graph nodes to prefix + node_index to avoid nodes with the same name in two different AMRs. 73 | 74 | """ 75 | node_map_dict = {} 76 | # map each node to its new name (e.g. "a1") 77 | for i in range(0, len(self.nodes)): 78 | node_map_dict[self.nodes[i]] = prefix + str(i) 79 | # update node name 80 | for i, v in enumerate(self.nodes): 81 | self.nodes[i] = node_map_dict[v] 82 | # update node name in relations 83 | for i, d in enumerate(self.relations): 84 | new_dict = {} 85 | for k, v in d.items(): 86 | new_dict[node_map_dict[k]] = v 87 | self.relations[i] = new_dict 88 | 89 | def get_triples(self): 90 | """ 91 | Get the triples in three lists. 92 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 93 | attribute triple: relation of attributes, e.g. polarity(w, - ) 94 | and relation triple, e.g. arg0 (w, b) 95 | 96 | """ 97 | instance_triple = [] 98 | relation_triple = [] 99 | attribute_triple = [] 100 | for i in range(len(self.nodes)): 101 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 102 | # k is the other node this node has relation with 103 | # v is relation name 104 | for k, v in self.relations[i].items(): 105 | relation_triple.append((v, self.nodes[i], k)) 106 | # k2 is the attribute name 107 | # v2 is the attribute value 108 | for k2, v2 in self.attributes[i].items(): 109 | attribute_triple.append((k2, self.nodes[i], v2)) 110 | return instance_triple, attribute_triple, relation_triple 111 | 112 | 113 | def get_triples2(self): 114 | """ 115 | Get the triples in two lists: 116 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 117 | relation_triple: a triple representing all relations. E.g arg0 (w, b) or E.g. polarity(w, - ) 118 | Note that we do not differentiate between attribute triple and relation triple. Both are considered as relation 119 | triples. 120 | All triples are represented by (triple_type, argument 1 of the triple, argument 2 of the triple) 121 | 122 | """ 123 | instance_triple = [] 124 | relation_triple = [] 125 | for i in range(len(self.nodes)): 126 | # an instance triple is instance(node name, node value). 127 | # For example, instance(b, boy). 128 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 129 | # k is the other node this node has relation with 130 | # v is relation name 131 | for k, v in self.relations[i].items(): 132 | relation_triple.append((v, self.nodes[i], k)) 133 | # k2 is the attribute name 134 | # v2 is the attribute value 135 | for k2, v2 in self.attributes[i].items(): 136 | relation_triple.append((k2, self.nodes[i], v2)) 137 | return instance_triple, relation_triple 138 | 139 | 140 | def __str__(self): 141 | """ 142 | Generate AMR string for better readability 143 | 144 | """ 145 | lines = [] 146 | for i in range(len(self.nodes)): 147 | lines.append("Node "+ str(i) + " " + self.nodes[i]) 148 | lines.append("Value: " + self.node_values[i]) 149 | lines.append("Relations:") 150 | for k, v in self.relations[i].items(): 151 | lines.append("Node " + k + " via " + v) 152 | for k2, v2 in self.attributes[i].items(): 153 | lines.append("Attribute: " + k2 + " value " + v2) 154 | return "\n".join(lines) 155 | 156 | def __repr__(self): 157 | return self.__str__() 158 | 159 | def output_amr(self): 160 | """ 161 | Output AMR string 162 | 163 | """ 164 | print >> DEBUG_LOG, self.__str__() 165 | 166 | 167 | @staticmethod 168 | def parse_AMR_line(line): 169 | """ 170 | Parse a AMR from line representation to an AMR object. 171 | This parsing algorithm scans the line once and process each character, in a shift-reduce style. 172 | 173 | """ 174 | # Current state. It denotes the last significant symbol encountered. 1 for (, 2 for :, 3 for /, 175 | # and 0 for start state or ')' 176 | # Last significant symbol is ( --- start processing node name 177 | # Last significant symbol is : --- start processing relation name 178 | # Last significant symbol is / --- start processing node value (concept name) 179 | # Last significant symbol is ) --- current node processing is complete 180 | # Note that if these symbols are inside parenthesis, they are not significant symbols. 181 | state = 0 182 | # node stack for parsing 183 | stack = [] 184 | # current not-yet-reduced character sequence 185 | cur_charseq = [] 186 | # key: node name value: node value 187 | node_dict = {} 188 | # node name list (order: occurrence of the node) 189 | node_name_list = [] 190 | # key: node name: value: list of (relation name, the other node name) 191 | node_relation_dict1 = defaultdict(list) 192 | # key: node name, value: list of (attribute name, const value) or (relation name, unseen node name) 193 | node_relation_dict2 = defaultdict(list) 194 | # current relation name 195 | cur_relation_name = "" 196 | # having unmatched quote string 197 | in_quote = False 198 | for i, c in enumerate(line.strip()): 199 | if c == " ": 200 | # allow space in relation name 201 | if state == 2: 202 | cur_charseq.append(c) 203 | continue 204 | if c == "\"": 205 | # flip in_quote value when a quote symbol is encountered 206 | # insert placeholder if in_quote from last symbol 207 | if in_quote: 208 | cur_charseq.append('_') 209 | in_quote = not in_quote 210 | elif c == "(": 211 | # not significant symbol if inside quote 212 | if in_quote: 213 | cur_charseq.append(c) 214 | continue 215 | # get the attribute name 216 | # e.g :arg0 (x ... 217 | # at this point we get "arg0" 218 | if state == 2: 219 | # in this state, current relation name should be empty 220 | if cur_relation_name != "": 221 | print >> ERROR_LOG, "Format error when processing ", line[0:i+1] 222 | return None 223 | # update current relation name for future use 224 | cur_relation_name = "".join(cur_charseq).strip() 225 | cur_charseq[:] = [] 226 | state = 1 227 | elif c == ":": 228 | # not significant symbol if inside quote 229 | if in_quote: 230 | cur_charseq.append(c) 231 | continue 232 | # Last significant symbol is "/". Now we encounter ":" 233 | # Example: 234 | # :OR (o2 / *OR* 235 | # :mod (o3 / official) 236 | # gets node value "*OR*" at this point 237 | if state == 3: 238 | node_value = "".join(cur_charseq) 239 | # clear current char sequence 240 | cur_charseq[:] = [] 241 | # pop node name ("o2" in the above example) 242 | cur_node_name = stack[-1] 243 | # update node name/value map 244 | node_dict[cur_node_name] = node_value 245 | # Last significant symbol is ":". Now we encounter ":" 246 | # Example: 247 | # :op1 w :quant 30 248 | # or :day 14 :month 3 249 | # the problem is that we cannot decide if node value is attribute value (constant) 250 | # or node value (variable) at this moment 251 | elif state == 2: 252 | temp_attr_value = "".join(cur_charseq) 253 | cur_charseq[:] = [] 254 | parts = temp_attr_value.split() 255 | if len(parts) < 2: 256 | print >> ERROR_LOG, "Error in processing; part len < 2", line[0:i+1] 257 | return None 258 | # For the above example, node name is "op1", and node value is "w" 259 | # Note that this node name might not be encountered before 260 | relation_name = parts[0].strip() 261 | relation_value = parts[1].strip() 262 | # We need to link upper level node to the current 263 | # top of stack is upper level node 264 | if len(stack) == 0: 265 | print >> ERROR_LOG, "Error in processing", line[:i], relation_name, relation_value 266 | return None 267 | # if we have not seen this node name before 268 | if relation_value not in node_dict: 269 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 270 | else: 271 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 272 | state = 2 273 | elif c == "/": 274 | if in_quote: 275 | cur_charseq.append(c) 276 | continue 277 | # Last significant symbol is "(". Now we encounter "/" 278 | # Example: 279 | # (d / default-01 280 | # get "d" here 281 | if state == 1: 282 | node_name = "".join(cur_charseq) 283 | cur_charseq[:] = [] 284 | # if this node name is already in node_dict, it is duplicate 285 | if node_name in node_dict: 286 | print >> ERROR_LOG, "Duplicate node name ", node_name, " in parsing AMR" 287 | return None 288 | # push the node name to stack 289 | stack.append(node_name) 290 | # add it to node name list 291 | node_name_list.append(node_name) 292 | # if this node is part of the relation 293 | # Example: 294 | # :arg1 (n / nation) 295 | # cur_relation_name is arg1 296 | # node name is n 297 | # we have a relation arg1(upper level node, n) 298 | if cur_relation_name != "": 299 | # if relation name ends with "-of", e.g."arg0-of", 300 | # it is reverse of some relation. For example, if a is "arg0-of" b, 301 | # we can also say b is "arg0" a. 302 | # If the relation name ends with "-of", we store the reverse relation. 303 | if not cur_relation_name.endswith("-of"): 304 | # stack[-2] is upper_level node we encountered, as we just add node_name to stack 305 | node_relation_dict1[stack[-2]].append((cur_relation_name, node_name)) 306 | else: 307 | # cur_relation_name[:-3] is to delete "-of" 308 | node_relation_dict1[node_name].append((cur_relation_name[:-3], stack[-2])) 309 | # clear current_relation_name 310 | cur_relation_name = "" 311 | else: 312 | # error if in other state 313 | print >> ERROR_LOG, "Error in parsing AMR", line[0:i+1] 314 | return None 315 | state = 3 316 | elif c == ")": 317 | if in_quote: 318 | cur_charseq.append(c) 319 | continue 320 | # stack should be non-empty to find upper level node 321 | if len(stack) == 0: 322 | print >> ERROR_LOG, "Unmatched parenthesis at position", i, "in processing", line[0:i+1] 323 | return None 324 | # Last significant symbol is ":". Now we encounter ")" 325 | # Example: 326 | # :op2 "Brown") or :op2 w) 327 | # get \"Brown\" or w here 328 | if state == 2: 329 | temp_attr_value = "".join(cur_charseq) 330 | cur_charseq[:] = [] 331 | parts = temp_attr_value.split() 332 | if len(parts) < 2: 333 | print >> ERROR_LOG, "Error processing", line[:i+1], temp_attr_value 334 | return None 335 | relation_name = parts[0].strip() 336 | relation_value = parts[1].strip() 337 | # store reverse of the relation 338 | # we are sure relation_value is a node here, as "-of" relation is only between two nodes 339 | if relation_name.endswith("-of"): 340 | node_relation_dict1[relation_value].append((relation_name[:-3], stack[-1])) 341 | # attribute value not seen before 342 | # Note that it might be a constant attribute value, or an unseen node 343 | # process this after we have seen all the node names 344 | elif relation_value not in node_dict: 345 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 346 | else: 347 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 348 | # Last significant symbol is "/". Now we encounter ")" 349 | # Example: 350 | # :arg1 (n / nation) 351 | # we get "nation" here 352 | elif state == 3: 353 | node_value = "".join(cur_charseq) 354 | cur_charseq[:] = [] 355 | cur_node_name = stack[-1] 356 | # map node name to its value 357 | node_dict[cur_node_name] = node_value 358 | # pop from stack, as the current node has been processed 359 | stack.pop() 360 | cur_relation_name = "" 361 | state = 0 362 | else: 363 | # not significant symbols, so we just shift. 364 | cur_charseq.append(c) 365 | #create data structures to initialize an AMR 366 | node_value_list = [] 367 | relation_list = [] 368 | attribute_list = [] 369 | for v in node_name_list: 370 | if v not in node_dict: 371 | print >> ERROR_LOG, "Error: Node name not found", v 372 | return None 373 | else: 374 | node_value_list.append(node_dict[v]) 375 | # build relation map and attribute map for this node 376 | relation_dict = {} 377 | attribute_dict = {} 378 | if v in node_relation_dict1: 379 | for v1 in node_relation_dict1[v]: 380 | relation_dict[v1[1]] = v1[0] 381 | if v in node_relation_dict2: 382 | for v2 in node_relation_dict2[v]: 383 | # if value is in quote, it is a constant value 384 | # strip the quote and put it in attribute map 385 | if v2[1][0] == "\"" and v2[1][-1] == "\"": 386 | attribute_dict[v2[0]] = v2[1][1:-1] 387 | # if value is a node name 388 | elif v2[1] in node_dict: 389 | relation_dict[v2[1]] = v2[0] 390 | else: 391 | attribute_dict[v2[0]] = v2[1] 392 | # each node has a relation map and attribute map 393 | relation_list.append(relation_dict) 394 | attribute_list.append(attribute_dict) 395 | # add TOP as an attribute. The attribute value is the top node value 396 | attribute_list[0]["TOP"] = node_value_list[0] 397 | result_amr = AMR(node_name_list, node_value_list, relation_list, attribute_list) 398 | return result_amr 399 | 400 | # test AMR parsing 401 | # a unittest can also be used. 402 | if __name__ == "__main__": 403 | if len(sys.argv) < 2: 404 | print >> ERROR_LOG, "No file given" 405 | exit(1) 406 | amr_count = 1 407 | for line in open(sys.argv[1]): 408 | cur_line = line.strip() 409 | if cur_line == "" or cur_line.startswith("#"): 410 | continue 411 | print >> DEBUG_LOG, "AMR", amr_count 412 | current = AMR.parse_AMR_line(cur_line) 413 | current.output_amr() 414 | amr_count += 1 415 | -------------------------------------------------------------------------------- /smatch_old/amr.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/amr.pyc -------------------------------------------------------------------------------- /smatch_old/amr_edited.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | """ 5 | AMR (Abstract Meaning Representation) structure 6 | For detailed description of AMR, see http://www.isi.edu/natural-language/amr/a.pdf 7 | 8 | """ 9 | import collections 10 | from collections import defaultdict 11 | import sys 12 | 13 | # change this if needed 14 | ERROR_LOG = sys.stderr 15 | 16 | # change this if needed 17 | DEBUG_LOG = sys.stderr 18 | 19 | 20 | class AMR(object): 21 | """ 22 | AMR is a rooted, labeled graph to represent semantics. 23 | This class has the following members: 24 | nodes: list of node in the graph. Its ith element is the name of the ith node. For example, a node name 25 | could be "a1", "b", "g2", .etc 26 | node_values: list of node labels (values) of the graph. Its ith element is the value associated with node i in 27 | nodes list. In AMR, such value is usually a semantic concept (e.g. "boy", "want-01") 28 | root: root node name 29 | relations: list of edges connecting two nodes in the graph. Each entry is a link between two nodes, i.e. a triple 30 | . In AMR, such link denotes the relation between two semantic 31 | concepts. For example, "arg0" means that one of the concepts is the 0th argument of the other. 32 | attributes: list of edges connecting a node to an attribute name and its value. For example, if the polarity of 33 | some node is negative, there should be an edge connecting this node and "-". A triple < attribute name, 34 | node name, attribute value> is used to represent such attribute. It can also be viewed as a relation. 35 | 36 | """ 37 | def __init__(self, node_list=None, node_value_list=None, relation_list=None, attribute_list=None, reent=None, allrelations=None): 38 | """ 39 | node_list: names of nodes in AMR graph, e.g. "a11", "n" 40 | node_value_list: values of nodes in AMR graph, e.g. "group" for a node named "g" 41 | relation_list: list of relations between two nodes 42 | attribute_list: list of attributes (links between one node and one constant value) 43 | 44 | """ 45 | # initialize AMR graph nodes using list of nodes name 46 | # root, by default, is the first in var_list 47 | attribute_list2 = [] 48 | for dct in attribute_list: 49 | dct2 = collections.OrderedDict() 50 | for item in dct: 51 | if len(dct[item]) > 1 and dct[item].endswith("_"): 52 | dct[item] = '"' + dct[item][0:-1] + '"' 53 | dct2[item] = dct[item] 54 | attribute_list2.append(dct2) 55 | reent2 = [] 56 | for r in reent: 57 | if len(r[2]) > 1 and r[2].endswith("_"): 58 | reent2.append((r[0], r[1], '"' + r[2][0:-1] + '"')) 59 | else: 60 | reent2.append(r) 61 | allrelations2 = [] 62 | for r in allrelations: 63 | if len(r[2]) > 1 and r[2].endswith("_"): 64 | allrelations2.append((r[0], r[1], '"' + r[2][0:-1] + '"')) 65 | else: 66 | allrelations2.append(r) 67 | if node_list is None: 68 | self.nodes = [] 69 | self.root = None 70 | else: 71 | self.nodes = node_list[:] 72 | if len(node_list) != 0: 73 | self.root = node_list[0] 74 | else: 75 | self.root = None 76 | if node_value_list is None: 77 | self.node_values = [] 78 | else: 79 | self.node_values = node_value_list[:] 80 | if relation_list is None: 81 | self.relations = [] 82 | else: 83 | self.relations = relation_list[:] 84 | if attribute_list2 is None: 85 | self.attributes = [] 86 | else: 87 | self.attributes = attribute_list2[:] 88 | 89 | self.reent= reent2 90 | self.allrelations = allrelations2 91 | 92 | def rename_node(self, prefix): 93 | """ 94 | Rename AMR graph nodes to prefix + node_index to avoid nodes with the same name in two different AMRs. 95 | 96 | """ 97 | node_map_dict = {} 98 | # map each node to its new name (e.g. "a1") 99 | for i in range(0, len(self.nodes)): 100 | node_map_dict[self.nodes[i]] = prefix + str(i) 101 | # update node name 102 | for i, v in enumerate(self.nodes): 103 | self.nodes[i] = node_map_dict[v] 104 | # update node name in relations 105 | for i, d in enumerate(self.relations): 106 | new_dict = {} 107 | for k, v_lst in d.items(): 108 | for v in v_lst: 109 | if node_map_dict[k] not in new_dict: 110 | new_dict[node_map_dict[k]] = [v] 111 | else: 112 | new_dict[node_map_dict[k]].append(v) 113 | self.relations[i] = new_dict 114 | 115 | def get_triples(self): 116 | """ 117 | Get the triples in three lists. 118 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 119 | attribute triple: relation of attributes, e.g. polarity(w, - ) 120 | and relation triple, e.g. arg0 (w, b) 121 | 122 | """ 123 | instance_triple = [] 124 | relation_triple = [] 125 | attribute_triple = [] 126 | for i in range(len(self.nodes)): 127 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 128 | # k is the other node this node has relation with 129 | # v is relation name 130 | for k, v_lst in self.relations[i].items(): 131 | for v in v_lst: 132 | relation_triple.append((v, self.nodes[i], k)) 133 | # k2 is the attribute name 134 | # v2 is the attribute value 135 | for k2, v2 in self.attributes[i].items(): 136 | attribute_triple.append((k2, self.nodes[i], v2)) 137 | return instance_triple, attribute_triple, relation_triple 138 | 139 | 140 | def get_triples2(self): 141 | """ 142 | Get the triples in two lists: 143 | instance_triple: a triple representing an instance. E.g. instance(w, want-01) 144 | relation_triple: a triple representing all relations. E.g arg0 (w, b) or E.g. polarity(w, - ) 145 | Note that we do not differentiate between attribute triple and relation triple. Both are considered as relation 146 | triples. 147 | All triples are represented by (triple_type, argument 1 of the triple, argument 2 of the triple) 148 | 149 | """ 150 | instance_triple = [] 151 | relation_triple = [] 152 | for i in range(len(self.nodes)): 153 | # an instance triple is instance(node name, node value). 154 | # For example, instance(b, boy). 155 | instance_triple.append(("instance", self.nodes[i], self.node_values[i])) 156 | # k is the other node this node has relation with 157 | # v is relation name 158 | for k, v_lst in self.relations[i].items(): 159 | for v in v_lst: 160 | relation_triple.append((v, self.nodes[i], k)) 161 | # k2 is the attribute name 162 | # v2 is the attribute value 163 | for k2, v2 in self.attributes[i].items(): 164 | relation_triple.append((k2, self.nodes[i], v2)) 165 | return instance_triple, relation_triple 166 | 167 | def get_triples3(self): 168 | relation_triple = [(self.nodes[0],"TOP",self.node_values[0])] 169 | relation_triple.extend(self.allrelations) 170 | return relation_triple 171 | 172 | def __str__(self): 173 | """ 174 | Generate AMR string for better readability 175 | 176 | """ 177 | lines = [] 178 | for i in range(len(self.nodes)): 179 | lines.append("Node "+ str(i) + " " + self.nodes[i]) 180 | lines.append("Value: " + self.node_values[i]) 181 | lines.append("Relations:") 182 | for k, v_lst in self.relations[i].items(): 183 | for v in v_lst: 184 | lines.append("Node " + k + " via " + v) 185 | for k2, v2 in self.attributes[i].items(): 186 | lines.append("Attribute: " + k2 + " value " + v2) 187 | return "\n".join(lines) 188 | 189 | def __repr__(self): 190 | return self.__str__() 191 | 192 | def output_amr(self): 193 | """ 194 | Output AMR string 195 | 196 | """ 197 | print >> DEBUG_LOG, self.__str__() 198 | 199 | 200 | @staticmethod 201 | def parse_AMR_line(line, normalize_inv = True): 202 | """ 203 | Parse a AMR from line representation to an AMR object. 204 | This parsing algorithm scans the line once and process each character, in a shift-reduce style. 205 | 206 | """ 207 | # Current state. It denotes the last significant symbol encountered. 1 for (, 2 for :, 3 for /, 208 | # and 0 for start state or ')' 209 | # Last significant symbol is ( --- start processing node name 210 | # Last significant symbol is : --- start processing relation name 211 | # Last significant symbol is / --- start processing node value (concept name) 212 | # Last significant symbol is ) --- current node processing is complete 213 | # Note that if these symbols are inside parenthesis, they are not significant symbols. 214 | state = 0 215 | stack = [] 216 | # current not-yet-reduced character sequence 217 | cur_charseq = [] 218 | # key: node name value: node value 219 | node_dict = collections.OrderedDict() 220 | # node name list (order: occurrence of the node) 221 | node_name_list = [] 222 | # key: node name: value: list of (relation name, the other node name) 223 | node_relation_dict1 = defaultdict(list) 224 | # key: node name, value: list of (attribute name, const value) or (relation name, unseen node name) 225 | node_relation_dict2 = defaultdict(list) 226 | allrelations = [] 227 | reent = [] 228 | # current relation name 229 | cur_relation_name = "" 230 | # having unmatched quote string 231 | in_quote = False 232 | for i, c in enumerate(line.strip()): 233 | if c == " ": 234 | # allow space in relation name 235 | if state == 2: 236 | cur_charseq.append(c) 237 | continue 238 | if c == "\"": 239 | # flip in_quote value when a quote symbol is encountered 240 | # insert placeholder if in_quote from last symbol 241 | if in_quote: 242 | cur_charseq.append('_') 243 | in_quote = not in_quote 244 | elif c == "(": 245 | # not significant symbol if inside quote 246 | if in_quote: 247 | cur_charseq.append(c) 248 | continue 249 | # get the attribute name 250 | # e.g :arg0 (x ... 251 | # at this point we get "arg0" 252 | if state == 2: 253 | # in this state, current relation name should be empty 254 | if cur_relation_name != "": 255 | print >> ERROR_LOG, "Format error when processing ", line[0:i+1] 256 | return None 257 | # update current relation name for future use 258 | cur_relation_name = "".join(cur_charseq).strip() 259 | cur_charseq[:] = [] 260 | state = 1 261 | elif c == ":": 262 | # not significant symbol if inside quote 263 | if in_quote: 264 | cur_charseq.append(c) 265 | continue 266 | # Last significant symbol is "/". Now we encounter ":" 267 | # Example: 268 | # :OR (o2 / *OR* 269 | # :mod (o3 / official) 270 | # gets node value "*OR*" at this point 271 | if state == 3: 272 | node_value = "".join(cur_charseq) 273 | # clear current char sequence 274 | cur_charseq[:] = [] 275 | # pop node name ("o2" in the above example) 276 | cur_node_name = stack[-1] 277 | # update node name/value map 278 | node_dict[cur_node_name] = node_value 279 | # Last significant symbol is ":". Now we encounter ":" 280 | # Example: 281 | # :op1 w :quant 30 282 | # or :day 14 :month 3 283 | # the problem is that we cannot decide if node value is attribute value (constant) 284 | # or node value (variable) at this moment 285 | elif state == 2: 286 | temp_attr_value = "".join(cur_charseq) 287 | cur_charseq[:] = [] 288 | parts = temp_attr_value.split() 289 | if len(parts) < 2: 290 | print >> ERROR_LOG, "Error in processing; part len < 2", line[0:i+1] 291 | return None 292 | # For the above example, node name is "op1", and node value is "w" 293 | # Note that this node name might not be encountered before 294 | relation_name = parts[0].strip() 295 | relation_value = parts[1].strip() 296 | # We need to link upper level node to the current 297 | # top of stack is upper level node 298 | if len(stack) == 0: 299 | print >> ERROR_LOG, "Error in processing", line[:i], relation_name, relation_value 300 | return None 301 | # if we have not seen this node name before 302 | if relation_name.endswith("-of") and normalize_inv: 303 | allrelations.append((relation_value,relation_name[:-3],stack[-1])) 304 | if relation_value not in node_dict: 305 | node_relation_dict2[relation_value].append((relation_name[:-3], stack[-1])) 306 | else: 307 | node_relation_dict1[relation_value].append((relation_name[:-3], stack[-1])) 308 | reent.append((relation_value,relation_name[:-3],stack[-1])) 309 | else: 310 | allrelations.append((stack[-1],relation_name,relation_value)) 311 | if relation_value not in node_dict: 312 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 313 | else: 314 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 315 | reent.append((stack[-1],relation_name,relation_value)) 316 | state = 2 317 | elif c == "/": 318 | if in_quote: 319 | cur_charseq.append(c) 320 | continue 321 | # Last significant symbol is "(". Now we encounter "/" 322 | # Example: 323 | # (d / default-01 324 | # get "d" here 325 | if state == 1: 326 | node_name = "".join(cur_charseq) 327 | cur_charseq[:] = [] 328 | # if this node name is already in node_dict, it is duplicate 329 | if node_name in node_dict: 330 | print >> ERROR_LOG, "Duplicate node name ", node_name, " in parsing AMR" 331 | return None 332 | # push the node name to stack 333 | stack.append(node_name) 334 | # add it to node name list 335 | node_name_list.append(node_name) 336 | # if this node is part of the relation 337 | # Example: 338 | # :arg1 (n / nation) 339 | # cur_relation_name is arg1 340 | # node name is n 341 | # we have a relation arg1(upper level node, n) 342 | if cur_relation_name != "": 343 | # if relation name ends with "-of", e.g."arg0-of", 344 | # it is reverse of some relation. For example, if a is "arg0-of" b, 345 | # we can also say b is "arg0" a. 346 | # If the relation name ends with "-of", we store the reverse relation. 347 | if (not cur_relation_name.endswith("-of")) or normalize_inv == False: 348 | # stack[-2] is upper_level node we encountered, as we just add node_name to stack 349 | node_relation_dict1[stack[-2]].append((cur_relation_name, node_name)) 350 | allrelations.append((stack[-2],cur_relation_name, node_name)) 351 | #if node_name in node_name_list: 352 | # reent.append((stack[-2],cur_relation_name,node_name)) 353 | else: 354 | # cur_relation_name[:-3] is to delete "-of" 355 | node_relation_dict1[node_name].append((cur_relation_name[:-3], stack[-2])) 356 | allrelations.append((node_name,cur_relation_name[:-3], stack[-2])) 357 | # clear current_relation_name 358 | cur_relation_name = "" 359 | else: 360 | # error if in other state 361 | print >> ERROR_LOG, "Error in parsing AMR", line[0:i+1] 362 | return None 363 | state = 3 364 | elif c == ")": 365 | if in_quote: 366 | cur_charseq.append(c) 367 | continue 368 | # stack should be non-empty to find upper level node 369 | if len(stack) == 0: 370 | print >> ERROR_LOG, "Unmatched parenthesis at position", i, "in processing", line[0:i+1] 371 | return None 372 | # Last significant symbol is ":". Now we encounter ")" 373 | # Example: 374 | # :op2 "Brown") or :op2 w) 375 | # get \"Brown\" or w here 376 | if state == 2: 377 | temp_attr_value = "".join(cur_charseq) 378 | cur_charseq[:] = [] 379 | parts = temp_attr_value.split() 380 | if len(parts) < 2: 381 | print >> ERROR_LOG, "Error processing", line[:i+1], temp_attr_value 382 | return None 383 | relation_name = parts[0].strip() 384 | relation_value = parts[1].strip() 385 | # store reverse of the relation 386 | # we are sure relation_value is a node here, as "-of" relation is only between two nodes 387 | if relation_name.endswith("-of") and normalize_inv: 388 | allrelations.append((relation_value,relation_name[:-3], stack[-1])) 389 | node_relation_dict1[relation_value].append((relation_name[:-3], stack[-1])) 390 | # attribute value not seen before 391 | # Note that it might be a constant attribute value, or an unseen node 392 | # process this after we have seen all the node names 393 | else: 394 | allrelations.append((stack[-1],relation_name, relation_value)) 395 | if relation_value not in node_dict: 396 | node_relation_dict2[stack[-1]].append((relation_name, relation_value)) 397 | else: 398 | node_relation_dict1[stack[-1]].append((relation_name, relation_value)) 399 | #if relation_value in node_name_list: 400 | reent.append((stack[-1],relation_name,relation_value)) 401 | # Last significant symbol is "/". Now we encounter ")" 402 | # Example: 403 | # :arg1 (n / nation) 404 | # we get "nation" here 405 | elif state == 3: 406 | node_value = "".join(cur_charseq) 407 | cur_charseq[:] = [] 408 | cur_node_name = stack[-1] 409 | # map node name to its value 410 | node_dict[cur_node_name] = node_value 411 | #if node_value in node_name_list: 412 | # reent.append((stack[-1],relation_name,relation_value)) 413 | # pop from stack, as the current node has been processed 414 | stack.pop() 415 | cur_relation_name = "" 416 | state = 0 417 | else: 418 | # not significant symbols, so we just shift. 419 | cur_charseq.append(c) 420 | #create data structures to initialize an AMR 421 | node_value_list = [] 422 | relation_list = [] 423 | attribute_list = [] 424 | for v in node_name_list: 425 | if v not in node_dict: 426 | print >> ERROR_LOG, "Error: Node name not found", v 427 | return None 428 | else: 429 | node_value_list.append(node_dict[v]) 430 | # build relation map and attribute map for this node 431 | relation_dict = collections.OrderedDict() 432 | attribute_dict = collections.OrderedDict() 433 | if v in node_relation_dict1: 434 | for v1 in node_relation_dict1[v]: 435 | if v1[1] not in relation_dict: 436 | relation_dict[v1[1]] = [v1[0]] 437 | else: 438 | relation_dict[v1[1]].append(v1[0]) 439 | if v in node_relation_dict2: 440 | for v2 in node_relation_dict2[v]: 441 | # if value is in quote, it is a constant value 442 | # strip the quote and put it in attribute map 443 | if v2[1][0] == "\"" and v2[1][-1] == "\"": 444 | attribute_dict[v2[0]] = v2[1][1:-1] 445 | # if value is a node name 446 | elif v2[1] in node_dict: 447 | if v2[1] not in relation_dict: 448 | relation_dict[v2[1]] = [v2[0]] 449 | else: 450 | relation_dict[v2[1]].append(v2[0]) 451 | else: 452 | attribute_dict[v2[0]] = v2[1] 453 | # each node has a relation map and attribute map 454 | relation_list.append(relation_dict) 455 | attribute_list.append(attribute_dict) 456 | # add TOP as an attribute. The attribute value is the top node value 457 | attribute_list[0]["TOP"] = node_value_list[0] 458 | result_amr = AMR(node_name_list, node_value_list, relation_list, attribute_list, reent, allrelations) 459 | return result_amr 460 | 461 | # test AMR parsing 462 | # a unittest can also be used. 463 | if __name__ == "__main__": 464 | if len(sys.argv) < 2: 465 | print >> ERROR_LOG, "No file given" 466 | exit(1) 467 | amr_count = 1 468 | for line in open(sys.argv[1]): 469 | cur_line = line.strip() 470 | if cur_line == "" or cur_line.startswith("#"): 471 | continue 472 | print >> DEBUG_LOG, "AMR", amr_count 473 | current = AMR.parse_AMR_line(cur_line) 474 | current.output_amr() 475 | amr_count += 1 -------------------------------------------------------------------------------- /smatch_old/amr_edited.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/amr_edited.pyc -------------------------------------------------------------------------------- /smatch_old/sample_file_list: -------------------------------------------------------------------------------- 1 | nw_wsj_0001_1 nw_wsj_0001_2 nw_wsj_0002_1 nw_wsj_0003_1 2 | -------------------------------------------------------------------------------- /smatch_old/smatch-table.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import amr 4 | import sys 5 | import subprocess 6 | import smatch 7 | import os 8 | import random 9 | import time 10 | 11 | ERROR_LOG = sys.stderr 12 | 13 | DEBUG_LOG = sys.stderr 14 | 15 | verbose = False 16 | 17 | # directory on isi machine 18 | # change if needed 19 | isi_dir_pre = "/nfs/web/isi.edu/cgi-bin/div3/mt/save-amr" 20 | 21 | 22 | def get_names(file_dir, files): 23 | """ 24 | Get the annotator name list based on a list of files 25 | Args: 26 | file_dir: AMR file folder 27 | files: a list of AMR names, e.g. nw_wsj_0001_1 28 | 29 | Returns: 30 | a list of user names who annotate all the files 31 | """ 32 | # for each user, check if they have files available 33 | # return user name list 34 | total_list = [] 35 | name_list = [] 36 | get_sub = False 37 | for path, subdir, dir_files in os.walk(file_dir): 38 | if not get_sub: 39 | total_list = subdir[:] 40 | get_sub = True 41 | else: 42 | break 43 | for user in total_list: 44 | has_file = True 45 | for f in files: 46 | file_path = file_dir + user + "/" + f + ".txt" 47 | if not os.path.exists(file_path): 48 | has_file = False 49 | break 50 | if has_file: 51 | name_list.append(user) 52 | if len(name_list) == 0: 53 | print >> ERROR_LOG, "********Error: Cannot find any user who completes the files*************" 54 | return name_list 55 | 56 | 57 | def compute_files(user1, user2, file_list, dir_pre, start_num): 58 | 59 | """ 60 | Compute the smatch scores for a file list between two users 61 | Args: 62 | user1: user 1 name 63 | user2: user 2 name 64 | file_list: file list 65 | dir_pre: the file location prefix 66 | start_num: the number of restarts in smatch 67 | Returns: 68 | smatch f score. 69 | 70 | """ 71 | match_total = 0 72 | test_total = 0 73 | gold_total = 0 74 | for fi in file_list: 75 | file1 = dir_pre + user1 + "/" + fi + ".txt" 76 | file2 = dir_pre + user2 + "/" + fi + ".txt" 77 | if not os.path.exists(file1): 78 | print >> ERROR_LOG, "*********Error: ", file1, "does not exist*********" 79 | return -1.00 80 | if not os.path.exists(file2): 81 | print >> ERROR_LOG, "*********Error: ", file2, "does not exist*********" 82 | return -1.00 83 | try: 84 | file1_h = open(file1, "r") 85 | file2_h = open(file2, "r") 86 | except IOError: 87 | print >> ERROR_LOG, "Cannot open the files", file1, file2 88 | break 89 | cur_amr1 = smatch.get_amr_line(file1_h) 90 | cur_amr2 = smatch.get_amr_line(file2_h) 91 | if cur_amr1 == "": 92 | print >> ERROR_LOG, "AMR 1 is empty" 93 | continue 94 | if cur_amr2 == "": 95 | print >> ERROR_LOG, "AMR 2 is empty" 96 | continue 97 | amr1 = amr.AMR.parse_AMR_line(cur_amr1) 98 | amr2 = amr.AMR.parse_AMR_line(cur_amr2) 99 | test_label = "a" 100 | gold_label = "b" 101 | amr1.rename_node(test_label) 102 | amr2.rename_node(gold_label) 103 | (test_inst, test_rel1, test_rel2) = amr1.get_triples() 104 | (gold_inst, gold_rel1, gold_rel2) = amr2.get_triples() 105 | if verbose: 106 | print >> DEBUG_LOG, "Instance triples of file 1:", len(test_inst) 107 | print >> DEBUG_LOG, test_inst 108 | print >> DEBUG_LOG, "Attribute triples of file 1:", len(test_rel1) 109 | print >> DEBUG_LOG, test_rel1 110 | print >> DEBUG_LOG, "Relation triples of file 1:", len(test_rel2) 111 | print >> DEBUG_LOG, test_rel2 112 | print >> DEBUG_LOG, "Instance triples of file 2:", len(gold_inst) 113 | print >> DEBUG_LOG, gold_inst 114 | print >> DEBUG_LOG, "Attribute triples of file 2:", len(gold_rel1) 115 | print >> DEBUG_LOG, gold_rel1 116 | print >> DEBUG_LOG, "Relation triples of file 2:", len(gold_rel2) 117 | print >> DEBUG_LOG, gold_rel2 118 | (best_match, best_match_num) = smatch.get_best_match(test_inst, test_rel1, test_rel2, 119 | gold_inst, gold_rel1, gold_rel2, 120 | test_label, gold_label) 121 | if verbose: 122 | print >> DEBUG_LOG, "best match number", best_match_num 123 | print >> DEBUG_LOG, "Best Match:", smatch.print_alignment(best_match, test_inst, gold_inst) 124 | match_total += best_match_num 125 | test_total += (len(test_inst) + len(test_rel1) + len(test_rel2)) 126 | gold_total += (len(gold_inst) + len(gold_rel1) + len(gold_rel2)) 127 | smatch.match_triple_dict.clear() 128 | (precision, recall, f_score) = smatch.compute_f(match_total, test_total, gold_total) 129 | return "%.2f" % f_score 130 | 131 | 132 | def get_max_width(table, index): 133 | return max([len(str(row[index])) for row in table]) 134 | 135 | 136 | def pprint_table(table): 137 | """ 138 | Print a table in pretty format 139 | 140 | """ 141 | col_paddings = [] 142 | for i in range(len(table[0])): 143 | col_paddings.append(get_max_width(table,i)) 144 | for row in table: 145 | print row[0].ljust(col_paddings[0] + 1), 146 | for i in range(1, len(row)): 147 | col = str(row[i]).rjust(col_paddings[i]+2) 148 | print col, 149 | print "\n" 150 | 151 | def build_arg_parser(): 152 | """ 153 | Build an argument parser using argparse. Use it when python version is 2.7 or later. 154 | 155 | """ 156 | parser = argparse.ArgumentParser(description="Smatch table calculator -- arguments") 157 | parser.add_argument("--fl", type=argparse.FileType('r'), help='AMR ID list file') 158 | parser.add_argument('-f', nargs='+', help='AMR IDs (at least one)') 159 | parser.add_argument("-p", nargs='*', help="User list (can be none)") 160 | parser.add_argument("--fd", default=isi_dir_pre, help="AMR File directory. Default=location on isi machine") 161 | parser.add_argument('-r', type=int, default=4, help='Restart number (Default:4)') 162 | parser.add_argument('-v', action='store_true', help='Verbose output (Default:False)') 163 | return parser 164 | 165 | def build_arg_parser2(): 166 | """ 167 | Build an argument parser using optparse. Use it when python version is 2.5 or 2.6. 168 | 169 | """ 170 | usage_str = "Smatch table calculator -- arguments" 171 | parser = optparse.OptionParser(usage=usage_str) 172 | parser.add_option("--fl", dest="fl", type="string", help='AMR ID list file') 173 | parser.add_option("-f", dest="f", type="string", action="callback", callback=cb, help="AMR IDs (at least one)") 174 | parser.add_option("-p", dest="p", type="string", action="callback", callback=cb, help="User list") 175 | parser.add_option("--fd", dest="fd", type="string", help="file directory") 176 | parser.add_option("-r", "--restart", dest="r", type="int", help='Restart number (Default: 4)') 177 | parser.add_option("-v", "--verbose", action='store_true', dest="v", help='Verbose output (Default:False)') 178 | parser.set_defaults(r=4, v=False, ms=False, fd=isi_dir_pre) 179 | return parser 180 | 181 | 182 | def cb(option, value, parser): 183 | """ 184 | Callback function to handle variable number of arguments in optparse 185 | 186 | """ 187 | arguments = [value] 188 | for arg in parser.rargs: 189 | if arg[0] != "-": 190 | arguments.append(arg) 191 | else: 192 | del parser.rargs[:len(arguments)] 193 | break 194 | if getattr(parser.values, option.dest): 195 | arguments.extend(getattr(parser.values, option.dest)) 196 | setattr(parser.values, option.dest, arguments) 197 | 198 | 199 | def check_args(args): 200 | """ 201 | Parse arguments and check if the arguments are valid 202 | 203 | """ 204 | if not os.path.exists(args.fd): 205 | print >> ERROR_LOG, "Not a valid path", args.fd 206 | return [], [], False 207 | if args.fl is not None: 208 | # we already ensure the file can be opened and opened the file 209 | file_line = args.fl.readline() 210 | amr_ids = file_line.strip().split() 211 | elif args.f is None: 212 | print >> ERROR_LOG, "No AMR ID was given" 213 | return [], [], False 214 | else: 215 | amr_ids = args.f 216 | names = [] 217 | check_name = True 218 | if args.p is None: 219 | names = get_names(args.fd, amr_ids) 220 | # no need to check names 221 | check_name = False 222 | if len(names) == 0: 223 | print >> ERROR_LOG, "Cannot find any user who tagged these AMR" 224 | return [], [], False 225 | else: 226 | names = args.p 227 | if len(names) == 0: 228 | print >> ERROR_LOG, "No user was given" 229 | return [], [], False 230 | if len(names) == 1: 231 | print >> ERROR_LOG, "Only one user is given. Smatch calculation requires at least two users." 232 | return [], [], False 233 | if "consensus" in names: 234 | con_index = names.index("consensus") 235 | names.pop(con_index) 236 | names.append("consensus") 237 | # check if all the AMR_id and user combinations are valid 238 | if check_name: 239 | pop_name = [] 240 | for i, name in enumerate(names): 241 | for amr in amr_ids: 242 | amr_path = args.fd + name + "/" + amr + ".txt" 243 | if not os.path.exists(amr_path): 244 | print >> ERROR_LOG, "User", name, "fails to tag AMR", amr 245 | pop_name.append(i) 246 | break 247 | if len(pop_name) != 0: 248 | pop_num = 0 249 | for p in pop_name: 250 | print >> ERROR_LOG, "Deleting user", names[p - pop_num], "from the name list" 251 | names.pop(p - pop_num) 252 | pop_num += 1 253 | if len(names) < 2: 254 | print >> ERROR_LOG, "Not enough users to evaluate. Smatch requires >2 users who tag all the AMRs" 255 | return "", "", False 256 | return amr_ids, names, True 257 | 258 | 259 | def main(arguments): 260 | global verbose 261 | (ids, names, result) = check_args(arguments) 262 | if arguments.v: 263 | verbose = True 264 | if not result: 265 | return 0 266 | acc_time = 0 267 | len_name = len(names) 268 | table = [] 269 | for i in range(0, len_name + 1): 270 | table.append([]) 271 | table[0].append("") 272 | for i in range(0, len_name): 273 | table[0].append(names[i]) 274 | for i in range(0, len_name): 275 | table[i+1].append(names[i]) 276 | for j in range(0, len_name): 277 | if i != j: 278 | start = time.clock() 279 | table[i+1].append(compute_files(names[i], names[j], ids, args.fd, args.r)) 280 | end = time.clock() 281 | if table[i+1][-1] != -1.0: 282 | acc_time += end-start 283 | else: 284 | table[i+1].append("") 285 | # check table 286 | for i in range(0, len_name + 1): 287 | for j in range(0, len_name + 1): 288 | if i != j: 289 | if table[i][j] != table[j][i]: 290 | if table[i][j] > table[j][i]: 291 | table[j][i] = table[i][j] 292 | else: 293 | table[i][j] = table[j][i] 294 | pprint_table(table) 295 | return acc_time 296 | 297 | 298 | if __name__ == "__main__": 299 | whole_start = time.clock() 300 | parser = None 301 | args = None 302 | if sys.version_info[:2] != (2, 7): 303 | # requires python version >= 2.5 304 | if sys.version_info[0] != 2 or sys.version_info[1] < 5: 305 | print >> ERROR_LOG, "This program requires python 2.5 or later to run. " 306 | exit(1) 307 | import optparse 308 | parser = build_arg_parser2() 309 | (args, opts) = parser.parse_args() 310 | file_handle = None 311 | if args.fl is not None: 312 | try: 313 | file_handle = open(args.fl, "r") 314 | args.fl = file_handle 315 | except IOError: 316 | print >> ERROR_LOG, "The ID list file", args.fl, "does not exist" 317 | args.fl = None 318 | # python version 2.7 319 | else: 320 | import argparse 321 | parser = build_arg_parser() 322 | args = parser.parse_args() 323 | # Regularize fd, add "/" at the end if needed 324 | if args.fd[-1] != "/": 325 | args.fd += "/" 326 | # acc_time is the smatch calculation time 327 | acc_time = main(args) 328 | whole_end = time.clock() 329 | # time of the whole running process 330 | whole_time = whole_end - whole_start 331 | # print if needed 332 | # print >> ERROR_LOG, "Accumulated computation time", acc_time 333 | # print >> ERROR_LOG, "Total time", whole_time 334 | # print >> ERROR_LOG, "Percentage", float(acc_time)/float(whole_time) 335 | 336 | -------------------------------------------------------------------------------- /smatch_old/smatch_edited.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | """ 5 | This script computes smatch score between two AMRs. 6 | For detailed description of smatch, see http://www.isi.edu/natural-language/amr/smatch-13.pdf 7 | 8 | """ 9 | import cPickle as pickle 10 | import amr 11 | import os 12 | import random 13 | import sys 14 | import time 15 | 16 | # total number of iteration in smatch computation 17 | iteration_num = 5 18 | 19 | # verbose output switch. 20 | # Default false (no verbose output) 21 | verbose = False 22 | 23 | # single score output switch. 24 | # Default true (compute a single score for all AMRs in two files) 25 | single_score = True 26 | 27 | # precision and recall output switch. 28 | # Default false (do not output precision and recall, just output F score) 29 | pr_flag = False 30 | 31 | # Error log location 32 | ERROR_LOG = sys.stderr 33 | 34 | # Debug log location 35 | DEBUG_LOG = sys.stderr 36 | 37 | # dictionary to save pre-computed node mapping and its resulting triple match count 38 | # key: tuples of node mapping 39 | # value: the matching triple count 40 | match_triple_dict = {} 41 | 42 | def parse_relations(rels, v2c): 43 | var_list = [] 44 | conc_list = [] 45 | for r in rels: 46 | if str(r[1]) not in var_list and str(r[1]) != "TOP" and r[1] in v2c: 47 | var_list.append(str(r[1])) 48 | conc_list.append(str(v2c[r[1]])) 49 | if str(r[2]) not in var_list and r[2] in v2c: 50 | var_list.append(str(r[2])) 51 | conc_list.append(str(v2c[r[2]])) 52 | k = 0 53 | rel_dict = []*len(var_list) 54 | att_dict = []*len(var_list) 55 | for v in var_list: 56 | rel_dict.append({}) 57 | att_dict.append({}) 58 | for i in rels: 59 | if str(i[1]) == str(v) and i[2] in v2c: 60 | rel_dict[k][str(i[2])] = i[0] 61 | att_dict[k][i[0]] = str(v2c[i[2]]) 62 | k += 1 63 | return amr.AMR(var_list, conc_list, rel_dict, att_dict) 64 | 65 | def get_amr_line(input_f): 66 | """ 67 | Read the file containing AMRs. AMRs are separated by a blank line. 68 | Each call of get_amr_line() returns the next available AMR (in one-line form). 69 | Note: this function does not verify if the AMR is valid 70 | 71 | """ 72 | cur_amr = [] 73 | has_content = False 74 | for line in input_f: 75 | line = line.strip() 76 | if line == "": 77 | if not has_content: 78 | # empty lines before current AMR 79 | continue 80 | else: 81 | # end of current AMR 82 | break 83 | if line.strip().startswith("#"): 84 | # ignore the comment line (starting with "#") in the AMR file 85 | continue 86 | else: 87 | has_content = True 88 | cur_amr.append(line.strip()) 89 | return "".join(cur_amr) 90 | 91 | 92 | def build_arg_parser(): 93 | """ 94 | Build an argument parser using argparse. Use it when python version is 2.7 or later. 95 | 96 | """ 97 | parser = argparse.ArgumentParser(description="Smatch calculator -- arguments") 98 | parser.add_argument('-f', nargs=2, required=True, 99 | help='Two files containing AMR pairs. AMRs in each file are separated by a single blank line') 100 | parser.add_argument('-r', type=int, default=4, help='Restart number (Default:4)') 101 | parser.add_argument('-v', action='store_true', help='Verbose output (Default:false)') 102 | parser.add_argument('--ms', action='store_true', default=False, 103 | help='Output multiple scores (one AMR pair a score)' \ 104 | 'instead of a single document-level smatch score (Default: false)') 105 | parser.add_argument('--pr', action='store_true', default=False, 106 | help="Output precision and recall as well as the f-score. Default: false") 107 | return parser 108 | 109 | 110 | def build_arg_parser2(): 111 | """ 112 | Build an argument parser using optparse. Use it when python version is 2.5 or 2.6. 113 | 114 | """ 115 | usage_str = "Smatch calculator -- arguments" 116 | parser = optparse.OptionParser(usage=usage_str) 117 | parser.add_option("-f", "--files", nargs=2, dest="f", type="string", 118 | help='Two files containing AMR pairs. AMRs in each file are ' \ 119 | 'separated by a single blank line. This option is required.') 120 | parser.add_option("-r", "--restart", dest="r", type="int", help='Restart number (Default: 4)') 121 | parser.add_option("-v", "--verbose", action='store_true', dest="v", help='Verbose output (Default:False)') 122 | parser.add_option("--ms", "--multiple_score", action='store_true', dest="ms", 123 | help='Output multiple scores (one AMR pair a score) instead of ' \ 124 | 'a single document-level smatch score (Default: False)') 125 | parser.add_option('--pr', "--precision_recall", action='store_true', dest="pr", 126 | help="Output precision and recall as well as the f-score. Default: false") 127 | parser.set_defaults(r=4, v=False, ms=False, pr=False) 128 | return parser 129 | 130 | 131 | def get_best_match(instance1, attribute1, relation1, 132 | instance2, attribute2, relation2, 133 | prefix1, prefix2): 134 | """ 135 | Get the highest triple match number between two sets of triples via hill-climbing. 136 | Arguments: 137 | instance1: instance triples of AMR 1 ("instance", node name, node value) 138 | attribute1: attribute triples of AMR 1 (attribute name, node name, attribute value) 139 | relation1: relation triples of AMR 1 (relation name, node 1 name, node 2 name) 140 | instance2: instance triples of AMR 2 ("instance", node name, node value) 141 | attribute2: attribute triples of AMR 2 (attribute name, node name, attribute value) 142 | relation2: relation triples of AMR 2 (relation name, node 1 name, node 2 name) 143 | prefix1: prefix label for AMR 1 144 | prefix2: prefix label for AMR 2 145 | Returns: 146 | best_match: the node mapping that results in the highest triple matching number 147 | best_match_num: the highest triple matching number 148 | 149 | """ 150 | # Compute candidate pool - all possible node match candidates. 151 | # In the hill-climbing, we only consider candidate in this pool to save computing time. 152 | # weight_dict is a dictionary that maps a pair of node 153 | (candidate_mappings, weight_dict) = compute_pool(instance1, attribute1, relation1, 154 | instance2, attribute2, relation2, 155 | prefix1, prefix2) 156 | 157 | if verbose: 158 | print >> DEBUG_LOG, "Candidate mappings:" 159 | print >> DEBUG_LOG, candidate_mappings 160 | print >> DEBUG_LOG, "Weight dictionary" 161 | print >> DEBUG_LOG, weight_dict 162 | best_match_num = 0 163 | # initialize best match mapping 164 | # the ith entry is the node index in AMR 2 which maps to the ith node in AMR 1 165 | best_mapping = [-1] * len(instance1) 166 | for i in range(0, iteration_num): 167 | if verbose: 168 | print >> DEBUG_LOG, "Iteration", i 169 | if i == 0: 170 | # smart initialization used for the first round 171 | cur_mapping = smart_init_mapping(candidate_mappings, instance1, instance2) 172 | else: 173 | # random initialization for the other round 174 | cur_mapping = random_init_mapping(candidate_mappings) 175 | # compute current triple match number 176 | match_num = compute_match(cur_mapping, weight_dict) 177 | if verbose: 178 | print >> DEBUG_LOG, "Node mapping at start", cur_mapping 179 | print >> DEBUG_LOG, "Triple match number at start:", match_num 180 | while True: 181 | # get best gain 182 | (gain, new_mapping) = get_best_gain(cur_mapping, candidate_mappings, weight_dict, 183 | len(instance2), match_num) 184 | if verbose: 185 | print >> DEBUG_LOG, "Gain after the hill-climbing", gain 186 | # hill-climbing until there will be no gain for new node mapping 187 | if gain <= 0: 188 | break 189 | # otherwise update match_num and mapping 190 | match_num += gain 191 | cur_mapping = new_mapping[:] 192 | if verbose: 193 | print >> DEBUG_LOG, "Update triple match number to:", match_num 194 | print >> DEBUG_LOG, "Current mapping:", cur_mapping 195 | if match_num > best_match_num: 196 | best_mapping = cur_mapping[:] 197 | best_match_num = match_num 198 | return best_mapping, best_match_num 199 | 200 | 201 | def compute_pool(instance1, attribute1, relation1, 202 | instance2, attribute2, relation2, 203 | prefix1, prefix2): 204 | """ 205 | compute all possible node mapping candidates and their weights (the triple matching number gain resulting from 206 | mapping one node in AMR 1 to another node in AMR2) 207 | 208 | Arguments: 209 | instance1: instance triples of AMR 1 210 | attribute1: attribute triples of AMR 1 (attribute name, node name, attribute value) 211 | relation1: relation triples of AMR 1 (relation name, node 1 name, node 2 name) 212 | instance2: instance triples of AMR 2 213 | attribute2: attribute triples of AMR 2 (attribute name, node name, attribute value) 214 | relation2: relation triples of AMR 2 (relation name, node 1 name, node 2 name 215 | prefix1: prefix label for AMR 1 216 | prefix2: prefix label for AMR 2 217 | Returns: 218 | candidate_mapping: a list of candidate nodes. 219 | The ith element contains the node indices (in AMR 2) the ith node (in AMR 1) can map to. 220 | (resulting in non-zero triple match) 221 | weight_dict: a dictionary which contains the matching triple number for every pair of node mapping. The key 222 | is a node pair. The value is another dictionary. key {-1} is triple match resulting from this node 223 | pair alone (instance triples and attribute triples), and other keys are node pairs that can result 224 | in relation triple match together with the first node pair. 225 | 226 | 227 | """ 228 | candidate_mapping = [] 229 | weight_dict = {} 230 | for i in range(0, len(instance1)): 231 | # each candidate mapping is a set of node indices 232 | candidate_mapping.append(set()) 233 | for j in range(0, len(instance2)): 234 | # if both triples are instance triples and have the same value 235 | if instance1[i][0].lower() == instance2[j][0].lower() \ 236 | and instance1[i][2].lower() == instance2[j][2].lower(): 237 | # get node index by stripping the prefix 238 | node1_index = int(instance1[i][1][len(prefix1):]) 239 | node2_index = int(instance2[j][1][len(prefix2):]) 240 | candidate_mapping[node1_index].add(node2_index) 241 | node_pair = (node1_index, node2_index) 242 | # use -1 as key in weight_dict for instance triples and attribute triples 243 | if node_pair in weight_dict: 244 | weight_dict[node_pair][-1] += 1 245 | else: 246 | weight_dict[node_pair] = {} 247 | weight_dict[node_pair][-1] = 1 248 | for i in range(0, len(attribute1)): 249 | for j in range(0, len(attribute2)): 250 | # if both attribute relation triple have the same relation name and value 251 | if attribute1[i][0].lower() == attribute2[j][0].lower() \ 252 | and attribute1[i][2].lower() == attribute2[j][2].lower(): 253 | node1_index = int(attribute1[i][1][len(prefix1):]) 254 | node2_index = int(attribute2[j][1][len(prefix2):]) 255 | candidate_mapping[node1_index].add(node2_index) 256 | node_pair = (node1_index, node2_index) 257 | # use -1 as key in weight_dict for instance triples and attribute triples 258 | if node_pair in weight_dict: 259 | weight_dict[node_pair][-1] += 1 260 | else: 261 | weight_dict[node_pair] = {} 262 | weight_dict[node_pair][-1] = 1 263 | for i in range(0, len(relation1)): 264 | for j in range(0, len(relation2)): 265 | # if both relation share the same name 266 | if relation1[i][0].lower() == relation2[j][0].lower(): 267 | node1_index_amr1 = int(relation1[i][1][len(prefix1):]) 268 | node1_index_amr2 = int(relation2[j][1][len(prefix2):]) 269 | node2_index_amr1 = int(relation1[i][2][len(prefix1):]) 270 | node2_index_amr2 = int(relation2[j][2][len(prefix2):]) 271 | # add mapping between two nodes 272 | candidate_mapping[node1_index_amr1].add(node1_index_amr2) 273 | candidate_mapping[node2_index_amr1].add(node2_index_amr2) 274 | node_pair1 = (node1_index_amr1, node1_index_amr2) 275 | node_pair2 = (node2_index_amr1, node2_index_amr2) 276 | if node_pair2 != node_pair1: 277 | # update weight_dict weight. Note that we need to update both entries for future search 278 | # i.e weight_dict[node_pair1][node_pair2] 279 | # weight_dict[node_pair2][node_pair1] 280 | if node1_index_amr1 > node2_index_amr1: 281 | # swap node_pair1 and node_pair2 282 | node_pair1 = (node2_index_amr1, node2_index_amr2) 283 | node_pair2 = (node1_index_amr1, node1_index_amr2) 284 | if node_pair1 in weight_dict: 285 | if node_pair2 in weight_dict[node_pair1]: 286 | weight_dict[node_pair1][node_pair2] += 1 287 | else: 288 | weight_dict[node_pair1][node_pair2] = 1 289 | else: 290 | weight_dict[node_pair1] = {} 291 | weight_dict[node_pair1][-1] = 0 292 | weight_dict[node_pair1][node_pair2] = 1 293 | if node_pair2 in weight_dict: 294 | if node_pair1 in weight_dict[node_pair2]: 295 | weight_dict[node_pair2][node_pair1] += 1 296 | else: 297 | weight_dict[node_pair2][node_pair1] = 1 298 | else: 299 | weight_dict[node_pair2] = {} 300 | weight_dict[node_pair2][-1] = 0 301 | weight_dict[node_pair2][node_pair1] = 1 302 | else: 303 | # two node pairs are the same. So we only update weight_dict once. 304 | # this generally should not happen. 305 | if node_pair1 in weight_dict: 306 | weight_dict[node_pair1][-1] += 1 307 | else: 308 | weight_dict[node_pair1] = {} 309 | weight_dict[node_pair1][-1] = 1 310 | return candidate_mapping, weight_dict 311 | 312 | 313 | def smart_init_mapping(candidate_mapping, instance1, instance2): 314 | """ 315 | Initialize mapping based on the concept mapping (smart initialization) 316 | Arguments: 317 | candidate_mapping: candidate node match list 318 | instance1: instance triples of AMR 1 319 | instance2: instance triples of AMR 2 320 | Returns: 321 | initialized node mapping between two AMRs 322 | 323 | """ 324 | random.seed() 325 | matched_dict = {} 326 | result = [] 327 | # list to store node indices that have no concept match 328 | no_word_match = [] 329 | for i, candidates in enumerate(candidate_mapping): 330 | if len(candidates) == 0: 331 | # no possible mapping 332 | result.append(-1) 333 | continue 334 | # node value in instance triples of AMR 1 335 | value1 = instance1[i][2] 336 | for node_index in candidates: 337 | value2 = instance2[node_index][2] 338 | # find the first instance triple match in the candidates 339 | # instance triple match is having the same concept value 340 | if value1 == value2: 341 | if node_index not in matched_dict: 342 | result.append(node_index) 343 | matched_dict[node_index] = 1 344 | break 345 | if len(result) == i: 346 | no_word_match.append(i) 347 | result.append(-1) 348 | # if no concept match, generate a random mapping 349 | for i in no_word_match: 350 | candidates = list(candidate_mapping[i]) 351 | while len(candidates) > 0: 352 | # get a random node index from candidates 353 | rid = random.randint(0, len(candidates) - 1) 354 | if candidates[rid] in matched_dict: 355 | candidates.pop(rid) 356 | else: 357 | matched_dict[candidates[rid]] = 1 358 | result[i] = candidates[rid] 359 | break 360 | return result 361 | 362 | 363 | def random_init_mapping(candidate_mapping): 364 | """ 365 | Generate a random node mapping. 366 | Args: 367 | candidate_mapping: candidate_mapping: candidate node match list 368 | Returns: 369 | randomly-generated node mapping between two AMRs 370 | 371 | """ 372 | # if needed, a fixed seed could be passed here to generate same random (to help debugging) 373 | random.seed() 374 | matched_dict = {} 375 | result = [] 376 | for c in candidate_mapping: 377 | candidates = list(c) 378 | if len(candidates) == 0: 379 | # -1 indicates no possible mapping 380 | result.append(-1) 381 | continue 382 | found = False 383 | while len(candidates) > 0: 384 | # randomly generate an index in [0, length of candidates) 385 | rid = random.randint(0, len(candidates) - 1) 386 | # check if it has already been matched 387 | if candidates[rid] in matched_dict: 388 | candidates.pop(rid) 389 | else: 390 | matched_dict[candidates[rid]] = 1 391 | result.append(candidates[rid]) 392 | found = True 393 | break 394 | if not found: 395 | result.append(-1) 396 | return result 397 | 398 | 399 | def compute_match(mapping, weight_dict): 400 | """ 401 | Given a node mapping, compute match number based on weight_dict. 402 | Args: 403 | mappings: a list of node index in AMR 2. The ith element (value j) means node i in AMR 1 maps to node j in AMR 2. 404 | Returns: 405 | matching triple number 406 | Complexity: O(m*n) , m is the node number of AMR 1, n is the node number of AMR 2 407 | 408 | """ 409 | # If this mapping has been investigated before, retrieve the value instead of re-computing. 410 | if verbose: 411 | print >> DEBUG_LOG, "Computing match for mapping" 412 | print >> DEBUG_LOG, mapping 413 | if tuple(mapping) in match_triple_dict: 414 | if verbose: 415 | print >> DEBUG_LOG, "saved value", match_triple_dict[tuple(mapping)] 416 | return match_triple_dict[tuple(mapping)] 417 | match_num = 0 418 | # i is node index in AMR 1, m is node index in AMR 2 419 | for i, m in enumerate(mapping): 420 | if m == -1: 421 | # no node maps to this node 422 | continue 423 | # node i in AMR 1 maps to node m in AMR 2 424 | current_node_pair = (i, m) 425 | if current_node_pair not in weight_dict: 426 | continue 427 | if verbose: 428 | print >> DEBUG_LOG, "node_pair", current_node_pair 429 | for key in weight_dict[current_node_pair]: 430 | if key == -1: 431 | # matching triple resulting from instance/attribute triples 432 | match_num += weight_dict[current_node_pair][key] 433 | if verbose: 434 | print >> DEBUG_LOG, "instance/attribute match", weight_dict[current_node_pair][key] 435 | # only consider node index larger than i to avoid duplicates 436 | # as we store both weight_dict[node_pair1][node_pair2] and 437 | # weight_dict[node_pair2][node_pair1] for a relation 438 | elif key[0] < i: 439 | continue 440 | elif mapping[key[0]] == key[1]: 441 | match_num += weight_dict[current_node_pair][key] 442 | if verbose: 443 | print >> DEBUG_LOG, "relation match with", key, weight_dict[current_node_pair][key] 444 | if verbose: 445 | print >> DEBUG_LOG, "match computing complete, result:", match_num 446 | # update match_triple_dict 447 | match_triple_dict[tuple(mapping)] = match_num 448 | return match_num 449 | 450 | 451 | def move_gain(mapping, node_id, old_id, new_id, weight_dict, match_num): 452 | """ 453 | Compute the triple match number gain from the move operation 454 | Arguments: 455 | mapping: current node mapping 456 | node_id: remapped node in AMR 1 457 | old_id: original node id in AMR 2 to which node_id is mapped 458 | new_id: new node in to which node_id is mapped 459 | weight_dict: weight dictionary 460 | match_num: the original triple matching number 461 | Returns: 462 | the triple match gain number (might be negative) 463 | 464 | """ 465 | # new node mapping after moving 466 | new_mapping = (node_id, new_id) 467 | # node mapping before moving 468 | old_mapping = (node_id, old_id) 469 | # new nodes mapping list (all node pairs) 470 | new_mapping_list = mapping[:] 471 | new_mapping_list[node_id] = new_id 472 | # if this mapping is already been investigated, use saved one to avoid duplicate computing 473 | if tuple(new_mapping_list) in match_triple_dict: 474 | return match_triple_dict[tuple(new_mapping_list)] - match_num 475 | gain = 0 476 | # add the triple match incurred by new_mapping to gain 477 | if new_mapping in weight_dict: 478 | for key in weight_dict[new_mapping]: 479 | if key == -1: 480 | # instance/attribute triple match 481 | gain += weight_dict[new_mapping][-1] 482 | elif new_mapping_list[key[0]] == key[1]: 483 | # relation gain incurred by new_mapping and another node pair in new_mapping_list 484 | gain += weight_dict[new_mapping][key] 485 | # deduct the triple match incurred by old_mapping from gain 486 | if old_mapping in weight_dict: 487 | for k in weight_dict[old_mapping]: 488 | if k == -1: 489 | gain -= weight_dict[old_mapping][-1] 490 | elif mapping[k[0]] == k[1]: 491 | gain -= weight_dict[old_mapping][k] 492 | # update match number dictionary 493 | match_triple_dict[tuple(new_mapping_list)] = match_num + gain 494 | return gain 495 | 496 | 497 | def swap_gain(mapping, node_id1, mapping_id1, node_id2, mapping_id2, weight_dict, match_num): 498 | """ 499 | Compute the triple match number gain from the swapping 500 | Arguments: 501 | mapping: current node mapping list 502 | node_id1: node 1 index in AMR 1 503 | mapping_id1: the node index in AMR 2 node 1 maps to (in the current mapping) 504 | node_id2: node 2 index in AMR 1 505 | mapping_id2: the node index in AMR 2 node 2 maps to (in the current mapping) 506 | weight_dict: weight dictionary 507 | match_num: the original matching triple number 508 | Returns: 509 | the gain number (might be negative) 510 | 511 | """ 512 | new_mapping_list = mapping[:] 513 | # Before swapping, node_id1 maps to mapping_id1, and node_id2 maps to mapping_id2 514 | # After swapping, node_id1 maps to mapping_id2 and node_id2 maps to mapping_id1 515 | new_mapping_list[node_id1] = mapping_id2 516 | new_mapping_list[node_id2] = mapping_id1 517 | if tuple(new_mapping_list) in match_triple_dict: 518 | return match_triple_dict[tuple(new_mapping_list)] - match_num 519 | gain = 0 520 | new_mapping1 = (node_id1, mapping_id2) 521 | new_mapping2 = (node_id2, mapping_id1) 522 | old_mapping1 = (node_id1, mapping_id1) 523 | old_mapping2 = (node_id2, mapping_id2) 524 | if node_id1 > node_id2: 525 | new_mapping2 = (node_id1, mapping_id2) 526 | new_mapping1 = (node_id2, mapping_id1) 527 | old_mapping1 = (node_id2, mapping_id2) 528 | old_mapping2 = (node_id1, mapping_id1) 529 | if new_mapping1 in weight_dict: 530 | for key in weight_dict[new_mapping1]: 531 | if key == -1: 532 | gain += weight_dict[new_mapping1][-1] 533 | elif new_mapping_list[key[0]] == key[1]: 534 | gain += weight_dict[new_mapping1][key] 535 | if new_mapping2 in weight_dict: 536 | for key in weight_dict[new_mapping2]: 537 | if key == -1: 538 | gain += weight_dict[new_mapping2][-1] 539 | # to avoid duplicate 540 | elif key[0] == node_id1: 541 | continue 542 | elif new_mapping_list[key[0]] == key[1]: 543 | gain += weight_dict[new_mapping2][key] 544 | if old_mapping1 in weight_dict: 545 | for key in weight_dict[old_mapping1]: 546 | if key == -1: 547 | gain -= weight_dict[old_mapping1][-1] 548 | elif mapping[key[0]] == key[1]: 549 | gain -= weight_dict[old_mapping1][key] 550 | if old_mapping2 in weight_dict: 551 | for key in weight_dict[old_mapping2]: 552 | if key == -1: 553 | gain -= weight_dict[old_mapping2][-1] 554 | # to avoid duplicate 555 | elif key[0] == node_id1: 556 | continue 557 | elif mapping[key[0]] == key[1]: 558 | gain -= weight_dict[old_mapping2][key] 559 | match_triple_dict[tuple(new_mapping_list)] = match_num + gain 560 | return gain 561 | 562 | 563 | def get_best_gain(mapping, candidate_mappings, weight_dict, instance_len, cur_match_num): 564 | """ 565 | Hill-climbing method to return the best gain swap/move can get 566 | Arguments: 567 | mapping: current node mapping 568 | candidate_mappings: the candidates mapping list 569 | weight_dict: the weight dictionary 570 | instance_len: the number of the nodes in AMR 2 571 | cur_match_num: current triple match number 572 | Returns: 573 | the best gain we can get via swap/move operation 574 | 575 | """ 576 | largest_gain = 0 577 | # True: using swap; False: using move 578 | use_swap = True 579 | # the node to be moved/swapped 580 | node1 = None 581 | # store the other node affected. In swap, this other node is the node swapping with node1. In move, this other 582 | # node is the node node1 will move to. 583 | node2 = None 584 | # unmatched nodes in AMR 2 585 | unmatched = set(range(0, instance_len)) 586 | # exclude nodes in current mapping 587 | # get unmatched nodes 588 | for nid in mapping: 589 | if nid in unmatched: 590 | unmatched.remove(nid) 591 | for i, nid in enumerate(mapping): 592 | # current node i in AMR 1 maps to node nid in AMR 2 593 | for nm in unmatched: 594 | if nm in candidate_mappings[i]: 595 | # remap i to another unmatched node (move) 596 | # (i, m) -> (i, nm) 597 | if verbose: 598 | print >> DEBUG_LOG, "Remap node", i, "from ", nid, "to", nm 599 | mv_gain = move_gain(mapping, i, nid, nm, weight_dict, cur_match_num) 600 | if verbose: 601 | print >> DEBUG_LOG, "Move gain:", mv_gain 602 | new_mapping = mapping[:] 603 | new_mapping[i] = nm 604 | new_match_num = compute_match(new_mapping, weight_dict) 605 | if new_match_num != cur_match_num + mv_gain: 606 | print >> ERROR_LOG, mapping, new_mapping 607 | print >> ERROR_LOG, "Inconsistency in computing: move gain", cur_match_num, mv_gain, \ 608 | new_match_num 609 | if mv_gain > largest_gain: 610 | largest_gain = mv_gain 611 | node1 = i 612 | node2 = nm 613 | use_swap = False 614 | # compute swap gain 615 | for i, m in enumerate(mapping): 616 | for j in range(i+1, len(mapping)): 617 | m2 = mapping[j] 618 | # swap operation (i, m) (j, m2) -> (i, m2) (j, m) 619 | # j starts from i+1, to avoid duplicate swap 620 | if verbose: 621 | print >> DEBUG_LOG, "Swap node", i, "and", j 622 | print >> DEBUG_LOG, "Before swapping:", i, "-", m, ",", j, "-", m2 623 | print >> DEBUG_LOG, mapping 624 | print >> DEBUG_LOG, "After swapping:", i, "-", m2, ",", j, "-", m 625 | sw_gain = swap_gain(mapping, i, m, j, m2, weight_dict, cur_match_num) 626 | if verbose: 627 | print >> DEBUG_LOG, "Swap gain:", sw_gain 628 | new_mapping = mapping[:] 629 | new_mapping[i] = m2 630 | new_mapping[j] = m 631 | print >> DEBUG_LOG, new_mapping 632 | new_match_num = compute_match(new_mapping, weight_dict) 633 | if new_match_num != cur_match_num + sw_gain: 634 | print >> ERROR_LOG, match, new_match 635 | print >> ERROR_LOG, "Inconsistency in computing: swap gain", cur_match_num, sw_gain, new_match_num 636 | if sw_gain > largest_gain: 637 | largest_gain = sw_gain 638 | node1 = i 639 | node2 = j 640 | use_swap = True 641 | # generate a new mapping based on swap/move 642 | cur_mapping = mapping[:] 643 | if node1 is not None: 644 | if use_swap: 645 | if verbose: 646 | print >> DEBUG_LOG, "Use swap gain" 647 | temp = cur_mapping[node1] 648 | cur_mapping[node1] = cur_mapping[node2] 649 | cur_mapping[node2] = temp 650 | else: 651 | if verbose: 652 | print >> DEBUG_LOG, "Use move gain" 653 | cur_mapping[node1] = node2 654 | else: 655 | if verbose: 656 | print >> DEBUG_LOG, "no move/swap gain found" 657 | if verbose: 658 | print >> DEBUG_LOG, "Original mapping", mapping 659 | print >> DEBUG_LOG, "Current mapping", cur_mapping 660 | return largest_gain, cur_mapping 661 | 662 | 663 | def print_alignment(mapping, instance1, instance2): 664 | """ 665 | print the alignment based on a node mapping 666 | Args: 667 | match: current node mapping list 668 | instance1: nodes of AMR 1 669 | instance2: nodes of AMR 2 670 | 671 | """ 672 | result = [] 673 | for i, m in enumerate(mapping): 674 | if m == -1: 675 | result.append(instance1[i][1] + "(" + instance1[i][2] + ")" + "-Null") 676 | else: 677 | result.append(instance1[i][1] + "(" + instance1[i][2] + ")" + "-" 678 | + instance2[m][1] + "(" + instance2[m][2] + ")") 679 | return " ".join(result) 680 | 681 | 682 | def compute_f(match_num, test_num, gold_num): 683 | """ 684 | Compute the f-score based on the matching triple number, 685 | triple number of AMR set 1, 686 | triple number of AMR set 2 687 | Args: 688 | match_num: matching triple number 689 | test_num: triple number of AMR 1 (test file) 690 | gold_num: triple number of AMR 2 (gold file) 691 | Returns: 692 | precision: match_num/test_num 693 | recall: match_num/gold_num 694 | f_score: 2*precision*recall/(precision+recall) 695 | """ 696 | if test_num == 0 or gold_num == 0: 697 | return 0.00, 0.00, 0.00 698 | precision = (0.000 + match_num) / (test_num + 0.000) 699 | recall = (0.000 + match_num) / (gold_num + 0.000) 700 | if (precision + recall) != 0: 701 | f_score = 2 * precision * recall / (precision + recall) 702 | if verbose: 703 | print >> DEBUG_LOG, "F-score:", f_score 704 | return precision, recall, f_score 705 | else: 706 | if verbose: 707 | print >> DEBUG_LOG, "F-score:", "0.0" 708 | return precision, recall, 0.00 709 | 710 | 711 | def main(list1, list2): 712 | """ 713 | Main function of smatch score calculation 714 | 715 | """ 716 | global verbose 717 | global iteration_num 718 | global single_score 719 | global pr_flag 720 | global match_triple_dict 721 | # set the iteration number 722 | # total iteration number = restart number + 1 723 | iteration_num = 5 724 | #if arguments.ms: 725 | # single_score = False 726 | #if arguments.v: 727 | # verbose = True 728 | #if arguments.pr: 729 | pr_flag = True 730 | # matching triple number 731 | total_match_num = 0 732 | # triple number in test file 733 | total_test_num = 0 734 | # triple number in gold file 735 | total_gold_num = 0 736 | # sentence number 737 | sent_num = 1 738 | for l1, l2 in zip(list1,list2): 739 | lst_amr1, dic_amr1 = l1 740 | lst_amr2, dic_amr2 = l2 741 | amr1 = parse_relations(lst_amr1, dic_amr1) 742 | amr2 = parse_relations(lst_amr2, dic_amr2) 743 | prefix1 = "a" 744 | prefix2 = "b" 745 | # Rename node to "a1", "a2", .etc 746 | amr1.rename_node(prefix1) 747 | # Renaming node to "b1", "b2", .etc 748 | amr2.rename_node(prefix2) 749 | (instance1, attributes1, relation1) = amr1.get_triples() 750 | (instance2, attributes2, relation2) = amr2.get_triples() 751 | if verbose: 752 | # print parse results of two AMRs 753 | print >> DEBUG_LOG, "AMR pair", sent_num 754 | print >> DEBUG_LOG, "============================================" 755 | #print >> DEBUG_LOG, "AMR 1 (one-line):", cur_amr1 756 | #print >> DEBUG_LOG, "AMR 2 (one-line):", cur_amr2 757 | print >> DEBUG_LOG, "Instance triples of AMR 1:", len(instance1) 758 | print >> DEBUG_LOG, instance1 759 | print >> DEBUG_LOG, "Attribute triples of AMR 1:", len(attributes1) 760 | print >> DEBUG_LOG, attributes1 761 | print >> DEBUG_LOG, "Relation triples of AMR 1:", len(relation1) 762 | print >> DEBUG_LOG, relation1 763 | print >> DEBUG_LOG, "Instance triples of AMR 2:", len(instance2) 764 | print >> DEBUG_LOG, instance2 765 | print >> DEBUG_LOG, "Attribute triples of AMR 2:", len(attributes2) 766 | print >> DEBUG_LOG, attributes2 767 | print >> DEBUG_LOG, "Relation triples of AMR 2:", len(relation2) 768 | print >> DEBUG_LOG, relation2 769 | (best_mapping, best_match_num) = get_best_match(instance1, attributes1, relation1, 770 | instance2, attributes2, relation2, 771 | prefix1, prefix2) 772 | if verbose: 773 | print >> DEBUG_LOG, "best match number", best_match_num 774 | print >> DEBUG_LOG, "best node mapping", best_mapping 775 | print >> DEBUG_LOG, "Best node mapping alignment:", print_alignment(best_mapping, instance1, instance2) 776 | test_triple_num = len(instance1) + len(attributes1) + len(relation1) 777 | gold_triple_num = len(instance2) + len(attributes2) + len(relation2) 778 | if not single_score: 779 | # if each AMR pair should have a score, compute and output it here 780 | (precision, recall, best_f_score) = compute_f(best_match_num, 781 | test_triple_num, 782 | gold_triple_num) 783 | #print "Sentence", sent_num 784 | if pr_flag: 785 | print "Precision: %.2f" % precision 786 | print "Recall: %.2f" % recall 787 | # print "Smatch score: %.2f" % best_f_score 788 | print "%.4f" % best_f_score 789 | total_match_num += best_match_num 790 | total_test_num += test_triple_num 791 | total_gold_num += gold_triple_num 792 | # clear the matching triple dictionary for the next AMR pair 793 | match_triple_dict.clear() 794 | sent_num += 1 795 | if verbose: 796 | print >> DEBUG_LOG, "Total match number, total triple number in AMR 1, and total triple number in AMR 2:" 797 | print >> DEBUG_LOG, total_match_num, total_test_num, total_gold_num 798 | print >> DEBUG_LOG, "---------------------------------------------------------------------------------" 799 | # output document-level smatch score (a single f-score for all AMR pairs in two files) 800 | return compute_f(total_match_num, total_test_num, total_gold_num) 801 | -------------------------------------------------------------------------------- /smatch_old/smatch_edited.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/smatch_edited.pyc -------------------------------------------------------------------------------- /smatch_old/smatch_fromannot.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/smatch_fromannot.pyc -------------------------------------------------------------------------------- /smatch_old/smatch_fromlists.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/smatch_fromlists.pyc -------------------------------------------------------------------------------- /smatch_old/smatch_fromsubgraphs.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/smatch_old/smatch_fromsubgraphs.pyc -------------------------------------------------------------------------------- /smatch_old/update_log: -------------------------------------------------------------------------------- 1 | Update: 08/22/2012 2 | Person involved: Shu Cai 3 | 4 | Minor bug fix of smatch.py. smatch-v2.py was created. 5 | 6 | smatch.py-> smatch-v1.py 7 | smatch-v2.py-> smatch.py 8 | 9 | No change of interface 10 | 11 | Update: 09/14/2012 12 | Person involved: Shu Cai 13 | 14 | Bug fix of smatch.py and smatch-table.py. smatch-v0.1.py smatch-v0.2.py smatch-v0.3.py smatch-v0.4.py smatch-table-v0.1.py smatch-table-v0.2.py was created. 15 | 16 | smatch.py now equals to smatch-v0.4.py 17 | smatch-table.py now equals to smatch-table-v0.2.py 18 | 19 | smatch.py runs with a smart initialization, which matches words with the same value first, then randomly select other variable mappings. 4 restarts is applied. 20 | 21 | Update: 03/17/2013 22 | Person involved: Shu Cai 23 | 24 | Interface change of smatch.py and smatch-table.py. Using this version does not require esem-format-check.pl. (All versions before v0.5 require esem-format-check.pl to check the format of AMR) Instead it needs amr.py. 25 | 26 | It now accepts one-AMR-per-line format as well as other formats of AMR. 27 | 28 | smatch.py now equals to smatch-v0.5.py 29 | smatch-table.py now equals to smatch-table-v0.3.py 30 | 31 | Update:03/19/2013 32 | Person involved: Shu Cai 33 | 34 | Document update. The latest documents are smatch_guide.txt and smatch_guide.pdf (same content) 35 | Add some sample files to the directory: sample_file_list, test_input1, test_input2 36 | 37 | Update: 03/20/2013 38 | Person involved: Shu Cai 39 | 40 | Minor changes to the documents: smatch_guide.txt and smatch_guide.pdf 41 | 42 | Update: 04/04/2013 43 | Person involved: Shu Cai 44 | 45 | Add Software_architecture.pdf. Minor changes to the smatch.py and smatch-table.py (comments and add --pr option) 46 | Minor changes to the README.txt and smatch_guide.pdf 47 | 48 | Update: 01/18/2015 49 | Person involved: Shu Cai 50 | Code cleanup and bug fix. Add detailed comment to the code. 51 | Thanks Yoav Artzi (yoav@cs.washington.edu) for finding a bug and fixing it. 52 | 53 | Update: 12/21/2015 54 | Person involved: Jon May 55 | Fixed treatment of quoted strings to allow special characters to be actually part of the string. 56 | Empty double quoted strings also allowed 57 | 58 | Update: 1/9/2016 59 | Person involved: Guntis Barzdins and Didzis Gosko 60 | Fixed small crash bug 61 | 62 | -------------------------------------------------------------------------------- /test_input1.txt: -------------------------------------------------------------------------------- 1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29 2 | # ::snt The boy wants the girl to believe him. 3 | (w / want-01 4 | :ARG0 (b / boy) 5 | :ARG1 (b2 / believe-01 6 | :ARG0 (g / girl) 7 | :ARG1 b)) 8 | 9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17 10 | # ::snt The boy is a hard worker. 11 | (p / person 12 | :domain (b / boy) 13 | :ARG0-of (w / work-01 14 | :manner (h / hard))) 15 | 16 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07 17 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon. 18 | (b / bear-02 19 | :ARG1 (p / poet :name (n / name :op1 "William" :op2 "Shakespeare")) 20 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon"))) 21 | 22 | -------------------------------------------------------------------------------- /test_input2.txt: -------------------------------------------------------------------------------- 1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29 2 | # ::snt The boy wants the girl to believe him. 3 | (w / want-01 4 | :ARG0 (b / boy) 5 | :ARG1 (b2 / believe-01 6 | :ARG0 (g / girl) 7 | :ARG1 (h / he))) 8 | 9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17 10 | # ::snt The boy is a hard worker. 11 | (w / worker 12 | :mod (h / hard) 13 | :domain (b / boy)) 14 | 15 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07 16 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon. 17 | (b / bear-02 18 | :ARG1 (p / poet :name (n / name :op1 william :op2 "shakespeare")) 19 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon"))) 20 | 21 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding=utf-8 3 | 4 | ''' 5 | Various routines used by scores.py 6 | ''' 7 | 8 | def disambig(lst): 9 | lst2 = [] 10 | for v in lst: 11 | idx = 1 12 | v_idx = v + '_0' 13 | while str(v_idx) in lst2: 14 | v_idx = v + '_' + str(idx) 15 | idx += 1 16 | lst2.append(str(v_idx)) 17 | return lst2 18 | 19 | def concepts(v2c_dict): 20 | return [str(v) for v in v2c_dict.values()] 21 | 22 | def namedent(v2c_dict, triples): 23 | return [str(v2c_dict[v1]) for (l,v1,v2) in triples if l == "name"] 24 | 25 | def negations(v2c_dict, triples): 26 | return [v2c_dict[v1] for (l,v1,v2) in triples if l == "polarity"] 27 | 28 | def wikification(triples): 29 | return [v2 for (l,v1,v2) in triples if l == "wiki"] 30 | 31 | def reentrancies(v2c_dict, triples): 32 | lst = [] 33 | vrs = [] 34 | for n in v2c_dict.keys(): 35 | parents = [(l,v1,v2) for (l,v1,v2) in triples if v2 == n and l != "instance"] 36 | if len(parents) > 1: 37 | #extract triples involving this (multi-parent) node 38 | for t in parents: 39 | lst.append(t) 40 | vrs.extend([t[1],t[2]]) 41 | #collect var/concept pairs for all extracted nodes 42 | dict1 = {} 43 | for i in v2c_dict: 44 | if i in vrs: 45 | dict1[i] = v2c_dict[i] 46 | return (lst, dict1) 47 | 48 | def srl(v2c_dict, triples): 49 | lst = [] 50 | vrs = [] 51 | for t in triples: 52 | if t[0].startswith("ARG"): 53 | #although the smatch code we use inverts the -of relations 54 | #there seems to be cases where this is not done so we invert 55 | #them here 56 | if t[0].endswith("of"): 57 | lst.append((t[0][0:-3],t[2],t[1])) 58 | vrs.extend([t[2],t[1]]) 59 | else: 60 | lst.append(t) 61 | vrs.extend([t[1],t[2]]) 62 | 63 | #collect var/concept pairs for all extracted nodes 64 | dict1 = {} 65 | for i in v2c_dict: 66 | if i in vrs: 67 | dict1[i] = v2c_dict[i] 68 | return (lst, dict1) 69 | 70 | def var2concept(amr): 71 | v2c = {} 72 | for n, v in zip(amr.nodes, amr.node_values): 73 | v2c[n] = v 74 | return v2c 75 | -------------------------------------------------------------------------------- /utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdtux89/amr-evaluation/8bc6b2e9e4170412f91c33281410163441f97cba/utils.pyc --------------------------------------------------------------------------------