├── .appveyor.yml
├── .travis.yml
├── LICENSE
├── Makefile
├── README.md
├── analyzer.py
├── codec
│   ├── __init__.py
│   ├── amr.py
│   ├── conllu.py
│   ├── eds.py
│   ├── mrp.py
│   ├── norec.py
│   ├── pmb.py
│   ├── sdp.py
│   ├── treex.py
│   └── ucca.py
├── data
│   ├── sample
│ ├── Makefile
│ ├── README.txt
│ ├── amr
│ │ ├── wsj.amr
│ │ └── wsj.mrp
│ ├── dm
│ │ ├── wsj.mrp
│ │ └── wsj.sdp
│ ├── eds
│ │ ├── wsj.eds
│ │ └── wsj.mrp
│ ├── norec
│ │ └── train.json
│ ├── psd
│ │ ├── wsj.mrp
│ │ └── wsj.sdp
│ ├── ucca
│ │ ├── wsj.mrp
│ │ └── xml
│ │ │ ├── files.txt
│ │ │ ├── wsj_0001.1.xml
│ │ │ ├── wsj_0001.2.xml
│ │ │ ├── wsj_0002.1.xml
│ │ │ ├── wsj_0003.1.xml
│ │ │ ├── wsj_0003.10.xml
│ │ │ ├── wsj_0003.11.xml
│ │ │ ├── wsj_0003.12.xml
│ │ │ ├── wsj_0003.13.xml
│ │ │ ├── wsj_0003.14.xml
│ │ │ ├── wsj_0003.15.xml
│ │ │ ├── wsj_0003.16.xml
│ │ │ ├── wsj_0003.17.xml
│ │ │ ├── wsj_0003.18.xml
│ │ │ ├── wsj_0003.19.xml
│ │ │ ├── wsj_0003.2.xml
│ │ │ ├── wsj_0003.20.xml
│ │ │ ├── wsj_0003.21.xml
│ │ │ ├── wsj_0003.22.xml
│ │ │ ├── wsj_0003.23.xml
│ │ │ ├── wsj_0003.24.xml
│ │ │ ├── wsj_0003.25.xml
│ │ │ ├── wsj_0003.26.xml
│ │ │ ├── wsj_0003.27.xml
│ │ │ ├── wsj_0003.28.xml
│ │ │ ├── wsj_0003.29.xml
│ │ │ ├── wsj_0003.3.xml
│ │ │ ├── wsj_0003.30.xml
│ │ │ ├── wsj_0003.4.xml
│ │ │ ├── wsj_0003.5.xml
│ │ │ ├── wsj_0003.7.xml
│ │ │ ├── wsj_0003.8.xml
│ │ │ ├── wsj_0003.9.xml
│ │ │ ├── wsj_0004.1.xml
│ │ │ ├── wsj_0004.10.xml
│ │ │ ├── wsj_0004.11.xml
│ │ │ ├── wsj_0004.12.xml
│ │ │ ├── wsj_0004.14.xml
│ │ │ ├── wsj_0004.15.xml
│ │ │ ├── wsj_0004.16.xml
│ │ │ ├── wsj_0004.17.xml
│ │ │ ├── wsj_0004.2.xml
│ │ │ ├── wsj_0004.4.xml
│ │ │ ├── wsj_0004.5.xml
│ │ │ ├── wsj_0004.6.xml
│ │ │ ├── wsj_0004.7.xml
│ │ │ ├── wsj_0004.8.xml
│ │ │ ├── wsj_0004.9.xml
│ │ │ ├── wsj_0005.1.xml
│ │ │ ├── wsj_0005.2.xml
│ │ │ ├── wsj_0005.3.xml
│ │ │ ├── wsj_0007.1.xml
│ │ │ ├── wsj_0007.2.xml
│ │ │ ├── wsj_0007.3.xml
│ │ │ ├── wsj_0007.4.xml
│ │ │ ├── wsj_0008.1.xml
│ │ │ ├── wsj_0008.2.xml
│ │ │ ├── wsj_0008.3.xml
│ │ │ ├── wsj_0008.4.xml
│ │ │ ├── wsj_0008.5.xml
│ │ │ ├── wsj_0008.6.xml
│ │ │ ├── wsj_0009.1.xml
│ │ │ ├── wsj_0009.2.xml
│ │ │ ├── wsj_0009.3.xml
│ │ │ ├── wsj_0009.4.xml
│ │ │ ├── wsj_0010.1.xml
│ │ │ ├── wsj_0010.10.xml
│ │ │ ├── wsj_0010.11.xml
│ │ │ ├── wsj_0010.12.xml
│ │ │ ├── wsj_0010.13.xml
│ │ │ ├── wsj_0010.15.xml
│ │ │ ├── wsj_0010.16.xml
│ │ │ ├── wsj_0010.17.xml
│ │ │ ├── wsj_0010.18.xml
│ │ │ ├── wsj_0010.19.xml
│ │ │ ├── wsj_0010.2.xml
│ │ │ ├── wsj_0010.20.xml
│ │ │ ├── wsj_0010.3.xml
│ │ │ ├── wsj_0010.6.xml
│ │ │ ├── wsj_0010.7.xml
│ │ │ ├── wsj_0010.8.xml
│ │ │ ├── wsj_0011.1.xml
│ │ │ ├── wsj_0011.2.xml
│ │ │ ├── wsj_0011.4.xml
│ │ │ ├── wsj_0011.5.xml
│ │ │ ├── wsj_0011.6.xml
│ │ │ ├── wsj_0011.7.xml
│ │ │ ├── wsj_0011.8.xml
│ │ │ ├── wsj_0012.1.xml
│ │ │ ├── wsj_0012.2.xml
│ │ │ ├── wsj_0012.3.xml
│ │ │ ├── wsj_0012.4.xml
│ │ │ └── wsj_0012.5.xml
│ ├── wsj.ids
│ └── wsj.txt
│   ├── score
│ ├── Makefile
│ ├── amr
│ │ ├── 233.gold.amr
│ │ ├── 233.gold.dot
│ │ ├── 233.gold.pdf
│ │ ├── 233.system.amr
│ │ ├── 233.system.dot
│ │ ├── 233.system.pdf
│ │ ├── coli.gold.amr
│ │ ├── coli.system.amr
│ │ ├── first.gold.amr
│ │ ├── first.system.amr
│ │ ├── partial.gold.mrp
│ │ ├── partial.system.mrp
│ │ ├── test1.amr
│ │ ├── test1.mrp
│ │ ├── test2.amr
│ │ └── test2.mrp
│ ├── dm
│ │ ├── empty.gold.mrp
│ │ ├── empty.peking.mrp
│ │ └── peking.wsj.sdp
│ ├── eds
│ │ ├── lpps.102990.png
│ │ ├── lpps.peking.mrp
│ │ ├── wsj.pet.eds
│ │ └── wsj.pet.mrp
│ ├── lpps.mrp
│ ├── psd
│ │ ├── 107480.foxik.mrp
│ │ ├── 107480.gold.mrp
│ │ └── peking.brown.sdp
│ ├── revisions.txt
│ ├── test.slurm
│ └── ucca
│ │ ├── anchors.gold.mrp
│ │ ├── anchors.tupa.mrp
│ │ ├── ewt.gold.mrp
│ │ ├── ewt.tupa.mrp
│ │ ├── id.mrp
│ │ ├── koller.mrp
│ │ ├── small.gold.mrp
│ │ ├── small.gold.pdf
│ │ ├── small.tupa.mrp
│ │ ├── small.tupa.pdf
│ │ ├── test.gold.mrp
│ │ ├── test.gold.pdf
│ │ ├── test.tupa.mrp
│ │ └── test.tupa.pdf
│   ├── validate
│ ├── Makefile
│ └── eds
│ │ └── wsj.mrp
│   └── wsj.txt
├── graph.py
├── inspector.py
├── main.py
├── score
│   ├── __init__.py
│   ├── core.py
│   ├── edm.py
│   ├── lib
│   │   ├── counter.pdf
│   │   ├── damonte.pdf
│   │   ├── edm.pdf
│   │   ├── sdp.pdf
│   │   ├── sema.pdf
│   │   ├── sembleu.pdf
│   │   ├── smatch.pdf
│   │   └── ucca.pdf
│   ├── mces.py
│   ├── rrhc.py
│   ├── sdp.py
│   ├── smatch.py
│   └── ucca.py
├── setup.py
├── smatch
│   ├── LICENSE.txt
│   ├── README.md
│   ├── __init__.py
│   ├── amr.py
│   └── smatch.py
├── treewidth.py
├── ucca
│   ├── README.md
│   ├── __init__.py
│   ├── convert.py
│   ├── core.py
│   ├── ioutil.py
│   ├── layer0.py
│   ├── layer1.py
│   ├── normalization.py
│   └── textutil.py
├── validate
│   ├── __init__.py
│   ├── amr.py
│   ├── core.py
│   ├── eds.py
│   ├── sdp.py
│   ├── ucca.py
│   └── utilities.py
└── version.py
/.appveyor.yml:
--------------------------------------------------------------------------------
1 | environment:
2 | PYTHON: C:\Python37-x64
3 | matrix:
4 | - TEST: "score dm.edm.json"
5 | - TEST: "score eds.edm.json"
6 | - TEST: "score eds.smatch.json"
7 | - TEST: "score eds.mrp.json"
8 | - TEST: "score dm.sdp.json"
9 | - TEST: "score ucca.ucca.json"
10 | - TEST: "score ucca.smatch.json"
11 | - TEST: "score ucca.mrp.json"
12 | - TEST: "score test.smatch.json"
13 | - TEST: "score coli.smatch.json"
14 | - TEST: "score coli.mrp.json"
15 | - TEST: "score unit"
16 | - TEST: "sample all"
17 | - TEST: "validate all"
18 |
19 | init:
20 | - cmd: choco install make
21 | - set PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
22 | - cmd: copy %PYTHON%\python.exe %PYTHON%\python3.exe
23 |
24 | install:
25 | - pip install .
26 |
27 | build: off
28 |
29 | test_script:
30 | - make -C data/%TEST%
31 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | sudo: false
3 | group: edge
4 | language: python
5 | python: 3.6
6 | install: pip install .
7 | env:
8 | - TEST="score dm.edm.json"
9 | - TEST="score eds.edm.json"
10 | - TEST="score eds.smatch.json"
11 | - TEST="score eds.mrp.json"
12 | - TEST="score dm.sdp.json"
13 | - TEST="score ucca.ucca.json"
14 | - TEST="score ucca.smatch.json"
15 | - TEST="score ucca.mrp.json"
16 | - TEST="score test.smatch.json"
17 | - TEST="score coli.smatch.json"
18 | - TEST="score coli.mrp.json"
19 | - TEST="score unit"
20 | - TEST="sample all"
21 | - TEST="validate all"
22 | script:
23 | - make -C data/$TEST
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: history regression
2 |
3 | history:
4 | git log --pretty=tformat:"%H %ae %ai %s" -- score/mces.py
5 |
6 | regression:
7 | [ -d etc ] || mkdir etc; \
8 | [ -d tmp ] || mkdir tmp; \
9 | for i in $$(awk '{print $$1}' data/score/revisions.txt); do \
10 | [ -d etc/$${i} ] || mkdir etc/$${i}; \
11 | ( cd tmp; \
12 | [ -d $${i} ] || git clone git@github.com:cfmrp/mtool.git $${i}; \
13 | cd $${i}; git checkout $${i}; \
14 | cd data/score; sbatch ../../../../data/score/test.slurm; ) \
15 | done
16 |
--------------------------------------------------------------------------------
/codec/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/codec/__init__.py
--------------------------------------------------------------------------------
/codec/eds.py:
--------------------------------------------------------------------------------
1 | import os.path;
2 | import re;
3 |
4 | from graph import Graph;
5 |
6 | EDS_MATCHER = re.compile(r'(.+?):(.+?)\[(.*)\]$');
7 | LNK_MATCHER = re.compile(r'<(-?[0-9]+):(-?[0-9]+)>$');
8 | CARG_MATCHER = re.compile(r'\("([^"]+)"\)$');
9 | PROPERTIES_MATCHER = re.compile(r'\{([^}]*)\}$');
10 |
11 | def read_instances(fp):
12 | top_handle, predicates = None, [];
13 | sentence_id = None;
14 | try:
15 | sentence_id = int(os.path.splitext(os.path.basename(fp.name))[0]);
16 | except:
17 | pass;
18 | first_curly = True
19 | for line in fp:
20 | line = line.strip()
21 | if len(line) == 0:
22 | pass
23 | elif line.startswith("#"):
24 | sentence_id = line[1:]
25 | first_curly = True
26 | elif line.startswith("{"):
27 | colon = line.index(":")
28 | assert colon >= 0
29 | top_handle = line[1:colon].strip()
30 | elif line.endswith("}"):
31 | assert len(line) == 1
32 | if first_curly:
33 | assert sentence_id is not None
34 | assert top_handle is not None
35 | assert len(predicates) > 0
36 | yield (sentence_id, top_handle, predicates)
37 | sentence_id, top_handle, predicates = None, None, []
38 | first_curly = False
39 | else:
40 | match = EDS_MATCHER.match(line)
41 | assert match is not None
42 | node_id, label, arguments = match.groups()
43 | arguments = [tuple(arg.split()) for arg in arguments.split(',') if len(arg) > 0]
44 | predicates.append((node_id, label.strip(), arguments))
45 |
46 | def instance2graph(instance, reify = False, text = None):
47 | sentence_id, top, predicates = instance;
48 | anchors = None;
49 | graph = Graph(sentence_id, flavor = 1, framework = "eds");
50 | if text: graph.add_input(text);
51 | handle2node = {};
52 | for handle, label, _ in predicates:
53 | assert handle not in handle2node
54 | properties = None;
55 | values = None;
56 | match = PROPERTIES_MATCHER.search(label);
57 | if match:
58 | label = label[:match.start()];
59 | fields = match.group(1).replace(",", "").split();
60 | properties, values = list(), list();
61 | for i, field in enumerate(fields[1:]):
62 | if i % 2 == 0: properties.append(field);
63 | else: values.append(field);
64 | carg = None;
65 | match = CARG_MATCHER.search(label);
66 | if match:
67 | label = label[:match.start()];
68 | if not reify:
69 | properties = ["CARG"] + properties;
70 | values = [match.group(1)] + values;
71 | else:
72 | carg = match.group(1);
73 | anchors = None;
74 | match = LNK_MATCHER.search(label);
75 | if match:
76 | label = label[:match.start()];
77 | anchors = [{"from": int(match.group(1)), "to": int(match.group(2))}];
78 | handle2node[handle] = \
79 | graph.add_node(label = label, properties = properties, values = values, anchors = anchors);
80 | if carg and reify:
81 | carg = graph.add_node(label = carg, anchors = anchors);
82 | source = handle2node[handle].id;
83 | target = carg.id;
84 | graph.add_edge(source, target, "CARG");
85 | handle2node[top].is_top = True
86 | for src_handle, _, arguments in predicates:
87 | src = handle2node[src_handle].id
88 | for relation, tgt_handle in arguments:
89 | tgt = handle2node[tgt_handle].id
90 | graph.add_edge(src, tgt, relation)
91 | return graph
92 |
93 | def read(fp, reify = False, text = None):
94 | for instance in read_instances(fp):
95 | yield instance2graph(instance, reify, text), None
96 |
--------------------------------------------------------------------------------
/codec/mrp.py:
--------------------------------------------------------------------------------
1 | import json;
2 | import operator;
3 | import os;
4 | import sys;
5 |
6 | from graph import Graph
7 |
8 | def read(fp, text = None, robust = False):
9 | input, i = None, 0;
10 | def compute(form):
11 | nonlocal i;
12 | m = None;
13 | j = input.find(form, i);
14 | if j >= i:
15 | i, m = j, len(form);
16 | else:
17 | base = form;
18 | k, l = len(input), 0;
19 | for old, new in {("‘", "`"), ("‘", "'"), ("’", "'"), ("`", "'"),
20 | ("“", "\""), ("”", "\""),
21 | ("–", "--"), ("–", "---"), ("—", "---"),
22 | ("…", "..."), ("…", ". . .")}:
23 | form = base.replace(old, new);
24 | j = input.find(form, i);
25 | if j >= i and j < k: k, l = j, len(form);
26 | if k < len(input): i, m = k, l;
27 | if m:
28 | match = {"from": i, "to": i + m};
29 | i += m;
30 | return match;
31 | else:
32 | raise Exception("failed to anchor |{}| in |{}|{}| ({})"
33 | "".format(form, input[:i], input[i:], i));
34 |
35 | def anchor(graph, old, new):
36 | nonlocal input, i;
37 | strings = dict();
38 | for node in graph.nodes:
39 | for j in range(len(node.anchors) if node.anchors else 0):
40 | start, end = node.anchors[j]["from"], node.anchors[j]["to"];
41 | strings[(start, end)] = old[start:end];
42 | input, i = new, 0;
43 | for key in sorted(strings.keys(), key = operator.itemgetter(0, 1)):
44 | strings[key] = compute(strings[key]);
45 | for node in graph.nodes:
46 | for j in range(len(node.anchors) if node.anchors else 0):
47 | node.anchors[j] \
48 | = strings[(node.anchors[j]["from"], node.anchors[j]["to"])];
49 |
50 | for j, line in enumerate(fp):
51 | try:
52 | graph = Graph.decode(json.loads(line.rstrip()), robust = robust);
53 | if text is not None:
54 | if graph.input in text:
55 | graph.id = text[graph.input];
56 | else:
57 | old = graph.input;
58 | graph.add_input(text);
59 | anchor(graph, old, graph.input);
60 | yield graph, None;
61 | except Exception as error:
62 | print("codec.mrp.read(): ignoring line {}: {}"
63 | "".format(j, error), file = sys.stderr);
64 |
--------------------------------------------------------------------------------
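
The codec modules follow a common generator contract: read(fp, ...) yields (graph, overlay) pairs, with the overlay slot left as None in the readers shown here. Below is a minimal sketch (not part of the repository) of driving the MRP reader over one of the sample files; it assumes the repository root is on PYTHONPATH and relies only on the graph attributes visible in the code above, .id and .nodes:

from codec import mrp

# each line of the .mrp file is decoded into a Graph; the second element of
# the yielded pair (the overlay) is None for this codec and is ignored here.
with open("data/sample/eds/wsj.mrp") as fp:
    for graph, _ in mrp.read(fp):
        print(graph.id, len(graph.nodes))
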
/codec/norec.py:
--------------------------------------------------------------------------------
1 | import json;
2 | import operator;
3 | import os;
4 | import sys;
5 |
6 | from graph import Graph;
7 |
8 | def read(fp, text = None, reify = False, strict = False):
9 | def anchor(node):
10 | anchors = list();
11 | for string in node[1]:
12 | string = string.split(":");
13 | anchors.append({"from": int(string[0]), "to": int(string[1])});
14 | return anchors;
15 |
16 | for native in json.load(fp):
17 | map = dict();
18 | try:
19 | graph = Graph(native["sent_id"], flavor = 1, framework = "norec");
20 | graph.add_input(native["text"]);
21 | if reify:
22 | top = graph.add_node(top = True);
23 | for opinion in native["opinions"]:
24 | expression = opinion["Polar_expression"];
25 | properties, values = list(), list();
26 | if not reify:
27 | properties = ["polarity"];
28 | values = [opinion["Polarity"]];
29 | expression = graph.add_node(label = "expression",
30 | top = not reify,
31 | properties = properties, values = values,
32 | anchors = anchor(expression));
33 | if reify:
34 | graph.add_edge(top.id, expression.id, opinion["Polarity"]);
35 | source = opinion["Source"];
36 | if len(source[1]):
37 | key = tuple(source[1]);
38 | if strict and key in map:
39 | source = map[key];
40 | else:
41 | source = graph.add_node(label = "source" if not strict else None,
42 | anchors = anchor(source));
43 | map[key] = source;
44 | graph.add_edge(expression.id, source.id, "source" if strict else None);
45 | target = opinion["Target"];
46 | if len(target[1]):
47 | key = tuple(target[1]);
48 | if strict and key in map:
49 | target = map[key];
50 | else:
51 | target = graph.add_node(label = "target" if not strict else None,
52 | anchors = anchor(target));
53 | map[key] = target;
54 | graph.add_edge(expression.id, target.id, "target" if strict else None);
55 | yield graph, None;
56 | except Exception as error:
57 | print("codec.norec.read(): ignoring {}: {}"
58 | "".format(native, error), file = sys.stderr);
59 |
--------------------------------------------------------------------------------
/codec/sdp.py:
--------------------------------------------------------------------------------
1 | from graph import Graph;
2 |
3 | def read_matrix(file):
4 | rows = [];
5 | for line in file:
6 | line = line.rstrip();
7 | if len(line) == 0:
8 | return rows;
9 | else:
10 | rows.append(line.split("\t"));
11 | return rows or None
12 |
13 | def read_matrices(file):
14 | file.readline().rstrip();
15 | matrix = read_matrix(file);
16 | while matrix:
17 | yield matrix;
18 | matrix = read_matrix(file);
19 |
20 | def matrix2graph(matrix, framework = None, text = None):
21 | graph = Graph(matrix[0][0][1:], flavor = 0, framework = framework);
22 | predicates = [];
23 | for id, row in enumerate(matrix[1:]):
24 | lemma, pos, frame, top = row[2], row[3], row[6], row[4] == '+';
25 | if lemma == "_": lemma = row[1];
26 | properties = {"pos": pos};
27 | if frame != "_": properties["frame"] = frame;
28 | node = graph.add_node(id, label = lemma,
29 | properties = list(properties.keys()),
30 | values = list(properties.values()),
31 | top = top, anchors = [row[1]] if text else None);
32 | if row[5] == '+':
33 | predicates.append(id);
34 | for tgt, row in enumerate(matrix[1:]):
35 | for pred, label in enumerate(row[7:]):
36 | if label != '_':
37 | src = predicates[pred];
38 | edge = graph.add_edge(src, tgt, label);
39 | if text:
40 | graph.add_input(text);
41 | graph.anchor();
42 | #
43 | # finally, purge singleton (isolated) nodes
44 | #
45 | graph.nodes = [node for node in graph.nodes if not node.is_singleton()];
46 | return graph;
47 |
48 | def read(fp, framework = None, text = None):
49 | for matrix in read_matrices(fp):
50 | yield matrix2graph(matrix, framework, text), None;
51 |
--------------------------------------------------------------------------------
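
As the code above reads it, each block of an SDP file consists of a ‘#id’ header line followed by one tab-separated row per token, where row[1] holds the form, row[2] the lemma, row[3] the POS tag, row[4] a ‘+’ flag for top nodes, row[5] a ‘+’ flag for predicates, row[6] the frame, and row[7:] one argument label per predicate. The following sketch exercises just the block reader on an invented two-token sentence (repository root assumed on PYTHONPATH; the file-level header and token values are made up for illustration):

import io

from codec.sdp import read_matrices

toy = ("#SDP toy header\n"
       "#22000000\n"
       "1\tDogs\tdog\tNNS\t-\t-\t_\tARG1\n"
       "2\tbark\tbark\tVBP\t+\t+\tbark.01\t_\n"
       "\n")

# read_matrices() discards the file-level header line, then yields one matrix
# (the ‘#id’ row plus the token rows) per blank-line-terminated block.
for matrix in read_matrices(io.StringIO(toy)):
    print(matrix[0][0][1:], "with", len(matrix) - 1, "tokens")
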
/codec/treex.py:
--------------------------------------------------------------------------------
1 | from operator import itemgetter;
2 | import os.path;
3 | import re;
4 | import xml.etree.ElementTree as ET;
5 |
6 | from graph import Graph;
7 |
8 | def walk(id, node, parent, nodes, edges, ns):
9 | i = node.get("id");
10 | o = node.findtext(ns + "ord");
11 | if i is None or o is None and parent is not None:
12 | raise Exception("treex.walk(): "
13 | "missing ‘id’ or ‘ord’ values while decoding tree #{}; exit."
14 | "".format(id));
15 | nodes.append((i, int(o) if o is not None else 0, node));
16 |
17 | if edges is not None:
18 | functor = node.findtext(ns + "functor");
19 | if parent is not None and functor is not None:
20 | edges.append((parent, i, functor));
21 |
22 | children = node.find(ns + "children");
23 | if children is not None:
24 | for child in children:
25 | if child.tag == ns + "LM":
26 | walk(id, child, i, nodes, edges, ns);
27 | if children.find(ns + "LM") is None:
28 | walk(id, children, i, nodes, edges, ns);
29 |
30 | def read(fp, text = None):
31 | ns = "{http://ufal.mff.cuni.cz/pdt/pml/}";
32 |
33 | #
34 | # _fix_me_
35 | # factor out the anchor()ing code into a reusable form. (oe; 4-apr-20)
36 | #
37 | n = None;
38 | i = 0;
39 |
40 | def skip():
41 | nonlocal i;
42 | while i < n and graph.input[i] in {" ", "\t"}:
43 | i += 1;
44 |
45 | def scan(candidates):
46 | for candidate in candidates:
47 | if graph.input.startswith(candidate, i):
48 | return len(candidate);
49 |
50 | def anchor(form):
51 | nonlocal i;
52 | skip();
53 | m = None;
54 | if graph.input.startswith(form, i):
55 | m = len(form);
56 | else:
57 | for old, new in {("‘", "`"), ("’", "'")}:
58 | form = form.replace(old, new);
59 | if graph.input.startswith(form, i):
60 | m = len(form);
61 | break;
62 | if not m:
63 | m = scan({"“", "\"", "``"}) or scan({"‘", "`"}) \
64 | or scan({"”", "\"", "''"}) or scan({"’", "'"}) \
65 | or scan({"–", "—", "---", "--"}) \
66 | or scan({"…", "...", ". . ."});
67 | if m:
68 | anchor = {"from": i, "to": i + m};
69 | i += m;
70 | skip();
71 | return anchor;
72 | else:
73 | raise Exception("{}: failed to anchor |{}| in |{}| ({})"
74 | "".format(graph.id, form, graph.input, i));
75 |
76 | tree = ET.parse(fp).getroot();
77 | bundles = tree.find(ns + "bundles");
78 | for item in bundles.findall(ns + "LM"):
79 | id = item.get("id");
80 | graph = Graph(id, flavor = 0, framework = "ptg");
81 | surface = list(); nodes = list(); edges = list();
82 | for zone in item.iter(ns + "zone"):
83 | if zone.get("language") == "en":
84 | sentence = zone.findtext(ns + "sentence");
85 | trees = zone.find(ns + "trees");
86 | if trees is not None:
87 | atree = trees.find(ns + "a_tree");
88 | ttree = trees.find(ns + "t_tree");
89 | root = atree.find(ns + "children");
90 | top = ttree.find(ns + "children");
91 | # print(id, sentence, atree, ttree, root, top);
92 | if root is None or top is None:
93 | raise Exception("treex.read(): "
94 | "missing ‘a_tree’ or ‘t_tree’ values while decoding tree #{}; exit."
95 | "".format(id));
96 | walk(id, root, None, surface, None, ns);
97 | walk(id, top, None, nodes, edges, ns);
98 | #
99 | # determine character-based anchors for all .surface. (analytical) tokens
100 | #
101 | anchoring = dict();
102 | if sentence is not None:
103 | graph.add_input(sentence);
104 | n = len(graph.input);
105 | i = 0;
106 | for node in sorted(surface, key = itemgetter(1)):
107 | anchoring[node[0]] = anchor(node[2].findtext(ns + "form"));
108 |
109 | #
110 | # now process tectogrammatical nodes in surface order (as indicated in the
111 | # annotations): map to consecutive numerical identifiers; retrieve anchors
112 | # from corresponding analytical nodes; and create actual (new) graph nodes.
113 | #
114 | mapping = {};
115 | to = 0;
116 | for node in sorted(nodes, key = itemgetter(1)):
117 | mapping[node[0]] = i = len(mapping);
118 | properties = dict();
119 |
120 | a = node[2].find(ns + "a");
121 | if a is not None:
122 | anchors = list();
123 | for lex in a:
124 | if len(lex) == 0:
125 | anchors.append(anchoring[lex.text]);
126 | else:
127 | for lm in lex.findall(ns + "LM"):
128 | anchors.append(anchoring[lm.text]);
129 | anchors = sorted(anchors, key = itemgetter("to"));
130 | to = anchors[-1]["to"];
131 | else:
132 | #
133 | # _fix_me_
134 | # discuss anchoring of generated nodes: currently, for uniformity, we
135 | # anchor them to an empty string immediately after the final character
136 | # of the preceding non-generated node. but this arguably introduces a
137 | # vacuous piece of information, unless one were to argue that it rather
138 | # is an encoding of the node status for generated nodes? (oe; 4-apr-20)
139 | #
140 | anchors = [{"from": to, "to": to}];
141 |
142 | #
143 | # the node label comes from the tectogrammatical lemma
144 | #
145 | lemma = node[2].findtext(ns + "t_lemma");
146 |
147 | frame = node[2].findtext(ns + "val_frame.rf");
148 | #
149 | # where present (mostly on verbs), extract the valency frame identifier
150 | # _fix_me_
151 | # for compatibility with earlier PSD releases, strip prefix that seems to
152 | # identify the valency dictionary. (oe; 4-apr-20)
153 | #
154 | if frame is not None:
155 | if "#" in frame:
156 | properties["frame"] = frame[frame.index("#") + 1:];
157 | else:
158 | properties["frame"] = frame;
159 |
160 | #
161 | # selectively expose grammatemes as node-local properties, but ignore
162 | # (vanilla but very high-frequent) default values
163 | #
164 | grammatemes = node[2].find(ns + "gram");
165 | if grammatemes is not None:
166 | for property, default in [("tense", {"nil"}), ("negation", {"neg0"})]:
167 | match = grammatemes.findtext(ns + property);
168 | if match is not None and match not in default:
169 | properties[property] = match;
170 |
171 | graph.add_node(id = i, label = lemma, anchors = anchors,
172 | properties = properties.keys(),
173 | values = properties.values(),
174 | top = node[0] == top.get("id"));
175 |
176 | #
177 | # similarly, record all edges, now using mapped identifiers
178 | #
179 | for source, target, label in edges:
180 | graph.add_edge(mapping[source], mapping[target], label);
181 |
182 | #
183 | # in a second pass (so that all internal identifiers are mapped already),
184 | # create edges reflecting coreference annotations.
185 | #
186 | for node in nodes:
187 | coref = node[2].findtext(ns + "coref_gram.rf");
188 | if coref is not None:
189 | graph.add_edge(mapping[node[0]], mapping[coref], "coref_gram");
190 |
191 | yield graph, None;
192 |
--------------------------------------------------------------------------------
/codec/ucca.py:
--------------------------------------------------------------------------------
1 | import re;
2 | import sys
3 | import xml.etree.ElementTree as ET
4 | from itertools import groupby
5 | from operator import attrgetter;
6 | from pathlib import Path;
7 |
8 | from graph import Graph;
9 | from ucca import core, layer0, layer1, textutil;
10 | from ucca.convert import to_standard
11 | from ucca.ioutil import get_passages;
12 |
13 |
14 | def convert_id(id, prefix):
15 | m = re.search(r'wsj_([0-9]+)\.([0-9]+)', id);
16 | if m:
17 | return "2%04d%03d" % (int(m.group(1)), int(m.group(2)));
18 | elif prefix:
19 | return prefix + id;
20 | else:
21 | return id;
22 |
23 |
24 | def passage2graph(passage, text=None, prefix=None):
25 | graph = Graph(convert_id(passage.ID, prefix), flavor=1, framework="ucca");
26 | l0 = passage.layer(layer0.LAYER_ID);
27 | l1 = passage.layer(layer1.LAYER_ID);
28 | unit_id_to_node_id = {};
29 |
30 | n = None;
31 | if text:
32 | graph.add_input(text);
33 | n = len(graph.input);
34 | i = 0;
35 |
36 | def skip():
37 | nonlocal i;
38 | while i < n and graph.input[i] in {" ", "\t"}:
39 | i += 1;
40 |
41 | def scan(candidates):
42 | for candidate in candidates:
43 | if graph.input.startswith(candidate, i):
44 | return len(candidate);
45 |
46 | def anchor(form):
47 | nonlocal i;
48 | skip();
49 | m = None;
50 | if graph.input.startswith(form, i):
51 | m = len(form);
52 | else:
53 | for old, new in {("‘", "`"), ("’", "'")}:
54 | form = form.replace(old, new);
55 | if graph.input.startswith(form, i):
56 | m = len(form);
57 | break;
58 | if not m:
59 | m = scan({"“", "\"", "``"}) or scan({"‘", "`"}) \
60 | or scan({"”", "\"", "''"}) or scan({"’", "'"}) \
61 | or scan({"–", "—", "---", "--"}) \
62 | or scan({"…", "...", ". . ."});
63 | if m:
64 | anchor = {"from": i, "to": i + m};
65 | i += m;
66 | skip();
67 | return anchor;
68 | else:
69 | raise Exception("{}: failed to anchor |{}| in |{}| ({})"
70 | "".format(graph.id, form, graph.input, i));
71 |
72 | non_terminals = [unit for unit in l1.all if unit.tag in (layer1.NodeTags.Foundational, layer1.NodeTags.Punctuation)]
73 | for token in sorted(l0.all, key=attrgetter("position")):
74 | for unit in non_terminals:
75 | if not unit.attrib.get("implicit"):
76 | for edge in unit:
77 | if "Terminal" in edge.tags and token.ID == edge.child.ID:
78 | if unit.ID in unit_id_to_node_id:
79 | node = graph.find_node(unit_id_to_node_id[unit.ID]);
80 | if graph.input:
81 | node.anchors.append(anchor(token.text));
82 | else:
83 | node = graph.add_node(anchors=[anchor(token.text)] if graph.input else None);
84 | unit_id_to_node_id[unit.ID] = node.id;
85 | for unit in sorted(non_terminals, key=attrgetter("start_position", "end_position")):
86 | if not unit.attrib.get("implicit") and unit.ID not in unit_id_to_node_id:
87 | node = graph.add_node();
88 | unit_id_to_node_id[unit.ID] = node.id;
89 | for unit in non_terminals:
90 | for edge in unit:
91 | for tag in edge.tags:
92 | if tag != "Terminal":
93 | if edge.child.ID in unit_id_to_node_id:
94 | attributes, values = None, None;
95 | if edge.attrib.get("remote"):
96 | attributes = ["remote"];
97 | values = [True];
98 | graph.add_edge(unit_id_to_node_id[unit.ID],
99 | unit_id_to_node_id[edge.child.ID],
100 | tag,
101 | attributes=attributes,
102 | values=values);
103 | else:
104 | #
105 | # quietly ignore edges to implicit nodes
106 | #
107 | pass;
108 | for unit in l1.heads:
109 | node_id = unit_id_to_node_id.get(unit.ID)
110 | if node_id is not None:
111 | graph.nodes[node_id].is_top = True;
112 | return graph
113 |
114 |
115 | def read(fp, text=None, prefix=None):
116 | parent = Path(fp.name).parent;
117 | paths = [parent / file.strip() for file in fp];
118 | for passage in get_passages(map(str, paths)):
119 | try:
120 | graph = passage2graph(passage, text, prefix);
121 | except Exception as exception:
122 | print(exception);
123 | continue;
124 | yield graph, None;
125 |
126 |
127 | def is_punct(node):
128 | for edge in node.incoming_edges or ():
129 | if edge.lab.upper() == "U":
130 | return True
131 | return False
132 |
133 |
134 | def is_remote(edge):
135 | for attribute, value in zip(edge.attributes or (), edge.values or ()):
136 | if attribute == "remote" and value != "false":
137 | return True
138 | return False
139 |
140 |
141 | def is_implicit(node):
142 | for prop, value in zip(node.properties or (), node.values or ()):
143 | if prop == "implicit" and value != "false":
144 | return True
145 | return False
146 |
147 | def is_primary_root(node):
148 | return all(is_remote(edge) for edge in node.incoming_edges)
149 |
150 | def graph2passage(graph, input):
151 | passage = core.Passage(graph.id)
152 | l0 = layer0.Layer0(passage)
153 | anchors = {(anchor["from"], anchor["to"], is_punct(node)) for node in graph.nodes for anchor in node.anchors or ()}
154 | terminals = {(i, j): l0.add_terminal(text=input[i:j], punct=punct) for i, j, punct in sorted(anchors)}
155 |
156 | l1 = layer1.Layer1(passage)
157 | queue = [(node, None if node.is_top else layer1.FoundationalNode(root=l1.root,
158 | tag=layer1.NodeTags.Foundational,
159 | ID=l1.next_id()))
160 | for node in graph.nodes if is_primary_root(node)]
161 |
162 |
163 | id_to_unit = {node.id: unit for (node, unit) in queue}
164 | remotes = []
165 | while queue:
166 | parent, parent_unit = queue.pop(0)
167 | for tgt, edges in groupby(sorted(parent.outgoing_edges, key=attrgetter("tgt")), key=attrgetter("tgt")):
168 | edges = list(edges)
169 | labels = [edge.lab for edge in edges]
170 | if is_remote(edges[0]):
171 | remotes.append((parent_unit, labels, tgt))
172 | else:
173 | child = graph.find_node(tgt)
174 | child_unit = id_to_unit[tgt] = l1.add_fnode_multiple(parent_unit, labels, implicit=is_implicit(child))
175 | queue.append((child, child_unit))
176 | for anchor in parent.anchors or ():
177 | if parent_unit is None: # Terminal children of the root are not valid in UCCA, so warn but be faithful
178 | print("graph2passage(): anchors of the root node converted to Terminal children in ‘{}’."
179 | "".format(graph.id), file=sys.stderr)
180 | parent_unit = l1.heads[0]
181 | parent_unit.add(layer1.EdgeTags.Terminal, terminals[anchor["from"], anchor["to"]])
182 | for parent, labels, tgt in remotes:
183 | l1.add_remote_multiple(parent, labels, id_to_unit[tgt])
184 | return passage
185 |
186 |
187 | def write(graph, input, file):
188 | passage = graph2passage(graph, input)
189 | root = to_standard(passage)
190 | xml_string = ET.tostring(root).decode()
191 | output = textutil.indent_xml(xml_string)
192 | file.write(output)
193 |
--------------------------------------------------------------------------------
/data/sample/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: amr/pdf dm/pdf eds/pdf psd/pdf ucca/pdf \
2 | clean release all
3 |
4 | amr/wsj.mrp: wsj.ids ../wsj.txt amr/wsj.amr
5 | for i in $$(cat wsj.ids); do \
6 | ../../main.py --text ../wsj.txt --read amr \
7 | --id $$i --write mrp ./amr/wsj.amr; \
8 | done > $@;
9 |
10 | amr/pdf:
11 | [ ! -d amr/dot ] && mkdir amr/dot;
12 | [ ! -d amr/pdf ] && mkdir amr/pdf;
13 | for i in $$(cat wsj.ids); do \
14 | ../../main.py --text ../wsj.txt --read amr \
15 | --id $$i --write dot \
16 | ./amr/wsj.amr ./amr/dot/$$i.dot; \
17 | done
18 | rm $$(find ./amr/dot -size 0);
19 | for i in ./amr/dot/*.dot; do \
20 | j=$$(basename $$i .dot); \
21 | dot -Tpdf $$i > ./amr/pdf/$${j}.pdf; \
22 | done
23 |
24 | dm/wsj.mrp: wsj.ids ../wsj.txt dm/wsj.sdp
25 | for i in $$(cat wsj.ids); do \
26 | ../../main.py --text ../wsj.txt --read dm \
27 | --id $$i --write mrp ./dm/wsj.sdp; \
28 | done > $@;
29 |
30 | dm/pdf:
31 | [ ! -d dm/dot ] && mkdir dm/dot;
32 | [ ! -d dm/pdf ] && mkdir dm/pdf;
33 | for i in $$(cat wsj.ids); do \
34 | ../../main.py --text ../wsj.txt --read dm \
35 | --id $$i --write dot \
36 | ./dm/wsj.sdp ./dm/dot/$$i.dot; \
37 | done
38 | for i in ./dm/dot/*.dot; do \
39 | j=$$(basename $$i .dot); \
40 | dot -Tpdf $$i > ./dm/pdf/$${j}.pdf; \
41 | done
42 |
43 | eds/wsj.mrp: wsj.ids ../wsj.txt eds/wsj.eds
44 | for i in $$(cat wsj.ids); do \
45 | ../../main.py --text ../wsj.txt --read eds \
46 | --id $$i --write mrp ./eds/wsj.eds; \
47 | done > $@;
48 |
49 | eds/pdf:
50 | [ ! -d eds/dot ] && mkdir eds/dot;
51 | [ ! -d eds/pdf ] && mkdir eds/pdf;
52 | for i in $$(cat wsj.ids); do \
53 | ../../main.py --text ../wsj.txt --read eds \
54 | --id $$i --write dot \
55 | ./eds/wsj.eds ./eds/dot/$$i.dot; \
56 | done
57 | for i in ./eds/dot/*.dot; do \
58 | j=$$(basename $$i .dot); \
59 | dot -Tpdf $$i > ./eds/pdf/$${j}.pdf; \
60 | done
61 |
62 | psd/wsj.mrp: wsj.ids ../wsj.txt psd/wsj.sdp
63 | for i in $$(cat wsj.ids); do \
64 | ../../main.py --text ../wsj.txt --read psd \
65 | --id $$i --write mrp ./psd/wsj.sdp; \
66 | done > $@;
67 |
68 | psd/pdf:
69 | [ ! -d psd/dot ] && mkdir psd/dot;
70 | [ ! -d psd/pdf ] && mkdir psd/pdf;
71 | for i in $$(cat wsj.ids); do \
72 | ../../main.py --text ../wsj.txt --read psd \
73 | --id $$i --write dot \
74 | ./psd/wsj.sdp ./psd/dot/$$i.dot; \
75 | done
76 | for i in ./psd/dot/*.dot; do \
77 | j=$$(basename $$i .dot); \
78 | dot -Tpdf $$i > ./psd/pdf/$${j}.pdf; \
79 | done
80 |
81 | ucca/wsj.mrp: wsj.ids ../wsj.txt ucca/xml/files.txt ucca/xml/*.xml
82 | for i in $$(cat wsj.ids); do \
83 | ../../main.py --text ../wsj.txt --read ucca \
84 | --id $$i --write mrp ./ucca/xml/files.txt; \
85 | done > $@;
86 |
87 | ucca/pdf:
88 | [ ! -d ucca/dot ] && mkdir ucca/dot;
89 | [ ! -d ucca/pdf ] && mkdir ucca/pdf;
90 | for i in $$(cat wsj.ids); do \
91 | ../../main.py --text ../wsj.txt --read ucca \
92 | --id $$i --write dot --strings \
93 | ./ucca/xml/files.txt ./ucca/dot/$$i.dot; \
94 | done
95 | rm $$(find ./ucca/dot -size 0);
96 | for i in ./ucca/dot/*.dot; do \
97 | j=$$(basename $$i .dot); \
98 | dot -Tpdf $$i > ./ucca/pdf/$${j}.pdf; \
99 | done
100 |
101 | clean:
102 | rm */wsj.mrp */dot/*.dot */pdf/*pdf
103 |
104 | release:
105 | tar zpScvf ../public/sample.tgz --transform='s@^@mrp/2019/sample/@'\
106 | README.txt Makefile \
107 | amr/wsj.mrp dm/wsj.mrp eds/wsj.mrp psd/wsj.mrp ucca/wsj.mrp \
108 | amr/dot amr/pdf dm/dot dm/pdf eds/dot eds/pdf \
109 | psd/dot psd/pdf ucca/dot ucca/pdf
110 |
111 | all: amr/wsj.mrp dm/wsj.mrp eds/wsj.mrp psd/wsj.mrp ucca/wsj.mrp
112 |
113 |
--------------------------------------------------------------------------------
/data/sample/README.txt:
--------------------------------------------------------------------------------
1 |
2 | CoNLL 2019 Shared Task: Meaning Representation Parsing --- Sample Graphs
3 |
4 | Version 0.9; April 9, 2019
5 |
6 |
7 | Overview
8 | ========
9 |
10 | This directory contains a collection of 89 sample graphs in the five frameworks
11 | represented in the task: AMR, DM, EDS, PSD, and UCCA. The sentences are drawn
12 | from Section 00 of (the Penn Treebank selection from) the venerable Wall Street
13 | Journal (WSJ) Corpus. We only include sentences for which all five graph banks
14 | provide annotations.
15 |
16 | The purpose of this sample data is twofold: (a) exemplify the uniform graph
17 | representation format (serialized in JSON) adopted for the task and (b) enable
18 | in-depth linguistic comparison across frameworks.
19 |
20 | For general information on the file format, please see:
21 |
22 | http://mrp.nlpl.eu/index.php?page=4#format
23 |
24 |
25 | Contents
26 | ========
27 |
28 | The main contents in this release are the JSON files:
29 |
30 | $ ls -l */*.mrp
31 | -rw-r--r--. 1 oe oe 145935 Apr 8 00:11 amr/wsj.mrp
32 | -rw-r--r--. 1 oe oe 290495 Apr 8 00:12 dm/wsj.mrp
33 | -rw-r--r--. 1 oe oe 334885 Apr 8 00:13 eds/wsj.mrp
34 | -rw-r--r--. 1 oe oe 225669 Apr 8 00:14 psd/wsj.mrp
35 | -rw-r--r--. 1 oe oe 254101 Apr 9 16:07 ucca/wsj.mrp
36 |
37 | Each file contains the 89 graphs in the intersection of all frameworks (87 in
38 | the case of UCCA, for the time being). These graph serializations are in what
39 | is called the JSON Lines format, effectively a stream of JSON objects with line
40 | breaks as the separator character between objects.
41 |
42 | To ease human inspection of these graphs, this package also provides graphical
43 | renderings of all graphs, as separate files (one per sentence) in the ‘dot/’
44 | and ‘pdf/’ sub-directories for each framework. These visualizations have been
45 | created using the MRP graph toolkit, which will be released by mid-May 2019.
46 |
47 |
48 | Known Limitations
49 | =================
50 |
51 | None, for the time being.
52 |
53 |
54 | Release History
55 | ===============
56 |
57 | [Version 0.9; April 9, 2019]
58 |
59 | + First release of sample graphs in five frameworks: AMR, DM, EDS, UCCA, and PSD.
60 |
61 |
62 | Contact
63 | =======
64 |
65 | For questions or comments, please do not hesitate to email the task organizers
66 | at: ‘mrp-organizers@nlpl.eu’.
67 |
68 | Omri Abend
69 | Jan Hajič
70 | Daniel Hershcovich
71 | Marco Kuhlmann
72 | Stephan Oepen
73 | Tim O'Gorman
74 | Nianwen Xue
75 |
--------------------------------------------------------------------------------
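
As a concrete illustration of the JSON Lines layout described in the README above, the following stdlib-only sketch (not part of the release) walks one of the *.mrp files a line at a time, with the path given relative to this sample directory; the 'id' and 'nodes' fields referenced here follow the MRP interchange format documented at the URL given in the README:

import json

# one complete JSON object (one graph) per line; skip blank lines, if any.
with open("dm/wsj.mrp") as fp:
    for line in fp:
        line = line.strip()
        if not line:
            continue
        graph = json.loads(line)
        print(graph["id"], "has", len(graph.get("nodes", [])), "nodes")
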
/data/sample/ucca/xml/files.txt:
--------------------------------------------------------------------------------
1 | wsj_0001.1.xml
2 | wsj_0001.2.xml
3 | wsj_0002.1.xml
4 | wsj_0003.1.xml
5 | wsj_0003.2.xml
6 | wsj_0003.3.xml
7 | wsj_0003.4.xml
8 | wsj_0003.5.xml
9 | wsj_0003.7.xml
10 | wsj_0003.8.xml
11 | wsj_0003.9.xml
12 | wsj_0003.10.xml
13 | wsj_0003.11.xml
14 | wsj_0003.12.xml
15 | wsj_0003.13.xml
16 | wsj_0003.14.xml
17 | wsj_0003.15.xml
18 | wsj_0003.16.xml
19 | wsj_0003.17.xml
20 | wsj_0003.18.xml
21 | wsj_0003.19.xml
22 | wsj_0003.20.xml
23 | wsj_0003.21.xml
24 | wsj_0003.22.xml
25 | wsj_0003.23.xml
26 | wsj_0003.24.xml
27 | wsj_0003.25.xml
28 | wsj_0003.26.xml
29 | wsj_0003.27.xml
30 | wsj_0003.28.xml
31 | wsj_0003.29.xml
32 | wsj_0003.30.xml
33 | wsj_0004.1.xml
34 | wsj_0004.2.xml
35 | wsj_0004.4.xml
36 | wsj_0004.5.xml
37 | wsj_0004.6.xml
38 | wsj_0004.7.xml
39 | wsj_0004.8.xml
40 | wsj_0004.9.xml
41 | wsj_0004.10.xml
42 | wsj_0004.11.xml
43 | wsj_0004.12.xml
44 | wsj_0004.14.xml
45 | wsj_0004.15.xml
46 | wsj_0004.16.xml
47 | wsj_0004.17.xml
48 | wsj_0005.1.xml
49 | wsj_0005.2.xml
50 | wsj_0005.3.xml
51 | wsj_0007.1.xml
52 | wsj_0007.2.xml
53 | wsj_0007.3.xml
54 | wsj_0007.4.xml
55 | wsj_0008.1.xml
56 | wsj_0008.2.xml
57 | wsj_0008.3.xml
58 | wsj_0008.4.xml
59 | wsj_0008.5.xml
60 | wsj_0008.6.xml
61 | wsj_0009.1.xml
62 | wsj_0009.2.xml
63 | wsj_0009.3.xml
64 | wsj_0009.4.xml
65 | wsj_0010.1.xml
66 | wsj_0010.2.xml
67 | wsj_0010.3.xml
68 | wsj_0010.6.xml
69 | wsj_0010.7.xml
70 | wsj_0010.8.xml
71 | wsj_0010.10.xml
72 | wsj_0010.11.xml
73 | wsj_0010.12.xml
74 | wsj_0010.13.xml
75 | wsj_0010.15.xml
76 | wsj_0010.16.xml
77 | wsj_0010.17.xml
78 | wsj_0010.18.xml
79 | wsj_0010.19.xml
80 | wsj_0010.20.xml
81 | wsj_0011.1.xml
82 | wsj_0011.2.xml
83 | wsj_0011.4.xml
84 | wsj_0011.5.xml
85 | wsj_0011.6.xml
86 | wsj_0011.7.xml
87 | wsj_0011.8.xml
88 | wsj_0012.1.xml
89 | wsj_0012.2.xml
90 | wsj_0012.3.xml
91 | wsj_0012.4.xml
92 | wsj_0012.5.xml
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0001.2.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0003.25.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0003.30.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0003.7.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0005.3.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0010.2.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0010.8.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/ucca/xml/wsj_0011.8.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/data/sample/wsj.ids:
--------------------------------------------------------------------------------
1 | 20001001
2 | 20001002
3 | 20003001
4 | 20003002
5 | 20003003
6 | 20003005
7 | 20003007
8 | 20003008
9 | 20003009
10 | 20003010
11 | 20003011
12 | 20003012
13 | 20003013
14 | 20003014
15 | 20003015
16 | 20003016
17 | 20003017
18 | 20003018
19 | 20003019
20 | 20003020
21 | 20003021
22 | 20003022
23 | 20003023
24 | 20003024
25 | 20003025
26 | 20003026
27 | 20003027
28 | 20003028
29 | 20003029
30 | 20003030
31 | 20004001
32 | 20004002
33 | 20004004
34 | 20004005
35 | 20004006
36 | 20004007
37 | 20004008
38 | 20004009
39 | 20004010
40 | 20004011
41 | 20004012
42 | 20004014
43 | 20004015
44 | 20004016
45 | 20004017
46 | 20005001
47 | 20005002
48 | 20005003
49 | 20006001
50 | 20006002
51 | 20007002
52 | 20007003
53 | 20007004
54 | 20008001
55 | 20008002
56 | 20008003
57 | 20008004
58 | 20008005
59 | 20008006
60 | 20009001
61 | 20009002
62 | 20009003
63 | 20009004
64 | 20010001
65 | 20010002
66 | 20010003
67 | 20010006
68 | 20010007
69 | 20010008
70 | 20010010
71 | 20010011
72 | 20010012
73 | 20010013
74 | 20010015
75 | 20010016
76 | 20010017
77 | 20010018
78 | 20010019
79 | 20010020
80 | 20011001
81 | 20011002
82 | 20011004
83 | 20011005
84 | 20011006
85 | 20011007
86 | 20011008
87 | 20012002
88 | 20012004
89 | 20012005
90 |
--------------------------------------------------------------------------------
/data/score/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: dm.edm.json eds.edm.json eds.smatch.json eds.mrp.json \
2 | dm.sdp.json peking.sdp.json peking.smatch.json peking.mrp.json \
3 | ucca.ucca.json ucca.smatch.json ucca.mrp.json \
4 | test.smatch.json coli.smatch.json coli.mrp.json \
5 | clean oe all
6 |
7 | TRACE ?= --trace --trace
8 |
9 | dm.edm.json:
10 | time python3 -u ../../main.py $(TRACE) --score edm \
11 | --read mrp --gold ../sample/eds/wsj.mrp \
12 | ../sample/dm/wsj.mrp $@ 2>&1 | tee dm.edm.log
13 |
14 | eds.edm.json:
15 | time python3 -u ../../main.py $(TRACE) --score edm \
16 | --read mrp --gold ../sample/eds/wsj.mrp \
17 | eds/wsj.pet.mrp $@ 2>&1 | tee eds.edm.log
18 |
19 | eds.smatch.json:
20 | time python3 -u ../../main.py $(TRACE) --score smatch \
21 | --read mrp --gold ../sample/eds/wsj.mrp \
22 | eds/wsj.pet.mrp $@ 2>&1 | tee eds.smatch.log
23 |
24 | eds.mrp.json:
25 | time python3 -u ../../main.py $(TRACE) --score mrp \
26 | --read mrp --gold ../sample/eds/wsj.mrp \
27 | eds/wsj.pet.mrp $@ 2>&1 | tee eds.mrp.log
28 |
29 | dm.sdp.json:
30 | time python3 -u ../../main.py $(TRACE) --score sdp \
31 | --read mrp --gold ../sample/psd/wsj.mrp \
32 | ../sample/dm/wsj.mrp $@ 2>&1 | tee dm.sdp.log
33 |
34 | peking.sdp.json:
35 | time python3 -u ../../main.py $(TRACE) --score sdp --text ../wsj.txt \
36 | --format dm --gold ../../../evaluation/dm/wsj.sdp \
37 | --read dm dm/peking.wsj.sdp $@ 2>&1 | tee peking.sdp.log
38 |
39 | peking.smatch.json:
40 | time python3 -u ../../main.py $(TRACE) --score smatch --text ../wsj.txt \
41 | --format dm --gold ../../../evaluation/dm/wsj.sdp \
42 | --read dm dm/peking.wsj.sdp $@ 2>&1 | tee peking.smatch.log
43 |
44 | peking.mrp.json:
45 | time python3 -u ../../main.py $(TRACE) --score mrp --text ../wsj.txt \
46 | --format dm --gold ../../../evaluation/dm/wsj.sdp \
47 | --read dm dm/peking.wsj.sdp $@ 2>&1 | tee peking.mrp.log
48 |
49 | ucca.ucca.json:
50 | time python3 -u ../../main.py --n 500 $(TRACE) --score ucca \
51 | --read mrp --gold ucca/ewt.gold.mrp \
52 | ucca/ewt.tupa.mrp $@ 2>&1 | tee ucca.ucca.log
53 |
54 | ucca.smatch.json:
55 | time python3 -u ../../main.py --n 500 $(TRACE) --score smatch \
56 | --read mrp --gold ucca/ewt.gold.mrp \
57 | ucca/ewt.tupa.mrp $@ 2>&1 | tee ucca.smatch.log
58 |
59 | ucca.mrp.json:
60 | time python3 -u ../../main.py --n 500 $(TRACE) --score mrp \
61 | --read mrp --gold ucca/ewt.gold.mrp \
62 | ucca/ewt.tupa.mrp $@ 2>&1 | tee ucca.mrp.log
63 |
64 | empty.mrp.json:
65 | time python3 -u ../../main.py $(TRACE) --score mrp \
66 | --read mrp --gold dm/empty.gold.mrp \
67 | dm/empty.peking.mrp $@ 2>&1 | tee empty.mrp.log
68 |
69 | partial.mrp.json:
70 | time python3 -u ../../main.py $(TRACE) --score mrp \
71 | --read mrp --gold amr/partial.gold.mrp \
72 | amr/partial.system.mrp $@ 2>&1 | tee partial.mrp.log
73 |
74 | test.smatch.json:
75 | time python3 -u ../../main.py $(TRACE) --score smatch --limit 50 \
76 | --read amr --gold amr/test1.amr \
77 | amr/test2.amr $@ 2>&1 | tee test.smatch.log
78 |
79 | test.mrp.json:
80 | time python3 -u ../../main.py $(TRACE) --score mrp --limit 50 \
81 | --read amr --gold amr/test1.amr \
82 | amr/test2.amr $@ 2>&1 | tee test.mrp.log
83 |
84 | coli.smatch.json:
85 | time python3 -u ../../main.py --n 500 $(TRACE) --score smatch \
86 | --read amr --gold amr/coli.gold.amr \
87 | amr/coli.system.amr $@ 2>&1 | tee coli.smatch.log
88 |
89 | coli.mrp.json:
90 | time python3 -u ../../main.py --n 500 $(TRACE) --score mrp \
91 | --read amr --gold amr/coli.gold.amr \
92 | amr/coli.system.amr $@ 2>&1 | tee coli.mrp.log
93 |
94 | clean:
95 | /bin/rm *.json *.log
96 |
97 | oe:
98 | make -j 4 $$(egrep '^[a-z/.]*.json:' Makefile | sed 's/://');
99 |
100 | unit: empty.mrp.json partial.mrp.json
101 |
102 | all: dm.edm.json eds.edm.json eds.smatch.json eds.mrp.json \
103 | dm.sdp.json peking.sdp.json peking.smatch.json peking.mrp.json \
104 | ucca.ucca.json ucca.smatch.json ucca.mrp.json \
105 | empty.mrp.json test.smatch.json test.mrp.json \
106 | coli.smatch.json coli.mrp.json
107 |
--------------------------------------------------------------------------------
/data/score/amr/233.gold.amr:
--------------------------------------------------------------------------------
1 | (j / join-up-02 :ARG0 (c / country :name (n / name :op1 "U.S.") :mod (p2 / person :ARG0-of (o / observe-01))) :ARG1 (p / project))
2 |
3 |
--------------------------------------------------------------------------------
/data/score/amr/233.gold.dot:
--------------------------------------------------------------------------------
1 | digraph "233" {
2 | top [ style=invis ];
3 | top -> 0;
4 | 0 [ label=<> ];
5 | 1 [ label=<> ];
6 | 2 [ label=<> ];
7 | 3 [ label=<> ];
8 | 4 [ label=<> ];
9 | 5 [ label=<> ];
10 | 0 -> 1 [ label="ARG0" ];
11 | 3 -> 4 [ label="(ARG0)-of" ];
12 | 1 -> 2 [ label="name" ];
13 | 1 -> 3 [ label="mod (domain)" ];
14 | 0 -> 5 [ label="ARG1" ];
15 | }
16 |
17 |
--------------------------------------------------------------------------------
/data/score/amr/233.gold.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/data/score/amr/233.gold.pdf
--------------------------------------------------------------------------------
/data/score/amr/233.system.amr:
--------------------------------------------------------------------------------
1 | (f / join-up-02 :ARG1 (e / project) :prep-as (u_1104 / observe-01 :ARG0 (c4 / country :name (n2 / name :op1 "U.S.") :ARG0-of f)))
2 |
3 |
--------------------------------------------------------------------------------
/data/score/amr/233.system.dot:
--------------------------------------------------------------------------------
1 | digraph "233" {
2 | top [ style=invis ];
3 | top -> 0;
4 | 0 [ label=<> ];
5 | 1 [ label=<> ];
6 | 2 [ label=<> ];
7 | 3 [ label=<> ];
8 | 4 [ label=<> ];
9 | 0 -> 1 [ label="ARG1" ];
10 | 3 -> 4 [ label="name" ];
11 | 0 -> 2 [ label="prep-as" ];
12 | 2 -> 3 [ label="ARG0" ];
13 | 3 -> 0 [ label="(ARG0)-of" ];
14 | }
15 |
16 |
--------------------------------------------------------------------------------
/data/score/amr/233.system.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/data/score/amr/233.system.pdf
--------------------------------------------------------------------------------
/data/score/amr/first.gold.amr:
--------------------------------------------------------------------------------
1 | (c / claim-01 :ARG0 (p / partisan :poss (p2 / person :name (n / name :op1 "Ronald" :op2 "Reagan"))) :ARG1 (w / win-01 :ARG0 p2 :ARG2 (w2 / war :name (n2 / name :op1 "Cold" :op2 "War"))) :time (c2 / collapse-01 :ARG1 (c3 / country :name (n3 / name :op1 "Soviet" :op2 "Union")) :time (d / date-entity :year 1991)))
2 |
3 |
--------------------------------------------------------------------------------
/data/score/amr/first.system.amr:
--------------------------------------------------------------------------------
1 | (f / claim-01 :ARG0 (u_2 / person :ARG0-of (o / partisan :ARG1 (p / person :name (n / name :op1 (explicitanon3 / Ronald :year-of (d / date-entity :time-of (s3 / collapse-01 :ARG1 (c4 / country :name (n2 / name :op1 "Soviet" :op2 "Union")) :time-of f))) :op2 "Reagan")) :ARG0-of (a2 / win-01 :ARG2 (e / war-01 :mod (u_1 / cold)) :ARG1-of f)))
2 |
3 |
--------------------------------------------------------------------------------
/data/score/amr/partial.gold.mrp:
--------------------------------------------------------------------------------
1 | {"edges":[{"label":"ARG1","source":1,"target":2},{"label":"op2","source":0,"target":3},{"label":"ARG1","source":3,"target":4},{"label":"op1","source":0,"target":1}],"flavor":2,"framework":"amr","id":"bolt-eng-DF-170-181103-8882762_0111.33","input":"Lowering wages/Breaking Unions.","nodes":[{"id":0,"label":"slash"},{"id":1,"label":"lower-05"},{"id":2,"label":"wage"},{"id":3,"label":"break-01"},{"id":4,"label":"union"}],"time":"2019-04-10 (20:10)","tops":[0],"version":"0.9"}
2 |
--------------------------------------------------------------------------------
/data/score/amr/partial.system.mrp:
--------------------------------------------------------------------------------
1 | {"edges":[{"label":"ARG1","source":1,"target":2},{"label":"op2","source":0,"target":3},{"label":"ARG1","source":3,"target":4},{"label":"op1","source":0,"target":1}],"flavor":2,"framework":"amr","id":"bolt-eng-DF-170-181103-8882762_0111.33","input":"Lowering wages/Breaking Unions.","nodes":[{"id":0,"label":"slash"},{"id":1,"label":"lower-05"},{"id":2,"label":"wage"},{"id":3,"label":"break-01", "anchors" : []},{"id":4,"label":"union"}],"time":"2019-04-10 (20:10)","tops":[0],"version":"0.9"}
2 |
--------------------------------------------------------------------------------
/data/score/amr/test1.amr:
--------------------------------------------------------------------------------
1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29
2 | # ::snt The boy wants the girl to believe him.
3 | (w / want-01
4 | :ARG0 (b / boy)
5 | :ARG1 (b2 / believe-01
6 | :ARG0 (g / girl)
7 | :ARG1 b))
8 |
9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17
10 | # ::snt The boy is a hard worker.
11 | (p / person
12 | :domain (b / boy)
13 | :ARG0-of (w / work-01
14 | :manner (h / hard)))
15 |
16 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07
17 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon.
18 | (b / bear-02
19 | :ARG1 (p / poet :name (n / name :op1 "William" :op2 "Shakespeare"))
20 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon")))
21 |
22 |
--------------------------------------------------------------------------------
/data/score/amr/test1.mrp:
--------------------------------------------------------------------------------
1 | {"id": "isi_0001.1", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The boy wants the girl to believe him.", "tops": [0], "nodes": [{"id": 0, "label": "want-01"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "believe-01"}, {"id": 3, "label": "girl"}], "edges": [{"source": 2, "target": 3, "label": "ARG0"}, {"source": 2, "target": 1, "label": "ARG1"}, {"source": 0, "target": 1, "label": "ARG0"}, {"source": 0, "target": 2, "label": "ARG1"}]}
2 | {"id": "isi_0001.25", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The boy is a hard worker.", "tops": [0], "nodes": [{"id": 0, "label": "person"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "work-01"}, {"id": 3, "label": "hard"}], "edges": [{"source": 0, "target": 1, "label": "domain"}, {"source": 2, "target": 3, "label": "manner"}, {"source": 0, "target": 2, "label": "ARG0-of", "normal": "ARG0"}]}
3 | {"id": "isi_0002.209", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The poet William Shakespeare was born in Stratford-upon-Avon.", "tops": [0], "nodes": [{"id": 0, "label": "bear-02"}, {"id": 1, "label": "poet"}, {"id": 2, "label": "name", "properties": ["op1", "op2"], "values": ["William", "Shakespeare"]}, {"id": 3, "label": "city"}, {"id": 4, "label": "name", "properties": ["op1"], "values": ["Stratford-upon-Avon"]}], "edges": [{"source": 0, "target": 3, "label": "location"}, {"source": 3, "target": 4, "label": "name"}, {"source": 1, "target": 2, "label": "name"}, {"source": 0, "target": 1, "label": "ARG1"}]}
4 |
--------------------------------------------------------------------------------
/data/score/amr/test2.amr:
--------------------------------------------------------------------------------
1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29
2 | # ::snt The boy wants the girl to believe him.
3 | (w / want-01
4 | :ARG0 (b / boy)
5 | :ARG1 (b2 / believe-01
6 | :ARG0 (g / girl)
7 | :ARG1 (h / he)))
8 |
9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17
10 | # ::snt The boy is a hard worker.
11 | (w / worker
12 | :mod (h / hard)
13 | :domain (b / boy))
14 |
15 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07
16 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon.
17 | (b / bear-02
18 | :ARG1 (p / poet :name (n / name :op1 william :op2 "shakespeare"))
19 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon")))
20 |
21 |
--------------------------------------------------------------------------------
/data/score/amr/test2.mrp:
--------------------------------------------------------------------------------
1 | {"id": "isi_0001.1", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The boy wants the girl to believe him.", "tops": [0], "nodes": [{"id": 0, "label": "want-01"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "believe-01"}, {"id": 3, "label": "girl"}, {"id": 4, "label": "he"}], "edges": [{"source": 0, "target": 1, "label": "ARG0"}, {"source": 2, "target": 3, "label": "ARG0"}, {"source": 2, "target": 4, "label": "ARG1"}, {"source": 0, "target": 2, "label": "ARG1"}]}
2 | {"id": "isi_0001.25", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The boy is a hard worker.", "tops": [0], "nodes": [{"id": 0, "label": "worker"}, {"id": 1, "label": "hard"}, {"id": 2, "label": "boy"}], "edges": [{"source": 0, "target": 2, "label": "domain"}, {"source": 0, "target": 1, "label": "mod", "normal": "domain"}]}
3 | {"id": "isi_0002.209", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The poet William Shakespeare was born in Stratford-upon-Avon.", "tops": [0], "nodes": [{"id": 0, "label": "bear-02"}, {"id": 1, "label": "poet"}, {"id": 2, "label": "name", "properties": ["op1", "op2"], "values": ["william", "shakespeare"]}, {"id": 3, "label": "city"}, {"id": 4, "label": "name", "properties": ["op1"], "values": ["Stratford-upon-Avon"]}], "edges": [{"source": 3, "target": 4, "label": "name"}, {"source": 1, "target": 2, "label": "name"}, {"source": 0, "target": 3, "label": "location"}, {"source": 0, "target": 1, "label": "ARG1"}]}
4 |
--------------------------------------------------------------------------------
/data/score/dm/empty.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "22100001", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Consumers may want to move their telephones a little closer to the TV set.", "nodes": [], "edges": []}
2 | {"id": "22100002", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Couch-potato jocks watching ABC's \"Monday Night Football\" can now vote during halftime for the greatest play in 20 years from among four or five filmed replays.", "tops": [], "nodes": null, "edges": null}
3 | {"id": "22100003", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Two weeks ago, viewers of several NBC daytime consumer segments started calling a 900 number for advice on various life-style issues.", "tops": [11], "nodes": [{"id": 0, "label": "two", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 0, "to": 3}]}, {"id": 1, "label": "week", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 4, "to": 9}]}, {"id": 2, "label": "ago", "properties": ["pos", "frame"], "values": ["RB", "p:e-i-u"], "anchors": [{"from": 10, "to": 13}]}, {"id": 4, "label": "viewer", "properties": ["pos", "frame"], "values": ["NNS", "n_of:x-i"], "anchors": [{"from": 15, "to": 22}]}, {"id": 6, "label": "several", "properties": ["pos", "frame"], "values": ["JJ", "a:e-p"], "anchors": [{"from": 26, "to": 33}]}, {"id": 7, "label": "NBC", "properties": ["pos", "frame"], "values": ["NNP", "named:x-c"], "anchors": [{"from": 34, "to": 37}]}, {"id": 8, "label": "daytime", "properties": ["pos", "frame"], "values": ["JJ", "n:x"], "anchors": [{"from": 38, "to": 45}]}, {"id": 9, "label": "consumer", "properties": ["pos", "frame"], "values": ["NN", "n_of:x-i"], "anchors": [{"from": 46, "to": 54}]}, {"id": 10, "label": "segment", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 55, "to": 63}]}, {"id": 11, "label": "start", "properties": ["pos", "frame"], "values": ["VBD", "v:e-h"], "anchors": [{"from": 64, "to": 71}]}, {"id": 12, "label": "call", "properties": ["pos", "frame"], "values": ["VBG", "v:e-i-p"], "anchors": [{"from": 72, "to": 79}]}, {"id": 13, "label": "a", "properties": ["pos", "frame"], "values": ["DT", "q:i-h-h"], "anchors": [{"from": 80, "to": 81}]}, {"id": 14, "label": "900", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 82, "to": 85}]}, {"id": 15, "label": "number", "properties": ["pos", "frame"], "values": ["NN", "n_of:x"], "anchors": [{"from": 86, "to": 92}]}, {"id": 16, "label": "for", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 93, "to": 96}]}, {"id": 17, "label": "advice", "properties": ["pos", "frame"], "values": ["NN", "n:x"], "anchors": [{"from": 97, "to": 103}]}, {"id": 18, "label": "on", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 104, "to": 106}]}, {"id": 19, "label": "various", "properties": ["pos", "frame"], "values": ["JJ", "a:e-p"], "anchors": [{"from": 107, "to": 114}]}, {"id": 20, "label": "style", "properties": ["pos", "frame"], "values": ["NN", "n_of:x"], "anchors": [{"from": 115, "to": 125}]}, {"id": 21, "label": "issue", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 126, "to": 132}]}], "edges": [{"source": 2, "target": 11, "label": "ARG1"}, {"source": 8, "target": 10, "label": "compound"}, {"source": 16, "target": 12, "label": "ARG1"}, {"source": 13, "target": 15, "label": "BV"}, {"source": 0, "target": 1, "label": "ARG1"}, {"source": 9, "target": 10, "label": "compound"}, {"source": 14, "target": 15, "label": "ARG1"}, {"source": 12, "target": 4, "label": "ARG1"}, {"source": 18, "target": 17, "label": "ARG1"}, {"source": 2, "target": 1, "label": "ARG2"}, {"source": 12, "target": 15, "label": "ARG2"}, {"source": 19, "target": 21, "label": "ARG1"}, {"source": 11, "target": 12, "label": "ARG1"}, {"source": 6, "target": 10, "label": "ARG1"}, {"source": 20, "target": 21, "label": "compound"}, {"source": 4, "target": 10, 
"label": "ARG1"}, {"source": 16, "target": 17, "label": "ARG2"}, {"source": 7, "target": 10, "label": "compound"}, {"source": 18, "target": 21, "label": "ARG2"}]}
4 |
--------------------------------------------------------------------------------
/data/score/dm/empty.peking.mrp:
--------------------------------------------------------------------------------
1 | {"id": "22100001", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-07-05", "input": "Consumers may want to move their telephones a little closer to the TV set.", "tops": [13], "nodes": [{"id": 0, "label": "consumer", "properties": ["pos", "frame"], "values": ["NNS", "n_of:x-i"], "anchors": [{"from": 0, "to": 9}]}, {"id": 1, "label": "may", "properties": ["pos", "frame"], "values": ["MD", "v_modal:e-h"], "anchors": [{"from": 10, "to": 13}]}, {"id": 2, "label": "want", "properties": ["pos", "frame"], "values": ["VB", "v:e-i-h"], "anchors": [{"from": 14, "to": 18}]}, {"id": 4, "label": "move", "properties": ["pos", "frame"], "values": ["VB", "v_cause:e-i-p"], "anchors": [{"from": 22, "to": 26}]}, {"id": 5, "label": "their", "properties": ["pos", "frame"], "values": ["PRP$", "q:i-h-h"], "anchors": [{"from": 27, "to": 32}]}, {"id": 6, "label": "telephone", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 33, "to": 43}]}, {"id": 7, "label": "a+little", "properties": ["pos", "frame"], "values": ["DT", "x:e-u"], "anchors": [{"from": 44, "to": 45}]}, {"id": 8, "label": "a+little", "properties": ["pos", "frame"], "values": ["JJ", "x:e-u"], "anchors": [{"from": 46, "to": 52}]}, {"id": 9, "label": "closer", "properties": ["pos", "frame"], "values": ["RBR", "a_to:e-i-i"], "anchors": [{"from": 53, "to": 59}]}, {"id": 11, "label": "the", "properties": ["pos", "frame"], "values": ["DT", "q:i-h-h"], "anchors": [{"from": 63, "to": 66}]}, {"id": 12, "label": "tv", "properties": ["pos", "frame"], "values": ["NN", "n:x"], "anchors": [{"from": 67, "to": 69}]}, {"id": 13, "label": "set", "properties": ["pos", "frame"], "values": ["VBD", "v:e-i-p"], "anchors": [{"from": 70, "to": 73}]}], "edges": [{"source": 13, "target": 1, "label": "ARG2"}, {"source": 5, "target": 6, "label": "poss"}, {"source": 4, "target": 6, "label": "ARG2"}, {"source": 8, "target": 7, "label": "mwe"}, {"source": 2, "target": 0, "label": "ARG1"}, {"source": 11, "target": 12, "label": "BV"}, {"source": 13, "target": 12, "label": "ARG1"}, {"source": 8, "target": 9, "label": "ARG1"}, {"source": 2, "target": 4, "label": "ARG2"}, {"source": 4, "target": 9, "label": "ARG3"}, {"source": 1, "target": 2, "label": "ARG1"}]}
2 | {"id": "22100002", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-07-05", "input": "Couch-potato jocks watching ABC's \"Monday Night Football\" can now vote during halftime for the greatest play in 20 years from among four or five filmed replays.", "tops": [10], "nodes": [{"id": 0, "label": "potato", "properties": ["pos", "frame"], "values": ["NN", "n:x"], "anchors": [{"from": 0, "to": 12}]}, {"id": 1, "label": "_generic_nns_", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 13, "to": 18}]}, {"id": 2, "label": "watch", "properties": ["pos", "frame"], "values": ["VBG", "v:e-i-p"], "anchors": [{"from": 19, "to": 27}]}, {"id": 6, "label": "Monday", "properties": ["pos", "frame"], "values": ["NNP", "dofw:x-c"], "anchors": [{"from": 35, "to": 41}]}, {"id": 7, "label": "night", "properties": ["pos", "frame"], "values": ["NNP", "n_of:x"], "anchors": [{"from": 42, "to": 47}]}, {"id": 8, "label": "_generic_proper_ne_", "properties": ["pos", "frame"], "values": ["NNP", "named:x-c"], "anchors": [{"from": 48, "to": 56}]}, {"id": 10, "label": "can", "properties": ["pos", "frame"], "values": ["MD", "v_modal:e-h"], "anchors": [{"from": 58, "to": 61}]}, {"id": 11, "label": "now", "properties": ["pos", "frame"], "values": ["RB", "a:e-e"], "anchors": [{"from": 62, "to": 65}]}, {"id": 12, "label": "vote", "properties": ["pos", "frame"], "values": ["VB", "v:e-i"], "anchors": [{"from": 66, "to": 70}]}, {"id": 13, "label": "during", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 71, "to": 77}]}, {"id": 14, "label": "_generic_nn_", "properties": ["pos", "frame"], "values": ["NN", "n:x"], "anchors": [{"from": 78, "to": 86}]}, {"id": 15, "label": "for", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 87, "to": 90}]}, {"id": 16, "label": "the", "properties": ["pos", "frame"], "values": ["DT", "q:i-h-h"], "anchors": [{"from": 91, "to": 94}]}, {"id": 17, "label": "greatest", "properties": ["pos", "frame"], "values": ["JJS", "a_for:e-i"], "anchors": [{"from": 95, "to": 103}]}, {"id": 18, "label": "play", "properties": ["pos", "frame"], "values": ["NN", "n:x"], "anchors": [{"from": 104, "to": 108}]}, {"id": 19, "label": "in", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 109, "to": 111}]}, {"id": 20, "label": "_generic_card_ne_", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 112, "to": 114}]}, {"id": 21, "label": "year", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 115, "to": 120}]}, {"id": 22, "label": "from", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 121, "to": 125}]}, {"id": 23, "label": "among", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 126, "to": 131}]}, {"id": 24, "label": "four", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 132, "to": 136}]}, {"id": 26, "label": "five", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 140, "to": 144}]}, {"id": 27, "label": "film", "properties": ["pos", "frame"], "values": ["VBN", "v:e-i-p"], "anchors": [{"from": 145, "to": 151}]}, {"id": 28, "label": "replay", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 152, "to": 159}]}], "edges": [{"source": 19, "target": 18, "label": "ARG1"}, {"source": 24, "target": 26, "label": "_or_c"}, {"source": 11, "target": 12, "label": 
"ARG1"}, {"source": 12, "target": 1, "label": "ARG1"}, {"source": 13, "target": 12, "label": "ARG1"}, {"source": 24, "target": 28, "label": "ARG1"}, {"source": 20, "target": 21, "label": "ARG1"}, {"source": 22, "target": 18, "label": "ARG1"}, {"source": 23, "target": 28, "label": "ARG2"}, {"source": 19, "target": 21, "label": "ARG2"}, {"source": 2, "target": 8, "label": "ARG2"}, {"source": 13, "target": 14, "label": "ARG2"}, {"source": 15, "target": 18, "label": "ARG2"}, {"source": 17, "target": 18, "label": "ARG1"}, {"source": 10, "target": 12, "label": "ARG1"}, {"source": 7, "target": 8, "label": "compound"}, {"source": 0, "target": 1, "label": "compound"}, {"source": 27, "target": 28, "label": "ARG2"}, {"source": 6, "target": 7, "label": "of"}, {"source": 2, "target": 1, "label": "ARG1"}, {"source": 15, "target": 12, "label": "ARG1"}, {"source": 16, "target": 18, "label": "BV"}, {"source": 23, "target": 22, "label": "ARG1"}, {"source": 26, "target": 28, "label": "ARG1"}]}
3 | {"id": "22100003", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-07-05", "input": "Two weeks ago, viewers of several NBC daytime consumer segments started calling a 900 number for advice on various life-style issues."}
4 |
--------------------------------------------------------------------------------
/data/score/eds/lpps.102990.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/data/score/eds/lpps.102990.png
--------------------------------------------------------------------------------
/data/score/psd/107480.foxik.mrp:
--------------------------------------------------------------------------------
1 | {"id": "107480", "flavor": 0, "framework": "psd", "version": 1.0, "time": "2019-08-01 (16:21)", "input": "I own three volcanoes, which I clean out every week (for I also clean out the one that is extinct; one never knows).", "tops": [1], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 1}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 1, "anchors": [{"from": 2, "to": 5}], "label": "own", "properties": ["pos", "frame"], "values": ["VBP", "ev-w2176f1"]}, {"id": 2, "anchors": [{"from": 6, "to": 11}], "label": "three", "properties": ["pos"], "values": ["CD"]}, {"id": 3, "anchors": [{"from": 12, "to": 21}], "label": "volcanoe", "properties": ["pos"], "values": ["NNS"]}, {"id": 4, "anchors": [{"from": 23, "to": 28}], "label": "which", "properties": ["pos"], "values": ["WDT"]}, {"id": 5, "anchors": [{"from": 29, "to": 30}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 6, "anchors": [{"from": 31, "to": 36}], "label": "clean_out", "properties": ["pos", "frame"], "values": ["VBP", "ev-w544f1"]}, {"id": 7, "anchors": [{"from": 41, "to": 46}], "label": "every", "properties": ["pos"], "values": ["DT"]}, {"id": 8, "anchors": [{"from": 47, "to": 51}], "label": "week", "properties": ["pos"], "values": ["NN"]}, {"id": 9, "anchors": [{"from": 57, "to": 58}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 10, "anchors": [{"from": 59, "to": 63}], "label": "also", "properties": ["pos"], "values": ["RB"]}, {"id": 11, "anchors": [{"from": 64, "to": 69}], "label": "clean_out", "properties": ["pos", "frame"], "values": ["VBP", "ev-w544f1"]}, {"id": 12, "anchors": [{"from": 78, "to": 81}], "label": "one", "properties": ["pos"], "values": ["NN"]}, {"id": 13, "anchors": [{"from": 82, "to": 86}], "label": "that", "properties": ["pos"], "values": ["WDT"]}, {"id": 14, "anchors": [{"from": 90, "to": 97}], "label": "extinct", "properties": ["pos"], "values": ["JJ"]}, {"id": 15, "anchors": [{"from": 97, "to": 98}], "label": "#Semicolon", "properties": ["pos"], "values": [":"]}, {"id": 16, "anchors": [{"from": 99, "to": 102}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 17, "anchors": [{"from": 103, "to": 108}], "label": "never", "properties": ["pos"], "values": ["RB"]}, {"id": 18, "anchors": [{"from": 109, "to": 114}], "label": "know", "properties": ["pos", "frame"], "values": ["VBZ", "ev-w1810f1"]}], "edges": [{"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 1, "target": 11, "label": "CAUS"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 3, "target": 6, "label": "DESCR"}, {"source": 6, "target": 4, "label": "PAT-arg"}, {"source": 6, "target": 5, "label": "ACT-arg"}, {"source": 6, "target": 8, "label": "THO"}, {"source": 8, "target": 7, "label": "RSTR"}, {"source": 11, "target": 4, "label": "PAT-arg"}, {"source": 11, "target": 5, "label": "ACT-arg"}, {"source": 11, "target": 9, "label": "ACT-arg"}, {"source": 11, "target": 10, "label": "RHEM"}, {"source": 11, "target": 12, "label": "PAT-arg"}, {"source": 15, "target": 11, "label": "CSQ.member"}, {"source": 15, "target": 18, "label": "CONJ.member"}, {"source": 18, "target": 16, "label": "ACT-arg"}, {"source": 18, "target": 17, "label": "TWHEN"}, {"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 6, "target": 4, "label": "PAT-arg"}, {"source": 11, "target": 4, "label": "PAT-arg"}, {"source": 6, 
"target": 5, "label": "ACT-arg"}, {"source": 11, "target": 5, "label": "ACT-arg"}, {"source": 3, "target": 6, "label": "DESCR"}, {"source": 8, "target": 7, "label": "RSTR"}, {"source": 6, "target": 8, "label": "THO"}, {"source": 11, "target": 9, "label": "ACT-arg"}, {"source": 11, "target": 10, "label": "RHEM"}, {"source": 1, "target": 11, "label": "CAUS"}, {"source": 15, "target": 11, "label": "CSQ.member"}, {"source": 11, "target": 12, "label": "PAT-arg"}, {"source": 18, "target": 16, "label": "ACT-arg"}, {"source": 18, "target": 17, "label": "TWHEN"}, {"source": 15, "target": 18, "label": "CONJ.member"}]}
2 |
--------------------------------------------------------------------------------
/data/score/psd/107480.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "107480", "flavor": 0, "framework": "psd", "version": 1.0, "time": "2019-06-23", "input": "I own three volcanoes, which I clean out every week (for I also clean out the one that is extinct; one never knows).", "tops": [1], "nodes": [{"id": 0, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 0, "to": 1}]}, {"id": 1, "label": "own", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 2, "to": 5}]}, {"id": 2, "label": "three", "properties": ["pos"], "values": ["CD"], "anchors": [{"from": 6, "to": 11}]}, {"id": 3, "label": "volcano", "properties": ["pos"], "values": ["NNS"], "anchors": [{"from": 12, "to": 21}]}, {"id": 5, "label": "which", "properties": ["pos"], "values": ["WDT"], "anchors": [{"from": 23, "to": 28}]}, {"id": 6, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 29, "to": 30}]}, {"id": 7, "label": "clean_out", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 31, "to": 36}]}, {"id": 9, "label": "every", "properties": ["pos"], "values": ["DT"], "anchors": [{"from": 41, "to": 46}]}, {"id": 10, "label": "week", "properties": ["pos"], "values": ["NN"], "anchors": [{"from": 47, "to": 51}]}, {"id": 13, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 57, "to": 58}]}, {"id": 14, "label": "also", "properties": ["pos"], "values": ["RB"], "anchors": [{"from": 59, "to": 63}]}, {"id": 15, "label": "clean_out", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 64, "to": 69}]}, {"id": 18, "label": "one", "properties": ["pos"], "values": ["NN"], "anchors": [{"from": 78, "to": 81}]}, {"id": 19, "label": "that", "properties": ["pos"], "values": ["WDT"], "anchors": [{"from": 82, "to": 86}]}, {"id": 20, "label": "be", "properties": ["pos"], "values": ["VBZ"], "anchors": [{"from": 87, "to": 89}]}, {"id": 21, "label": "extinct", "properties": ["pos"], "values": ["JJ"], "anchors": [{"from": 90, "to": 97}]}, {"id": 23, "label": "one", "properties": ["pos"], "values": ["CD"], "anchors": [{"from": 99, "to": 102}]}, {"id": 24, "label": "never", "properties": ["pos"], "values": ["RB"], "anchors": [{"from": 103, "to": 108}]}, {"id": 25, "label": "know", "properties": ["pos"], "values": ["VBZ"], "anchors": [{"from": 109, "to": 114}]}], "edges": [{"source": 7, "target": 6, "label": "ACT-arg"}, {"source": 18, "target": 20, "label": "RSTR"}, {"source": 15, "target": 18, "label": "PAT-arg"}, {"source": 15, "target": 13, "label": "ACT-arg"}, {"source": 25, "target": 24, "label": "TWHEN"}, {"source": 15, "target": 25, "label": "CAUS"}, {"source": 3, "target": 7, "label": "RSTR"}, {"source": 15, "target": 14, "label": "RHEM"}, {"source": 1, "target": 15, "label": "CAUS"}, {"source": 20, "target": 19, "label": "ACT-arg"}, {"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 25, "target": 23, "label": "ACT-arg"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 20, "target": 21, "label": "PAT-arg"}, {"source": 7, "target": 10, "label": "THO"}, {"source": 7, "target": 5, "label": "PAT-arg"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 10, "target": 9, "label": "RSTR"}]}
2 |
--------------------------------------------------------------------------------
/data/score/psd/peking.brown.sdp:
--------------------------------------------------------------------------------
1 | Representation type: PSD
2 | # Evaluation
3 |
4 | Gold standard file: ../test/en.ood.psd.sdp
5 | System output file: Peking/en.ood.closed.psd.1.sdp
6 |
7 | ## Scores including virtual dependencies to top nodes
8 |
9 | ### Labeled scores
10 |
11 | Number of edges in gold standard: 21396
12 | Number of edges in system output: 19411
13 | Number of edges in common: 14877
14 |
15 | LP: 0.766421
16 | LR: 0.695317
17 | LF: 0.729140
18 | LM: 0.171444
19 |
20 | ### Unlabeled scores
21 |
22 | Number of unlabeled edges in gold standard: 21396
23 | Number of unlabeled edges in system output: 19411
24 | Number of unlabeled edges in common: 17432
25 |
26 | UP: 0.898047
27 | UR: 0.814732
28 | UF: 0.854363
29 | UM: 0.358031
30 |
31 | ### Complete predications
32 |
33 | Number of complete predications in gold standard: 3919
34 | Number of complete predications in system output: 3900
35 | Number of complete predications in common: 2048
36 |
37 | PP: 0.525128
38 | PR: 0.522582
39 | PF: 0.523852
40 |
41 | ### Semantic frames
42 |
43 | Number of semantic frames in gold standard: 3919
44 | Number of semantic frames in system output: 3900
45 | Number of semantic frames in common: 1322
46 |
47 | FP: 0.338974
48 | FR: 0.337331
49 | FF: 0.338151
50 |
51 | ### Senses
52 |
53 | Number of senses in gold standard: 3919
54 | Number of senses in system output: 3900
55 | Number of senses in common: 2171
56 |
57 | SP: 0.556667
58 | SR: 0.553968
59 | SF: 0.555314
60 |
61 | ## Scores excluding virtual dependencies to top nodes
62 |
63 | ### Labeled scores
64 |
65 | Number of edges in gold standard: 19058
66 | Number of edges in system output: 17181
67 | Number of edges in common: 12790
68 |
69 | LP: 0.744427
70 | LR: 0.671109
71 | LF: 0.705869
72 | LM: 0.173067
73 |
74 | ### Unlabeled scores
75 |
76 | Number of unlabeled edges in gold standard: 19058
77 | Number of unlabeled edges in system output: 17181
78 | Number of unlabeled edges in common: 15345
79 |
80 | UP: 0.893138
81 | UR: 0.805174
82 | UF: 0.846878
83 | UM: 0.362358
84 |
85 | ### Complete predications
86 |
87 | Number of complete predications in gold standard: 3919
88 | Number of complete predications in system output: 3900
89 | Number of complete predications in common: 2048
90 |
91 | PP: 0.525128
92 | PR: 0.522582
93 | PF: 0.523852
94 |
95 | ### Semantic frames
96 |
97 | Number of semantic frames in gold standard: 3919
98 | Number of semantic frames in system output: 3900
99 | Number of semantic frames in common: 1322
100 |
101 | FP: 0.338974
102 | FR: 0.337331
103 | FF: 0.338151
104 |
105 | ### Senses
106 |
107 | Number of senses in gold standard: 3919
108 | Number of senses in system output: 3900
109 | Number of senses in common: 2171
110 |
111 | SP: 0.556667
112 | SR: 0.553968
113 | SF: 0.555314
114 |
--------------------------------------------------------------------------------
/data/score/revisions.txt:
--------------------------------------------------------------------------------
1 | 54c0499f55874555c22827a7e61d79aeb8d29906 oe@ifi.uio.no 2019-07-05 23:49:38 +0200 cosmetics; so much for tonight ...
2 | f9ceb0a2090742a67ca89ed26b293fbdcfc292cb daniel.hershcovich@gmail.com 2019-07-05 21:57:08 +0200 Fix dominated dict lookup to be by node id rather than index
3 | 8df18be265c92c11a7fac788d727a2c879e142c4 milan@strakovi.com 2019-07-05 10:13:02 +0200 Another fix for evaluation of empty graphs.
4 | 15187440752dec7819093fa79849ff4b48d7a3d4 oe@ifi.uio.no 2019-07-05 00:55:58 +0200 fine-tuning default limits for MRP and SMATCH scorers; disable RRHC-based initialization for UCCA graphs; allow better control of RRHC and MCES limits from the command line
5 | 0d20656f47ad86352d6de86ce5b193295a3442bd oe@ifi.uio.no 2019-07-03 12:57:38 +0200 cosmetics
6 | 1e2fa352c1384ea6a1005c193ebf1d449a0de1dd oe@ifi.uio.no 2019-07-03 01:41:40 +0200 disable more assertions: is_injective() actually fails on the UCCA test (when initializing from SMATCH)
7 | 8aaa494d5794abc849965dda6fd70208a530c3db oe@ifi.uio.no 2019-07-02 21:33:43 +0200 bug fix: over-counting can apply on the same set of correspondences too
8 | 3cccda87794669573018f08a3717461b6deedfab oe@ifi.uio.no 2019-07-02 17:46:36 +0200 allow initialization from SMATCH hill-climbing; guard against over-counting (see my email to tim of june 30, 2019)
9 | 6c863c9e6233b8d3e81f39e0015333c4c75d5264 daniel.hershcovich@gmail.com 2019-07-01 14:22:24 +0200 Normalization: drop (attribute, value) pairs whose value is the default value
10 | b2145c4fc9ec79624fc84955f373b3387ca02d75 oe@ifi.uio.no 2019-06-30 01:33:24 +0200 give more weight to anchor overlap in UCCA initialization and rewards
11 | c31601c31b0e17639aa9557559d5655bfd55c371 oe@ifi.uio.no 2019-06-30 01:15:07 +0200 bug fix in sorted_splits(); streamlined smatch() interface; cosmetics
12 | 210da9b2e9eff2be7adf988d2865ab77c5ec3447 oe@ifi.uio.no 2019-06-27 22:38:06 +0200 close #20 (prior to scoring, normalize graphs according to the description on the web page)
13 | 1a61ea4484e77a458030a62a62e751e0668e7f11 oe@ifi.uio.no 2019-06-27 13:15:25 +0200 generalize anchor treatment in SMATCH wrapper
14 | b4db1996a894ad70dcb8bc83ba46ddfa354db44e daniel.hershcovich@gmail.com 2019-06-25 11:04:54 +0200 #26 Require leaf status of matched nodes to be the same in UCCA MCES
15 | 8696ffe1fa154acd03a4adbb1813354f198dfeb9 oe@ifi.uio.no 2019-06-20 10:34:00 +0200 fix copy-paste error (owing to a missing generalization)
16 | 274890bdccf3e3e502b755386b7af7fecf39284d oe@ifi.uio.no 2019-06-18 23:59:10 +0200 bug fix: edge attributes
17 | 09c48bd4a8ab8b72d05cea9571000a2e3524bb1b oe@ifi.uio.no 2019-06-18 00:59:52 +0200 activate improved estimate of edge potential
18 | 1c68aa39675291dc998a508e818e63723b0804c0 marco.kuhlmann@liu.se 2019-06-17 23:30:13 +0200 Treat edge attributes properly (closes: #13)
19 | 08e0d8a839b98a395c868cc1bd2e6ca859ef3e05 marco.kuhlmann@liu.se 2019-06-17 22:30:42 +0200 Respect node ordering in bi-lexical graphs (closes: #15)
20 | 7718d1ca50b250e154365e5846981564d7b635d5 oe@ifi.uio.no 2019-06-16 17:10:33 +0200 expose per-item result; rationalize --limit and --trace
21 |
--------------------------------------------------------------------------------
/data/score/test.slurm:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #SBATCH --job-name=score
4 | #SBATCH --mail-type=FAIL
5 | #SBATCH --account=nn9447k
6 | #SBATCH --time=12:00:00
7 | #SBATCH --nodes=1
8 | #SBATCH --mem-per-cpu=4G
9 | #SBATCH --ntasks-per-node=8
10 |
11 | commit="$(git log --pretty=format:\%H -n 1)";
12 | echo "directory: $(pwd)";
13 | echo "git status: $(git status | head -1)";
14 | echo "git commit: ${commit}";
15 | echo;
16 |
17 | source /cluster/bin/jobsetup;
18 |
19 | module purge;
20 | module use -a /projects/nlpl/software/modulefiles;
21 | module load nlpl-python-candy/201902/3.7 nlpl-numpy/1.16.3/3.7;
22 |
23 | /bin/cp ${HOME}/lib/mrp/2019/mtool/data/score/Makefile ./Makefile;
24 | make -j ${SLURM_CPUS_ON_NODE:-4} $(egrep '^[a-z/.]*.json:' Makefile | grep -v all: | sed 's/://');
25 | if [ -d ./../../../etc/ ]; then
26 | target=../../../../etc/${commit};
27 | [ -d ${target} ] || mkdir ${target};
28 | cp -va *.json *.log ${target};
29 | fi
30 |
--------------------------------------------------------------------------------
/data/score/ucca/anchors.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "133601-0004", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (11:29)", "input": "Even though you are expensive.", "tops": [5], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 4}, {"from": 5, "to": 11}]}, {"id": 1, "anchors": [{"from": 12, "to": 15}]}, {"id": 2, "anchors": [{"from": 16, "to": 19}]}, {"id": 3, "anchors": [{"from": 20, "to": 29}]}, {"id": 4, "anchors": [{"from": 29, "to": 30}]}, {"id": 5}, {"id": 6}], "edges": [{"source": 5, "target": 0, "label": "L"}, {"source": 6, "target": 3, "label": "S"}, {"source": 6, "target": 4, "label": "U"}, {"source": 6, "target": 1, "label": "A"}, {"source": 5, "target": 6, "label": "H"}, {"source": 6, "target": 2, "label": "F"}]}
2 |
--------------------------------------------------------------------------------
/data/score/ucca/anchors.tupa.mrp:
--------------------------------------------------------------------------------
1 | {"id": "133601-0004", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (11:31)", "input": "Even though you are expensive.", "tops": [5], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 11}]}, {"id": 1, "anchors": [{"from": 12, "to": 15}]}, {"id": 2, "anchors": [{"from": 16, "to": 19}]}, {"id": 3, "anchors": [{"from": 20, "to": 29}]}, {"id": 4, "anchors": [{"from": 29, "to": 30}]}, {"id": 5}, {"id": 6}], "edges": [{"source": 6, "target": 4, "label": "U"}, {"source": 6, "target": 3, "label": "S"}, {"source": 6, "target": 2, "label": "F"}, {"source": 6, "target": 1, "label": "A"}, {"source": 5, "target": 0, "label": "L"}, {"source": 5, "target": 6, "label": "H"}]}
2 |
--------------------------------------------------------------------------------
/data/score/ucca/id.mrp:
--------------------------------------------------------------------------------
1 | {"id": 72, "flavor": 1, "framework": "ucca", "version": 1.0, "time": "2019-07-06", "tops": [22], "nodes": [{"id": 0}, {"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}, {"id": 7}, {"id": 8}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}, {"id": 16}, {"id": 17}, {"id": 18}, {"id": 19}, {"id": 20}, {"id": 21}, {"id": 22}, {"id": 23}, {"id": 24}, {"id": 25}, {"id": 26}, {"id": 27}, {"id": 28}, {"id": 29}], "edges": [{"source": 21, "target": 2, "label": "R"}, {"source": 24, "target": 7, "label": "C"}, {"source": 23, "target": 21, "label": "T"}, {"source": 26, "target": 11, "label": "C"}, {"source": 24, "target": 25, "label": "E"}, {"source": 28, "target": 15, "label": "R"}, {"source": 28, "target": 17, "label": "C"}, {"source": 23, "target": 24, "label": "A"}, {"source": 27, "target": 14, "label": "C"}, {"source": 25, "target": 28, "label": "E"}, {"source": 22, "target": 23, "label": "H"}, {"source": 21, "target": 0, "label": "E"}, {"source": 23, "target": 4, "label": "A"}, {"source": 23, "target": 5, "label": "P"}, {"source": 25, "target": 9, "label": "E"}, {"source": 27, "target": 13, "label": "E"}, {"source": 28, "target": 29, "label": "E"}, {"source": 25, "target": 27, "label": "E"}, {"source": 24, "target": 6, "label": "E"}, {"source": 29, "target": 20, "label": "U"}, {"source": 21, "target": 1, "label": "C"}, {"source": 28, "target": 16, "label": "E"}, {"source": 23, "target": 3, "label": "U"}, {"source": 29, "target": 19, "label": "C"}, {"source": 26, "target": 12, "label": "R"}, {"source": 25, "target": 8, "label": "R"}, {"source": 27, "target": 26, "label": "E"}, {"source": 25, "target": 10, "label": "C"}, {"source": 29, "target": 18, "label": "R"}]}
2 | {"id": 127, "flavor": 1, "framework": "ucca", "version": 1.0, "time": "2019-07-06", "tops": [78], "nodes": [{"id": 0}, {"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}, {"id": 7}, {"id": 8}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}, {"id": 16}, {"id": 17}, {"id": 18}, {"id": 19}, {"id": 20}, {"id": 21}, {"id": 22}, {"id": 23}, {"id": 24}, {"id": 25}, {"id": 26}, {"id": 27}, {"id": 28}, {"id": 29}, {"id": 30}, {"id": 31}, {"id": 32}, {"id": 33}, {"id": 34}, {"id": 35}, {"id": 36}, {"id": 37}, {"id": 38}, {"id": 39}, {"id": 40}, {"id": 41}, {"id": 42}, {"id": 43}, {"id": 44}, {"id": 45}, {"id": 46}, {"id": 47}, {"id": 48}, {"id": 49}, {"id": 50}, {"id": 51}, {"id": 52}, {"id": 53}, {"id": 54}, {"id": 55}, {"id": 56}, {"id": 57}, {"id": 58}, {"id": 59}, {"id": 60}, {"id": 61}, {"id": 62}, {"id": 63}, {"id": 64}, {"id": 65}, {"id": 66}, {"id": 67}, {"id": 68}, {"id": 69}, {"id": 70}, {"id": 71}, {"id": 72}, {"id": 73}, {"id": 74}, {"id": 75}, {"id": 76}, {"id": 77}, {"id": 78}, {"id": 79}, {"id": 80}, {"id": 81}, {"id": 82}, {"id": 83}, {"id": 84}, {"id": 85}, {"id": 86}, {"id": 87}, {"id": 88}, {"id": 89}, {"id": 90}, {"id": 91}, {"id": 92}, {"id": 93}, {"id": 94}, {"id": 95}, {"id": 96}, {"id": 97}, {"id": 98}, {"id": 99}, {"id": 100}, {"id": 101}, {"id": 102}, {"id": 103}, {"id": 104}, {"id": 105}, {"id": 106}, {"id": 107}, {"id": 108}, {"id": 109}, {"id": 110}, {"id": 111}, {"id": 112}, {"id": 113}, {"id": 114}, {"id": 115}], "edges": [{"source": 110, "target": 63, "label": "R"}, {"source": 97, "target": 37, "label": "E"}, {"source": 111, "target": 68, "label": "C"}, {"source": 79, "target": 80, "label": "A"}, {"source": 80, "target": 98, "label": "H"}, {"source": 112, "target": 69, "label": "S"}, {"source": 80, "target": 0, "label": "U"}, {"source": 112, "target": 113, "label": "A"}, {"source": 109, "target": 110, "label": "E"}, {"source": 93, "target": 94, "label": "E"}, {"source": 95, "target": 32, "label": "C"}, {"source": 82, "target": 81, "label": "C"}, {"source": 111, "target": 112, "label": "E"}, {"source": 84, "target": 5, "label": "C"}, {"source": 106, "target": 55, "label": "C"}, {"source": 92, "target": 25, "label": "E"}, {"source": 103, "target": 66, "label": "N"}, {"source": 105, "target": 52, "label": "E"}, {"source": 103, "target": 56, "label": "U"}, {"source": 86, "target": 12, "label": "R"}, {"source": 105, "target": 53, "label": "C"}, {"source": 80, "target": 101, "label": "H"}, {"source": 102, "target": 48, "label": "E"}, {"source": 93, "target": 28, "label": "C"}, {"source": 82, "target": 4, "label": "N"}, {"source": 95, "target": 31, "label": "E"}, {"source": 115, "target": 76, "label": "C"}, {"source": 83, "target": 9, "label": "T"}, {"source": 108, "target": 109, "label": "A"}, {"source": 95, "target": 30, "label": "R"}, {"source": 94, "target": 95, "label": "A"}, {"source": 98, "target": 40, "label": "A"}, {"source": 82, "target": 84, "label": "C"}, {"source": 102, "target": 49, "label": "C"}, {"source": 113, "target": 114, "label": "A"}, {"source": 107, "target": 57, "label": "E"}, {"source": 91, "target": 23, "label": "P"}, {"source": 107, "target": 108, "label": "E"}, {"source": 103, "target": 65, "label": "U"}, {"source": 104, "target": 105, "label": "A"}, {"source": 113, "target": 70, "label": "R"}, {"source": 92, "target": 24, "label": "R"}, {"source": 80, "target": 96, "label": "H"}, {"source": 102, "target": 104, "label": "E"}, {"source": 115, "target": 74, "label": "R"}, {"source": 81, "target": 
1, "label": "E"}, {"source": 104, "target": 50, "label": "P"}, {"source": 111, "target": 67, "label": "E"}, {"source": 115, "target": 77, "label": "U"}, {"source": 96, "target": 34, "label": "P"}, {"source": 97, "target": 36, "label": "C"}, {"source": 101, "target": 47, "label": "S"}, {"source": 83, "target": 87, "label": "A"}, {"source": 106, "target": 54, "label": "R"}, {"source": 81, "target": 3, "label": "E"}, {"source": 88, "target": 19, "label": "N"}, {"source": 99, "target": 43, "label": "C"}, {"source": 89, "target": 88, "label": "P"}, {"source": 86, "target": 10, "label": "E"}, {"source": 94, "target": 29, "label": "S"}, {"source": 80, "target": 39, "label": "L"}, {"source": 88, "target": 18, "label": "C"}, {"source": 96, "target": 97, "label": "A"}, {"source": 112, "target": 68, "label": "A", "attributes": ["remote"], "values": [true]}, {"source": 114, "target": 115, "label": "E"}, {"source": 80, "target": 22, "label": "U"}, {"source": 80, "target": 91, "label": "H"}, {"source": 103, "target": 107, "label": "C"}, {"source": 110, "target": 64, "label": "C"}, {"source": 87, "target": 86, "label": "E"}, {"source": 109, "target": 62, "label": "C"}, {"source": 97, "target": 35, "label": "E"}, {"source": 105, "target": 51, "label": "R"}, {"source": 78, "target": 79, "label": "H"}, {"source": 91, "target": 92, "label": "A"}, {"source": 80, "target": 38, "label": "U"}, {"source": 88, "target": 17, "label": "U"}, {"source": 114, "target": 73, "label": "C"}, {"source": 100, "target": 43, "label": "A", "attributes": ["remote"], "values": [true]}, {"source": 85, "target": 7, "label": "C"}, {"source": 80, "target": 33, "label": "U"}, {"source": 94, "target": 28, "label": "A", "attributes": ["remote"], "values": [true]}, {"source": 101, "target": 46, "label": "A"}, {"source": 109, "target": 60, "label": "R"}, {"source": 114, "target": 72, "label": "E"}, {"source": 113, "target": 71, "label": "P"}, {"source": 99, "target": 100, "label": "E"}, {"source": 93, "target": 27, "label": "E"}, {"source": 83, "target": 82, "label": "A"}, {"source": 86, "target": 11, "label": "C"}, {"source": 80, "target": 89, "label": "H"}, {"source": 100, "target": 44, "label": "P"}, {"source": 99, "target": 42, "label": "E"}, {"source": 98, "target": 41, "label": "P"}, {"source": 83, "target": 8, "label": "P"}, {"source": 88, "target": 16, "label": "C"}, {"source": 81, "target": 2, "label": "C"}, {"source": 80, "target": 83, "label": "H"}, {"source": 91, "target": 93, "label": "A"}, {"source": 90, "target": 21, "label": "C"}, {"source": 90, "target": 20, "label": "E"}, {"source": 108, "target": 58, "label": "A", "attributes": ["remote"], "values": [true]}, {"source": 101, "target": 103, "label": "A"}, {"source": 80, "target": 45, "label": "U"}, {"source": 92, "target": 26, "label": "C"}, {"source": 98, "target": 99, "label": "A"}, {"source": 104, "target": 49, "label": "A", "attributes": ["remote"], "values": [true]}, {"source": 103, "target": 102, "label": "C"}, {"source": 107, "target": 58, "label": "C"}, {"source": 89, "target": 90, "label": "A"}, {"source": 109, "target": 61, "label": "E"}, {"source": 85, "target": 6, "label": "R"}, {"source": 108, "target": 59, "label": "P"}, {"source": 105, "target": 106, "label": "E"}, {"source": 80, "target": 15, "label": "U"}, {"source": 115, "target": 75, "label": "E"}, {"source": 87, "target": 13, "label": "E"}, {"source": 84, "target": 85, "label": "E"}, {"source": 87, "target": 14, "label": "C"}, {"source": 103, "target": 111, "label": "C"}]}
3 |
--------------------------------------------------------------------------------
/data/score/ucca/koller.mrp:
--------------------------------------------------------------------------------
1 | {"id": "291046-0001", "framework": "ucca", "flavor": 1, "time": "2019-07-17 (10:43)", "version": "0.9", "input": "Hams on Friendly … RIP", "nodes": [{"anchors": [{"from": 0, "to": 4}], "id": 0, "label": "hams", "properties": [], "values": []}, {"anchors": [{"from": 5, "to": 7}], "id": 1, "label": "on", "properties": [], "values": []}, {"anchors": [{"from": 8, "to": 16}], "id": 2, "label": "friendly", "properties": [], "values": []}, {"anchors": [{"from": 17, "to": 20}], "id": 3, "label": "...", "properties": [], "values": []}, {"anchors": [{"from": 21, "to": 24}], "id": 4, "label": "rip", "properties": [], "values": []}, {"id": 4}, {"id": 5}, {"id": 6}], "edges": [{"source": 5, "target": 1, "label": "A"}, {"source": 5, "target": 2, "label": "S"}, {"source": 6, "target": 5, "label": "A"}, {"source": 5, "target": 3, "label": "A"}, {"source": 6, "target": 0, "label": "S"}, {"source": 6, "target": 4, "label": "U"}]}
2 |
--------------------------------------------------------------------------------
/data/score/ucca/small.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "001325-0001", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (17:11)", "input": "Highly recommended", "tops": [2], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 6}]}, {"id": 1, "anchors": [{"from": 7, "to": 18}]}, {"id": 2}, {"id": 3}], "edges": [{"source": 3, "target": 1, "label": "S"}, {"source": 2, "target": 3, "label": "H"}, {"source": 3, "target": 0, "label": "D"}]}
2 |
--------------------------------------------------------------------------------
/data/score/ucca/small.gold.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/data/score/ucca/small.gold.pdf
--------------------------------------------------------------------------------
/data/score/ucca/small.tupa.mrp:
--------------------------------------------------------------------------------
1 | {"id": "001325-0001", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (17:12)", "input": "Highly recommended", "tops": [2], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 6}]}, {"id": 1, "anchors": [{"from": 7, "to": 18}]}, {"id": 2}, {"id": 3}], "edges": [{"source": 2, "target": 3, "label": "H"}, {"source": 3, "target": 0, "label": "D"}, {"source": 3, "target": 1, "label": "P"}]}
2 |
--------------------------------------------------------------------------------
/data/score/ucca/small.tupa.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/data/score/ucca/small.tupa.pdf
--------------------------------------------------------------------------------
/data/score/ucca/test.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "001325-0002", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (15:48)", "input": "My 8 year old daughter loves this place.", "tops": [11], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 2}]}, {"id": 1, "anchors": [{"from": 3, "to": 4}]}, {"id": 2, "anchors": [{"from": 5, "to": 9}]}, {"id": 3, "anchors": [{"from": 10, "to": 13}]}, {"id": 4, "anchors": [{"from": 14, "to": 22}]}, {"id": 5, "anchors": [{"from": 23, "to": 28}]}, {"id": 6, "anchors": [{"from": 29, "to": 33}]}, {"id": 7, "anchors": [{"from": 34, "to": 39}]}, {"id": 8, "anchors": [{"from": 39, "to": 40}]}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}], "edges": [{"source": 12, "target": 9, "label": "A"}, {"source": 11, "target": 12, "label": "H"}, {"source": 15, "target": 7, "label": "C"}, {"source": 13, "target": 2, "label": "C"}, {"source": 12, "target": 15, "label": "A"}, {"source": 13, "target": 1, "label": "Q"}, {"source": 15, "target": 8, "label": "U"}, {"source": 10, "target": 0, "label": "A"}, {"source": 10, "target": 4, "label": "A"}, {"source": 9, "target": 10, "label": "C"}, {"source": 10, "target": 4, "label": "S"}, {"source": 14, "target": 4, "label": "A", "properties": ["remote"], "values": [true]}, {"source": 12, "target": 5, "label": "S"}, {"source": 14, "target": 13, "label": "T"}, {"source": 15, "target": 6, "label": "E"}, {"source": 14, "target": 3, "label": "S"}, {"source": 9, "target": 14, "label": "E"}]}
2 | {"id": "20003013", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-04-09 (14:49)", "input": "Among 33 men who worked closely with the substance, 28 have died -- more than three times the expected number.", "tops": [23], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 5}]}, {"id": 1, "anchors": [{"from": 6, "to": 8}]}, {"id": 2, "anchors": [{"from": 9, "to": 12}]}, {"id": 3, "anchors": [{"from": 13, "to": 16}]}, {"id": 4, "anchors": [{"from": 17, "to": 23}]}, {"id": 5, "anchors": [{"from": 24, "to": 31}]}, {"id": 6, "anchors": [{"from": 32, "to": 36}]}, {"id": 7, "anchors": [{"from": 37, "to": 40}]}, {"id": 8, "anchors": [{"from": 41, "to": 50}]}, {"id": 9, "anchors": [{"from": 50, "to": 51}]}, {"id": 10, "anchors": [{"from": 52, "to": 54}]}, {"id": 11, "anchors": [{"from": 55, "to": 59}]}, {"id": 12, "anchors": [{"from": 60, "to": 64}]}, {"id": 13, "anchors": [{"from": 65, "to": 67}]}, {"id": 14, "anchors": [{"from": 68, "to": 72}, {"from": 73, "to": 77}]}, {"id": 15, "anchors": [{"from": 78, "to": 83}]}, {"id": 16, "anchors": [{"from": 84, "to": 89}]}, {"id": 17, "anchors": [{"from": 90, "to": 93}]}, {"id": 18, "anchors": [{"from": 94, "to": 102}]}, {"id": 19, "anchors": [{"from": 103, "to": 109}]}, {"id": 20, "anchors": [{"from": 109, "to": 110}]}, {"id": 21}, {"id": 22}, {"id": 23}, {"id": 24}, {"id": 25}, {"id": 26}, {"id": 27}, {"id": 28}, {"id": 29}, {"id": 30}], "edges": [{"source": 27, "target": 16, "label": "C"}, {"source": 21, "target": 0, "label": "R"}, {"source": 30, "target": 19, "label": "A", "properties": ["remote"], "values": [true]}, {"source": 28, "target": 29, "label": "A"}, {"source": 30, "target": 18, "label": "S"}, {"source": 24, "target": 3, "label": "R"}, {"source": 29, "target": 20, "label": "U"}, {"source": 22, "target": 26, "label": "A"}, {"source": 22, "target": 12, "label": "P"}, {"source": 23, "target": 13, "label": "U"}, {"source": 21, "target": 1, "label": "Q"}, {"source": 27, "target": 15, "label": "Q"}, {"source": 29, "target": 17, "label": "F"}, {"source": 25, "target": 7, "label": "E"}, {"source": 28, "target": 27, "label": "S"}, {"source": 22, "target": 21, "label": "A"}, {"source": 23, "target": 28, "label": "H"}, {"source": 26, "target": 10, "label": "Q"}, {"source": 22, "target": 9, "label": "U"}, {"source": 24, "target": 25, "label": "A"}, {"source": 25, "target": 6, "label": "R"}, {"source": 21, "target": 24, "label": "E"}, {"source": 26, "target": 2, "label": "C", "properties": ["remote"], "values": [true]}, {"source": 24, "target": 5, "label": "D"}, {"source": 24, "target": 2, "label": "A", "properties": ["remote"], "values": [true]}, {"source": 22, "target": 11, "label": "F"}, {"source": 25, "target": 8, "label": "C"}, {"source": 21, "target": 2, "label": "C"}, {"source": 27, "target": 14, "label": "E"}, {"source": 29, "target": 30, "label": "E"}, {"source": 24, "target": 4, "label": "P"}, {"source": 29, "target": 19, "label": "C"}, {"source": 28, "target": 26, "label": "A", "properties": ["remote"], "values": [true]}, {"source": 23, "target": 22, "label": "H"}]}
3 |
--------------------------------------------------------------------------------
/data/score/ucca/test.gold.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/data/score/ucca/test.gold.pdf
--------------------------------------------------------------------------------
/data/score/ucca/test.tupa.mrp:
--------------------------------------------------------------------------------
1 | {"id": "001325-0002", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (15:48)", "input": "My 8 year old daughter loves this place.", "tops": [10], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 2}]}, {"id": 1, "anchors": [{"from": 3, "to": 4}]}, {"id": 2, "anchors": [{"from": 5, "to": 9}]}, {"id": 3, "anchors": [{"from": 10, "to": 13}]}, {"id": 4, "anchors": [{"from": 14, "to": 22}]}, {"id": 5, "anchors": [{"from": 23, "to": 28}]}, {"id": 6, "anchors": [{"from": 29, "to": 33}]}, {"id": 7, "anchors": [{"from": 34, "to": 39}]}, {"id": 8, "anchors": [{"from": 39, "to": 40}]}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}], "edges": [{"source": 11, "target": 13, "label": "A"}, {"source": 13, "target": 7, "label": "C"}, {"source": 9, "target": 3, "label": "E"}, {"source": 13, "target": 8, "label": "U"}, {"source": 11, "target": 5, "label": "P"}, {"source": 13, "target": 6, "label": "E"}, {"source": 9, "target": 0, "label": "E"}, {"source": 10, "target": 11, "label": "H"}, {"source": 12, "target": 2, "label": "C"}, {"source": 11, "target": 9, "label": "D"}, {"source": 9, "target": 4, "label": "C"}, {"source": 12, "target": 1, "label": "E"}, {"source": 9, "target": 12, "label": "E"}]}
2 | {"id": "20003013", "framework": "ucca", "version": 1.0, "time": "2019-07-05", "input": "Among 33 men who worked closely with the substance, 28 have died -- more than three times the expected number.", "nodes": [{"id": 23}, {"id": 0}, {"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}, {"id": 7}, {"id": 8}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}, {"id": 16}, {"id": 17}, {"id": 18}, {"id": 19}, {"id": 20}, {"id": 21}, {"id": 23}, {"id": 24}], "edges": [{"source": 23, "target": 24, "label": "U"}, {"source": 23, "target": 23, "label": "L"}]}
3 |
--------------------------------------------------------------------------------
/data/score/ucca/test.tupa.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/data/score/ucca/test.tupa.pdf
--------------------------------------------------------------------------------
/data/validate/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all
2 |
3 | all:
4 | time python3 -u ../../main.py --trace --trace --validate all \
5 | --read mrp eds/wsj.mrp $@ 2>&1 | tee eds.wsj.log
6 |
--------------------------------------------------------------------------------
/inspector.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 |
5 | def summarize(graphs, golds):
6 | ids = None;
7 | if golds is not None:
8 | ids = dict();
9 | for gold in golds:
10 | language = gold.language();
11 | if language not in ids: ids[language] = dict();
12 | targets = gold.targets();
13 | if targets is None: targets = [gold.framework];
14 | for target in targets:
15 | if target not in ids[language]: ids[language][target] = set();
16 | ids[language][target].add(gold.id);
17 |
18 | counts = dict();
19 | seen = dict();
20 | targets = dict();
21 | targets["eng"] = ["eds", "ptg", "ucca", "amr", "drg"];
22 | targets["ces"] = ["ptg"];
23 | targets["deu"] = ["ucca", "drg"];
24 | targets["zho"] = ["amr"];
25 | for language in ["eng", "ces", "deu", "zho"]:
26 | counts[language] = dict();
27 | seen[language] = dict();
28 | for key in targets[language]:
29 | counts[language][key] = 0;
30 | seen[language][key] = set();
31 |
32 | for graph in graphs:
33 | language = graph.language();
34 | if language is None: language = "eng";
35 | framework = graph.framework;
36 | if golds is None or \
37 | language in ids and framework in ids[language] and \
38 | graph.id in ids[language][framework]:
39 | counts[language][framework] += 1;
40 | if graph.id in seen[language][framework]:
41 | print("inspector.summarize(): ignoring duplicate {} {} graph #{}."
42 | "".format(language, framework, graph.id),
43 | file = sys.stderr);
44 | else:
45 | seen[language][framework].add(graph.id);
46 |
47 | complete = True;
48 | for language in ["eng", "ces", "deu", "zho"]:
49 | for key in targets[language]:
50 | if len(ids[language][key]) != counts[language][key]: complete = False;
51 | counts["complete"] = complete;
52 | return counts;
53 |
--------------------------------------------------------------------------------
/score/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/__init__.py
--------------------------------------------------------------------------------
/score/core.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | #
4 | # _fix_me_
5 | # maybe use Unicode character classes instead, even if it likely would mean
6 | # many calls to match one-character regular expressions?
7 | #
8 | PUNCTUATION = frozenset(".?!;,:“\"”‘'’()[]{} \t\n\f")
9 | SPACE = frozenset(" \t\n\f")
10 |
11 | def intersect(golds, systems, quiet = False):
12 | golds = {(graph.language(), graph.framework, graph.id): graph
13 | for graph in golds};
14 | seen = set();
15 | for graph in systems:
16 | language = graph.language();
17 | key = (language, graph.framework, graph.id);
18 | if language is None and key not in golds:
19 | language = "eng";
20 | key = (language, graph.framework, graph.id);
21 | if key in seen:
22 | if not quiet:
23 | print("score.intersect(): ignoring duplicate {} {} graph #{}"
24 | .format(language, graph.framework, graph.id),
25 | file=sys.stderr);
26 | else:
27 | seen.add(key);
28 | gold = golds.get(key);
29 | if gold is None:
30 | if not quiet:
31 | print("score.intersect(): ignoring {} {} graph #{} with no gold graph"
32 | .format(graph.language(), graph.framework, graph.id),
33 | file=sys.stderr);
34 | else:
35 | yield gold, graph;
36 |
37 | for key in golds.keys() - seen:
38 | gold = golds[key];
39 | if not quiet:
40 | print("score.intersect(): missing system {} {} graph #{}"
41 | .format(gold.language(), gold.framework, gold.id),
42 | file=sys.stderr);
43 | #
44 | # manufacture an empty graph as the system graph
45 | #
46 | from graph import Graph;
47 | yield gold, Graph(gold.id, flavor = gold.flavor,
48 | framework = gold.framework);
49 |
50 | def anchor(node):
51 | result = list();
52 | if node.anchors is not None:
53 | for span in node.anchors:
54 | if "from" in span and "to" in span:
55 | result.append((span["from"], span["to"]));
56 | return result;
57 |
58 | def explode(string, anchors, trim = PUNCTUATION):
59 | result = set();
60 | for anchor in anchors:
61 | start = end = None;
62 | if isinstance(anchor, tuple):
63 | start, end = anchor;
64 | elif "from" in anchor and "to" in anchor:
65 | start = anchor["from"]; end = anchor["to"];
66 | if start is not None and end is not None:
67 | while start < end and string[start] in trim:
68 | start += 1;
69 | while end > start and string[end - 1] in trim:
70 | end -= 1;
71 | for i in range(start, end):
72 | if string[i] not in SPACE:
73 | result.add(i);
74 | return frozenset(result);
75 |
76 | def fscore(gold, system, correct):
77 | p = correct / system if system else 0.0;
78 | r = correct / gold if gold else 0.0;
79 | f = 2 * p * r / (p + r) if p + r != 0 else 0.0;
80 | return p, r, f;
81 |
82 |
83 |
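A small worked example of the arithmetic above (a sketch only; it assumes the repository root is on the Python path so that score/core.py is importable as score.core, and the numbers are purely illustrative):

    # fscore(gold, system, correct): 10 gold items, 8 system items, 6 correct
    # gives precision 6/8 = 0.75, recall 6/10 = 0.6, F1 = 2*0.75*0.6/1.35 = 0.667.
    from score.core import explode, fscore

    print(fscore(10, 8, 6))

    # explode() turns anchors into character-offset sets, trimming PUNCTUATION
    # at the edges and skipping SPACE: the span (0, 5) over "Hams on Friendly"
    # keeps only the offsets of "Hams".
    print(sorted(explode("Hams on Friendly", [(0, 5)])))   # [0, 1, 2, 3]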
--------------------------------------------------------------------------------
/score/edm.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | import score.core;
5 |
6 | def tuples(graph, explode = False):
7 | identities = dict();
8 | names = set();
9 | tops = set();
10 | arguments = set();
11 | properties = set();
12 | for node in graph.nodes:
13 | if graph.input and explode:
14 | identity = score.core.explode(graph.input,
15 | score.core.anchor(node));
16 | else:
17 | identity = tuple(score.core.anchor(node));
18 | identities[node.id] = identity;
19 | if node.label is not None: names.add((identity, node.label));
20 | if node.is_top: tops.add(identity);
21 | if node.properties and node.values:
22 | for property, value in zip(node.properties, node.values):
23 | properties.add((identity, property, value))
24 | for edge in graph.edges:
25 | arguments.add((identities[edge.src], identities[edge.tgt], edge.lab));
26 | return names, arguments, properties, tops;
27 |
28 | def evaluate(golds, systems, format = "json", trace = 0):
29 | tgn = tsn = tcn = 0;
30 | tga = tsa = tca = 0;
31 | tgt = tst = tct = 0;
32 | tgp = tsp = tcp = 0;
33 | scores = dict() if trace else None;
34 | result = {"n": 0};
35 | for gold, system in score.core.intersect(golds, systems):
36 | explode = gold.input and system.input;
37 | gnames, garguments, gproperties, gtops = tuples(gold, explode = explode);
38 | snames, sarguments, sproperties, stops = tuples(system, explode = explode);
39 | if trace > 1:
40 | print("[{}] gold:\n{}\n{}\n{}\n{}\n\n"
41 | "".format(gold.id, gtops,
42 | gnames, garguments, gproperties));
43 | print("[{}] system:\n{}\n{}\n{}\n{}\n\n"
44 | "".format(gold.id, stops,
45 | snames, sarguments, sproperties));
46 | gn = len(gnames); sn = len(snames);
47 | cn = len(gnames & snames);
48 | ga = len(garguments); sa = len(sarguments);
49 | ca = len(garguments & sarguments);
50 | gt = len(gtops); st = len(stops);
51 | ct = len(gtops & stops);
52 | gp = len(gproperties); sp = len(sproperties);
53 | cp = len(gproperties & sproperties);
54 | tgn += gn; tsn += sn; tcn += cn;
55 | tga += ga; tsa += sa; tca += ca;
56 | tgt += gt; tst += st; tct += ct;
57 | tgp += gp; tsp += sp; tcp += cp;
58 | result["n"] += 1;
59 | if trace:
60 | if gold.id in scores:
61 | print("edm.evaluate(): duplicate graph identifier: {}"
62 | "".format(gold.id), file = sys.stderr);
63 | scores[gold.id] = {"names": {"g": gn, "s": sn, "c": cn},
64 | "arguments": {"g": ga, "s": sa, "c": ca},
65 | "tops": {"g": gt, "s": st, "c": ct},
66 | "properties": {"g": gp, "s": sp, "c": cp}};
67 | if scores is not None: result["scores"] = scores;
68 | p, r, f = score.core.fscore(tgn, tsn, tcn);
69 | result["names"] = {"g": tgn, "s": tsn, "c": tcn, "p": p, "r": r, "f": f};
70 | p, r, f = score.core.fscore(tga, tsa, tca);
71 | result["arguments"] = {"g": tga, "s": tsa, "c": tca, "p": p, "r": r, "f": f};
72 | p, r, f = score.core.fscore(tgt, tst, tct);
73 | result["tops"] = {"g": tgt, "s": tst, "c": tct, "p": p, "r": r, "f": f};
74 | p, r, f = score.core.fscore(tgp, tsp, tcp);
75 | result["properties"] = {"g": tgp, "s": tsp, "c": tcp, "p": p, "r": r, "f": f};
76 | tga = tgn + tga + tgt + tgp;
77 | tsa = tsn + tsa + tst + tsp;
78 | tca = tcn + tca + tct + tcp;
79 | p, r, f = score.core.fscore(tga, tsa, tca);
80 | result["all"] = {"g": tga, "s": tsa, "c": tca, "p": p, "r": r, "f": f};
81 | return result;
82 |
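For reference, the dictionary returned by evaluate() above has the following shape (a hedged sketch with made-up counts, not real scores; with trace set, a per-graph "scores" entry is added as well):

    # Each component and the "all" aggregate carry gold ("g"), system ("s") and
    # correct ("c") counts plus precision/recall/F1 from score.core.fscore().
    result = {
        "n": 2,
        "names":      {"g": 10, "s": 9,  "c": 8,  "p": 0.889, "r": 0.8,   "f": 0.842},
        "arguments":  {"g": 12, "s": 12, "c": 10, "p": 0.833, "r": 0.833, "f": 0.833},
        "tops":       {"g": 2,  "s": 2,  "c": 2,  "p": 1.0,   "r": 1.0,   "f": 1.0},
        "properties": {"g": 4,  "s": 3,  "c": 3,  "p": 1.0,   "r": 0.75,  "f": 0.857},
        "all":        {"g": 28, "s": 26, "c": 23, "p": 0.885, "r": 0.821, "f": 0.852},
    }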
--------------------------------------------------------------------------------
/score/lib/counter.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/counter.pdf
--------------------------------------------------------------------------------
/score/lib/damonte.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/damonte.pdf
--------------------------------------------------------------------------------
/score/lib/edm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/edm.pdf
--------------------------------------------------------------------------------
/score/lib/sdp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/sdp.pdf
--------------------------------------------------------------------------------
/score/lib/sema.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/sema.pdf
--------------------------------------------------------------------------------
/score/lib/sembleu.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/sembleu.pdf
--------------------------------------------------------------------------------
/score/lib/smatch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/smatch.pdf
--------------------------------------------------------------------------------
/score/lib/ucca.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/score/lib/ucca.pdf
--------------------------------------------------------------------------------
/score/sdp.py:
--------------------------------------------------------------------------------
1 | # Marco Kuhlmann
2 |
3 | import sys
4 |
5 | from score.core import anchor, intersect;
6 |
7 | class Measure(object):
8 |
9 | def __init__(self, get_items):
10 | self.get_items = get_items
11 | self.g = 0
12 | self.s = 0
13 | self.c = 0
14 | self.n_updates = 0
15 | self.n_matches = 0
16 |
17 | def update(self, gold, system, gidentities, sidentities, trace = 0):
18 | g_items = set(self.get_items(gold, gidentities))
19 | s_items = set(self.get_items(system, sidentities))
20 | self.g += len(g_items)
21 | self.s += len(s_items)
22 | self.c += len(g_items & s_items)
23 | self.n_updates += 1
24 | self.n_matches += g_items == s_items
25 | if trace:
26 | return {"g": len(g_items), "s": len(s_items),
27 | "c": len(g_items & s_items), "m": 1 if g_items == s_items else 0};
28 |
29 | def p(self):
30 | return self.c / self.s if self.s != 0 else 0.0
31 |
32 | def r(self):
33 | return self.c / self.g if self.g != 0 else 0.0
34 |
35 | def f(self):
36 | p = self.p()
37 | r = self.r()
38 | return 2 * p * r / (p + r) if p + r != 0 else 0.0
39 |
40 | def m(self):
41 | return self.n_matches / self.n_updates if self.n_updates != 0 else 0.0
42 |
43 | def report(self):
44 | json = {}
45 | json["g"] = self.g
46 | json["s"] = self.s
47 | json["c"] = self.c
48 | json["p"] = self.p()
49 | json["r"] = self.r()
50 | json["f"] = self.f()
51 | json["m"] = self.m()
52 | return json
53 |
54 | # def argument_predicate_dm(label):
55 | # return True
56 |
57 | # def argument_predicate_pas(label):
58 | # arguments = set("adj_ARG1 adj_ARG2 adj_MOD coord_ARG1 coord_ARG2 prep_ARG1 prep_ARG2 prep_ARG3 prep_MOD verb_ARG1 verb_ARG2 verb_ARG3 verb_ARG4 verb_MOD".split())
59 | # return label in arguments
60 |
61 | # def argument_predicate_psd(label):
62 | # return label.endswith("-arg")
63 |
64 | class Scorer(object):
65 |
66 | def __init__(self, include_virtual=True):
67 | self.measures = []
68 | self.measures.append(("labeled", Measure(self.get_itemsL)))
69 | self.measures.append(("unlabeled", Measure(self.get_itemsU)))
70 | # self.measureP = Measure(self.get_itemsP)
71 | # self.measureF = Measure(self.get_itemsF)
72 | # self.measureS = Measure(self.get_itemsS)
73 | self.include_virtual = include_virtual
74 |
75 | def identify(self, id):
76 | return self.identities[id]
77 |
78 | def get_itemsL(self, graph, identities):
79 | result = {(identities[e.src], identities[e.tgt], e.lab) for e in graph.edges}
80 | if self.include_virtual:
81 | for node in graph.nodes:
82 | if node.is_top:
83 | result.add((-1, identities[node.id], None))
84 | return result
85 |
86 | def get_itemsU(self, graph, identities):
87 | result = {(identities[e.src], identities[e.tgt]) for e in graph.edges}
88 | if self.include_virtual:
89 | for node in graph.nodes:
90 | if node.is_top:
91 | result.add((-1, identities[node.id]))
92 | return result
93 |
94 | # def get_itemsP(self, graph):
95 | # return {(frame[0], frame[2]) for frame in self.get_itemsF(graph)}
96 |
97 | # def get_itemsF(self, graph):
98 | # result = set()
99 | # for node in graph.nodes:
100 | # if self.has_scorable_predicate(node):
101 | # arguments = set()
102 | # for edge in node.outgoing_edges:
103 | # if self.argument_predicate(edge.lab):
104 | # arguments.add(edge)
105 | # extract = (node.id, node.sense, tuple(sorted(arguments)))
106 | # result.add(extract)
107 | # return result
108 |
109 | # def get_itemsS(self, graph):
110 | # return {(frame[0], frame[1]) for frame in self.get_itemsF(graph)}
111 |
112 | # def argument_predicate(self, label):
113 | # return True
114 |
115 | # def has_scorable_predicate(self, node):
116 | # return node.pred and node.pos.startswith("V")
117 |
118 | # def show_predications(self, g):
119 | # print(g.id)
120 | # report_predications(self.complete_predications(g))
121 |
122 | def update(self, g, s, trace):
123 | gidentities = {node.id: tuple(anchor(node)) for node in g.nodes}
124 | sidentities = {node.id: tuple(anchor(node)) for node in s.nodes}
125 | scores = dict();
126 | for key, measure in self.measures:
127 | score = measure.update(g, s, gidentities, sidentities, trace)
128 | if trace: scores[key] = score;
129 | return scores;
130 |
131 | def report(self, n, scores = None):
132 | json = {"n": n}
133 | for info, measure in self.measures:
134 | json[info] = measure.report()
135 | if scores is not None: json["scores"] = scores
136 | return json
137 |
138 | def evaluate(gold, system, format = "json", trace = 0):
139 | scorer = Scorer(include_virtual=True)
140 | n = 0
141 | scores = dict() if trace else None
142 | for g, s in intersect(gold, system):
143 | score = scorer.update(g, s, trace)
144 | n += 1
145 | if trace: scores[g.id] = score
146 | result = scorer.report(n, scores)
147 | return result
148 |
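The Measure class above is agnostic about what the items are; the following sketch (assuming the repository root is on the Python path) exercises only its bookkeeping, passing plain sets in place of the Graph objects and anchor identities that Scorer would normally supply:

    from score.sdp import Measure

    # get_items here simply returns its first argument, so sets of labelled
    # edge tuples stand in for what get_itemsL() would extract from a Graph.
    m = Measure(lambda graph, identities: graph)
    m.update({("a", "b", "ARG1"), ("a", "c", "ARG2")},
             {("a", "b", "ARG1"), ("a", "d", "ARG2")},
             None, None)
    print(m.report())   # g=2, s=2, c=1, p=r=f=0.5, m=0.0 (no exact match)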
--------------------------------------------------------------------------------
/score/smatch.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | import score.core;
4 | from smatch.smatch import get_amr_match;
5 |
6 | def tuples(graph, prefix, values, faith = True):
7 | #
8 | # mimicry of get_triples() in amr.py
9 | #
10 | id = 0;
11 | mapping = dict();
12 | instances = [];
13 | relations = [];
14 | attributes = [];
15 | n = 0;
16 | for node in graph.nodes:
17 | mapping[node.id] = name = prefix + str(id);
18 | id += 1;
19 | if "anchors" in values and node.anchors is not None:
20 | anchor = score.core.anchor(node);
21 | if graph.input: anchor = score.core.explode(graph.input, anchor);
22 | attributes.append(("anchor", name, str(anchor)));
23 | if "labels" in values and node.label is not None:
24 | instance = node.label;
25 | else:
26 |       instance = "__{}_{}__".format(prefix, n);
27 | n += 1;
28 | instances.append(("instance", name, instance));
29 | if "tops" in values and node.is_top:
30 | #
31 | # the native SMATCH code (wrongly, i believe) ties the top property to
32 | # the node label (see https://github.com/cfmrp/mtool/issues/12). we get
33 | # to choose whether to faithfully replicate those scores or not.
34 | #
35 | attributes.append(("TOP", name,
36 | node.label if node.label and faith else ""));
37 | if "properties" in values and node.properties and node.values:
38 | for property, value in zip(node.properties, node.values):
39 | attributes.append((property, name, value));
40 | for edge in graph.edges:
41 | if "edges" in values:
42 | relations.append((edge.lab, mapping[edge.src], mapping[edge.tgt]));
43 | if "attributes" in values:
44 | if edge.attributes and edge.values:
45 | for attribute, value in zip(edge.attributes, edge.values):
46 | relations.append((str((attribute, value)),
47 | mapping[edge.src], mapping[edge.tgt]));
48 | return instances, attributes, relations, n;
49 |
50 | def smatch(gold, system, limit = 20, values = {}, trace = 0, faith = True):
51 | gprefix = "g"; sprefix = "s";
52 | ginstances, gattributes, grelations, gn \
53 | = tuples(gold, gprefix, values, faith);
54 | sinstances, sattributes, srelations, sn \
55 | = tuples(system, sprefix, values, faith);
56 | if trace > 1:
57 | print("gold instances [{}]: {}\ngold attributes [{}]: {}\n"
58 | "gold relations [{}]: {}"
59 | "".format(len(ginstances), ginstances,
60 | len(gattributes), gattributes,
61 | len(grelations), grelations),
62 | file = sys.stderr);
63 | print("system instances [{}]: {}\nsystem attributes [{}]: {}\n"
64 | "system relations [{}]: {}"
65 | "".format(len(sinstances), sinstances,
66 | len(sattributes), sattributes,
67 | len(srelations), srelations),
68 | file = sys.stderr);
69 | correct, gold, system, mapping \
70 | = get_amr_match(None, None, gold.id, limit = limit,
71 | instance1 = ginstances, attributes1 = gattributes,
72 | relation1 = grelations, prefix1 = gprefix,
73 | instance2 = sinstances, attributes2 = sattributes,
74 | relation2 = srelations, prefix2 = sprefix);
75 | return correct, gold - gn, system - sn, mapping;
76 |
77 | def evaluate(golds, systems, format = "json", limit = 20,
78 | values = {}, trace = 0):
79 | if limit is None or not limit > 0: limit = 20;
80 | if trace > 1: print("RRHC limit: {}".format(limit), file = sys.stderr);
81 | tg = ts = tc = n = 0;
82 | scores = dict() if trace else None;
83 | for gold, system in score.core.intersect(golds, systems):
84 | id = gold.id;
85 | correct, gold, system, mapping \
86 | = smatch(gold, system, limit, values, trace);
87 | tg += gold; ts += system; tc += correct;
88 | n += 1;
89 | if trace:
90 | if id in scores:
91 | print("smatch.evaluate(): duplicate graph identifier: {}"
92 | "".format(id), file = sys.stderr);
93 | scores[id] = {"g": gold, "s": system, "c": correct};
94 | if trace > 1:
95 | p, r, f = score.core.fscore(gold, system, correct);
96 | print("G: {}; S: {}; C: {}; P: {}; R: {}; F: {}"
97 | "".format(gold, system, correct, p, r, f), file = sys.stderr);
98 |
99 | p, r, f = score.core.fscore(tg, ts, tc);
100 | result = {"n": n, "g": tg, "s": ts, "c": tc, "p": p, "r": r, "f": f};
101 | if trace: result["scores"] = scores;
102 | return result;
103 |
--------------------------------------------------------------------------------
/score/ucca.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from operator import itemgetter;
3 |
4 | from score.core import anchor, explode, intersect, fscore;
5 |
6 |
7 | def identify(graph, node, anchors = None, dominated = None, recursion = False):
8 | #
9 | # from how this ends up being called in various places, there is a missing
10 | # higher-level interface; something like (maybe even as a Graph method):
11 | #
12 | # identities = identify(graph, walk = True, explode = True)
13 | #
14 | if dominated is None:
15 | dominated = dict()
16 | if node not in dominated: dominated[node] = node_dominated = set()
17 | else: node_dominated = dominated[node]
18 | if anchors is None:
19 | anchors = dict();
20 | elif node in anchors:
21 | return anchors, dominated;
22 | anchors[node] = node_anchors = anchor(graph.find_node(node));
23 | for edge in graph.edges:
24 | if edge.attributes is None or "remote" not in edge.attributes:
25 | if node == edge.src:
26 | identify(graph, edge.tgt, anchors, dominated, True);
27 | for leaf in anchors[edge.tgt]:
28 | if leaf not in node_anchors: node_anchors.append(leaf);
29 | node_dominated.add(edge.tgt)
30 | node_dominated |= dominated[edge.tgt]
31 | if not recursion:
32 | anchors = {key: tuple(sorted(value, key = itemgetter(0, 1)))
33 | for key, value in anchors.items()}
34 | return anchors, dominated;
35 |
36 | def tuples(graph):
37 | identities = dict();
38 | for node in graph.nodes:
39 | identities, _ = identify(graph, node.id, identities);
40 | #
41 | # for robust comparison, represent each yield as a character set
42 | #
43 | if graph.input:
44 | for id in identities:
45 | identities[id] = explode(graph.input, identities[id]);
46 | lprimary = set();
47 | lremote = set();
48 | uprimary = set();
49 | uremote = set();
50 | for edge in graph.edges:
51 | source = identities[edge.src];
52 | target = identities[edge.tgt];
53 | if edge.attributes and "remote" in edge.attributes:
54 | lremote.add((source, target, edge.lab));
55 | uremote.add((source, target));
56 | else:
57 | lprimary.add((source, target, edge.lab));
58 | uprimary.add((source, target));
59 | return lprimary, lremote, uprimary, uremote;
60 |
61 | def evaluate(golds, systems, format = "json", trace = 0):
62 | tglp = tslp = tclp = 0;
63 | tgup = tsup = tcup = 0;
64 | tglr = tslr = tclr = 0;
65 | tgur = tsur = tcur = 0;
66 | tp = tr = 0;
67 | scores = dict() if trace else None;
68 | result = {"n": 0, "labeled": dict(), "unlabeled": dict()};
69 |
70 | for gold, system in intersect(golds, systems):
71 | glprimary, glremote, guprimary, guremote = tuples(gold);
72 | slprimary, slremote, suprimary, suremote = tuples(system);
73 | glp = len(glprimary); slp = len(slprimary);
74 | clp = len(glprimary & slprimary);
75 | gup = len(guprimary); sup = len(suprimary);
76 | cup = len(guprimary & suprimary);
77 | glr = len(glremote); slr = len(slremote);
78 | clr = len(glremote & slremote);
79 | gur = len(guremote); sur = len(suremote);
80 | cur = len(guremote & suremote);
81 | tglp += glp; tslp += slp; tclp += clp;
82 | tgup += gup; tsup += sup; tcup += cup;
83 | tglr += glr; tslr += slr; tclr += clr;
84 | tgur += gur; tsur += sur; tcur += cur;
85 | result["n"] += 1;
86 | if trace:
87 | if gold.id in scores:
88 | print("ucca.evaluate(): duplicate graph identifier: {}"
89 | "".format(gold.id), file = sys.stderr);
90 | score = {"labeled": dict(), "unlabeled": dict()};
91 | score["labeled"]["primary"] = {"g": glp, "s": slp, "c": clp};
92 | score["labeled"]["remote"] = {"g": glr, "s": slr, "c": clr};
93 | score["unlabeled"]["primary"] = {"g": gup, "s": sup, "c": cup};
94 | score["unlabeled"]["remote"] = {"g": gur, "s": sur, "c": cur};
95 | scores[gold.id] = score;
96 | if trace > 1: print("{}: {}".format(gold.id, score));
97 | p, r, f = fscore(tglp, tslp, tclp);
98 | result["labeled"]["primary"] = \
99 | {"g": tglp, "s": tslp, "c": tclp, "p": p, "r": r, "f": f};
100 | p, r, f = fscore(tglr, tslr, tclr);
101 | result["labeled"]["remote"] = \
102 | {"g": tglr, "s": tslr, "c": tclr, "p": p, "r": r, "f": f};
103 | p, r, f = fscore(tgup, tsup, tcup);
104 | result["unlabeled"]["primary"] = \
105 | {"g": tgup, "s": tsup, "c": tcup, "p": p, "r": r, "f": f};
106 | p, r, f = fscore(tgur, tsur, tcur);
107 | result["unlabeled"]["remote"] = \
108 | {"g": tgur, "s": tsur, "c": tcur, "p": p, "r": r, "f": f};
109 | if trace: result["scores"] = scores;
110 | return result;
111 |
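The comment above identify() sketches a missing higher-level interface; one possible shape for it, mirroring what tuples() already does inline, could look as follows (a sketch only, not part of mtool; the name node_identities is made up):

    from score.core import explode
    from score.ucca import identify

    def node_identities(graph, explode_yields = True):
        # walk every node, accumulating the yield (anchor cover) of each one
        result = dict()
        for node in graph.nodes:
            result, _ = identify(graph, node.id, result)
        # optionally map each yield to a character set for robust comparison
        if explode_yields and graph.input:
            result = {id: explode(graph.input, result[id]) for id in result}
        return result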
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 | with open("README.md", "r") as fh:
4 | long_description = fh.read()
5 |
6 |
7 | exec(open('version.py').read())
8 | release = __version__
9 | version = '.'.join(release.split('.')[:2])
10 |
11 |
12 | setuptools.setup(
13 | name="mtool",
14 | version="0.0.1",
15 | author="Stephan Oepen , Marco Kuhlmann , "
16 | "Daniel Hershcovich , Tim O'Gorman ",
17 | author_email="mrp-organizers@nlpl.eu",
18 | description="The Swiss Army Knife of Meaning Representation",
19 | long_description=long_description,
20 | long_description_content_type="text/markdown",
21 | url="https://github.com/cfmrp/mtool",
22 | packages=setuptools.find_packages(),
23 | py_modules=["graph", "analyzer", "inspector", "treewidth", 'main', 'version'],
24 | license='LGPL-3.0',
25 | install_requires=[
26 | 'numpy',
27 | ],
28 | entry_points = {
29 | 'console_scripts': ['mtool=main:main'],
30 | },
31 | classifiers=[
32 | "Environment :: Console",
33 | "Development Status :: 4 - Beta",
34 | "Intended Audience :: Developers",
35 | "Intended Audience :: Education",
36 | "Intended Audience :: Science/Research",
37 | "Operating System :: OS Independent",
38 | "Programming Language :: Python :: 3",
39 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
40 | "Topic :: Scientific/Engineering :: Information Analysis"
41 | ]
42 | )
43 |
--------------------------------------------------------------------------------
/smatch/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (C) 2015 Shu Cai and Kevin Knight
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/smatch/README.md:
--------------------------------------------------------------------------------
1 | # Smatch (semantic match) tool
2 |
3 | This is the source code of [smatch](http://amr.isi.edu/evaluation.html), an evaluation tool for AMR (Abstract Meaning Representation).
4 |
5 | The code here is based on [Shu Cai](https://github.com/snowblink14)'s [smatch v1.0.2](https://github.com/danielhers/smatch/tree/1.0.2), with some changes to allow programmatic usage.
6 |
7 | More details and updates about AMR and smatch can be found in USC/ISI's AMR site: http://amr.isi.edu/index.html
8 |
--------------------------------------------------------------------------------
/smatch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/smatch/__init__.py
--------------------------------------------------------------------------------
/treewidth.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import sys
3 |
4 | def make_clique(graph, nodes):
5 | for v1 in nodes:
6 | for v2 in nodes:
7 | if v1 != v2:
8 | graph[v1].add(v2)
9 |
10 | def count_fillin(graph, nodes):
11 | """How many edges would be needed to make v a clique."""
12 | count = 0
13 | for v1 in nodes:
14 | for v2 in nodes:
15 | if v1 != v2 and v2 not in graph[v1]:
16 | count += 1
17 | return count/2
18 |
19 | def is_clique(graph, vs):
20 | for v1 in vs:
21 | for v2 in vs:
22 | if v1 != v2 and v2 not in graph[v1]:
23 | return False
24 | return True
25 |
26 | def simplicial(graph, v):
27 | return is_clique(graph, graph[v])
28 |
29 | def almost_simplicial(graph, v):
30 | for u in graph[v]:
31 | if is_clique(graph, graph[v] - {u}):
32 | return True
33 | return False
34 |
35 | def eliminate_node(graph, v):
36 | make_clique(graph, graph[v])
37 | delete_node(graph, v)
38 |
39 | def delete_node(graph, v):
40 | for u in graph[v]:
41 | graph[u].remove(v)
42 | del graph[v]
43 |
44 | def contract_edge(graph, u, v):
45 | """Contract edge (u,v) by removing u"""
46 | graph[v] = (graph[v] | graph[u]) - {u, v}
47 | del graph[u]
48 | for w in graph:
49 | if u in graph[w]:
50 | graph[w] = (graph[w] | {v}) - {u, w}
51 |
52 | def copy_graph(graph):
53 | return {u:set(graph[u]) for u in graph}
54 |
55 | def upper_bound(graph):
56 | """Min-fill."""
57 | graph = copy_graph(graph)
58 | dmax = 0
59 | order = []
60 | while len(graph) > 0:
61 | #d, u = min((len(graph[u]), u) for u in graph) # min-width
62 | d, u = min((count_fillin(graph, graph[u]), u) for u in graph)
63 | dmax = max(dmax, len(graph[u]))
64 | eliminate_node(graph, u)
65 | order.append(u)
66 | return dmax, order
67 |
68 | def lower_bound(graph):
69 | """Minor-min-width"""
70 | graph = copy_graph(graph)
71 | dmax = 0
72 | while len(graph) > 0:
73 | # pick node of minimum degree
74 | d, u = min((len(graph[u]), u) for u in graph)
75 | dmax = max(dmax, d)
76 |
77 | # Gogate and Dechter: minor-min-width
78 | nb = graph[u] - {u}
79 | if len(nb) > 0:
80 | _, v = min((len(graph[v] & nb), v) for v in nb)
81 | contract_edge(graph, u, v)
82 | else:
83 | delete_node(graph, u)
84 | return dmax
85 |
86 | class Solution(object):
87 | pass
88 |
89 | def quickbb(graph):
90 | """Gogate and Dechter, A complete anytime algorithm for treewidth. UAI
91 | 2004. http://arxiv.org/pdf/1207.4109.pdf"""
92 |
93 | """Given a permutation of the nodes (called an elimination ordering),
94 | for each node, remove the node and make its neighbors into a clique.
95 | The maximum degree of the nodes at the time of their elimination is
96 | the width of the tree decomposition corresponding to that ordering.
97 | The treewidth of the graph is the minimum over all possible
98 | permutations.
99 | """
100 |
101 | best = Solution() # this gets around the lack of nonlocal in Python 2
102 | best.count = 0
103 |
104 | def bb(graph, order, f, g):
105 | best.count += 1
106 | if len(graph) < 2:
107 | if f < best.ub:
108 | assert f == g
109 | best.ub = f
110 | best.order = list(order) + list(graph)
111 | else:
112 | vs = []
113 | for v in graph:
114 | # very important pruning rule
115 | if simplicial(graph, v) or almost_simplicial(graph, v) and len(graph[v]) <= lb:
116 | vs = [v]
117 | break
118 | else:
119 | vs.append(v)
120 |
121 | for v in vs:
122 | graph1 = copy_graph(graph)
123 | eliminate_node(graph1, v)
124 | order1 = order + [v]
125 | # treewidth for current order so far
126 | g1 = max(g, len(graph[v]))
127 | # lower bound given where we are
128 | f1 = max(g, lower_bound(graph1))
129 | if f1 < best.ub:
130 | bb(graph1, order1, f1, g1)
131 |
132 | graph = { u : set(graph[u]) for u in graph }
133 |
134 | order = []
135 | best.ub, best.order = upper_bound(graph)
136 | lb = lower_bound(graph)
137 | if lb < best.ub:
138 | bb(graph, order, lb, 0)
139 |
140 | # Build the tree decomposition
141 | tree = collections.defaultdict(set)
142 | def build(order):
143 | if len(order) < 2:
144 | bag = frozenset(order)
145 | tree[bag] = set()
146 | return
147 | v = order[0]
148 | clique = graph[v]
149 | eliminate_node(graph, v)
150 | build(order[1:])
151 | for tv in tree:
152 | if clique.issubset(tv):
153 | break
154 | bag = frozenset(clique | {v})
155 | tree[bag].add(tv)
156 | tree[tv].add(bag)
157 | build(best.order)
158 | return tree
159 |
160 | if True and __name__ == "__main__":
161 | import fileinput, sys
162 | import graph
163 |
164 | s = []
165 | for line in fileinput.input():
166 | if line.lstrip().startswith('#'):
167 | continue
168 | s.append(line)
169 | s = ''.join(s)
170 |
171 | i = 0
172 | while i < len(s):
173 | try:
174 | g, i1 = graph.scan_graph(s, start=i, return_end=True)
175 | except:
176 | sys.stderr.write("couldn't read: %s\n" % s[i:i1])
177 |
178 | if g is None: break
179 | i = i1
180 |
181 | g = g.undirected_graph()
182 |
183 | tree = quickbb(g)
184 | print(max(len(tv)-1 for tv in tree))
185 | #print tree
186 |
187 | if False and __name__ == "__main__":
188 | import fileinput, sys
189 |
190 | g = collections.defaultdict(set)
191 | for line in fileinput.input():
192 | if line.rstrip() == "END":
193 | break
194 | u, v = line.split()
195 | g[u].add(v)
196 | g[v].add(u)
197 |
198 | tree = quickbb(g)
199 | root = list(tree)[0]
200 | def visit(tu, indent, memo):
201 | if tu in memo: return
202 | memo.add(tu)
203 | print(" "*indent, " ".join(tu))
204 | for tv in tree[tu]:
205 | visit(tv, indent+2, memo)
206 | visit(root, 0, set())
207 | print("bags:", len(tree))
208 | print("width:", max(len(tv)-1 for tv in tree))
209 |
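A tiny worked example of the elimination-ordering idea described in the quickbb() docstring (a sketch assuming treewidth.py from this repository is importable): a four-cycle needs a single fill edge, and its treewidth is 2.

    import collections
    from treewidth import quickbb

    # undirected adjacency sets for the cycle 1 - 2 - 3 - 4 - 1
    g = collections.defaultdict(set)
    for u, v in [(1, 2), (2, 3), (3, 4), (4, 1)]:
        g[u].add(v)
        g[v].add(u)

    tree = quickbb(g)                          # bag -> neighbouring bags
    print(max(len(bag) - 1 for bag in tree))   # width of the decomposition: 2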
--------------------------------------------------------------------------------
/ucca/README.md:
--------------------------------------------------------------------------------
1 | Universal Conceptual Cognitive Annotation
2 | ============================
3 | UCCA is a linguistic framework for semantic annotation, whose details
4 | are described in [the following paper](http://www.cs.huji.ac.il/~oabend/papers/ucca_acl.pdf):
5 |
6 | @inproceedings{abend2013universal,
7 | author={Abend, Omri and Rappoport, Ari},
8 | title={{U}niversal {C}onceptual {C}ognitive {A}nnotation ({UCCA})},
9 | booktitle={Proc. of ACL},
10 | month={August},
11 | year={2013},
12 | pages={228--238},
13 | url={http://aclweb.org/anthology/P13-1023}
14 | }
15 |
16 | This Python 3 package provides an API to the UCCA annotation and tools to
17 | manipulate and process it. Its main features are conversion between different
18 | representations of UCCA annotations, and rich objects for all of the linguistic
19 | relations which appear in the theoretical framework (see `core`, `layer0`, `layer1`
20 | and `convert` modules under the `ucca` package).
21 |
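A minimal usage sketch (illustrative only; exact signatures may vary between
UCCA releases, and `from_text`/`to_text` are assumed to behave as in
`ucca.convert`):

    from ucca import layer0
    from ucca.convert import from_text, to_text

    # from_text() yields Passage objects built from plain text lines.
    passage = next(iter(from_text(["Hello world .", ""], passage_id="demo")))

    # layer 0 holds the terminals; .words filters out punctuation.
    print([t.text for t in passage.layer(layer0.LAYER_ID).words])

    # and back to plain text lines again.
    print(list(to_text(passage)))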
22 | The `scripts` package contains various utilities for processing passage files.
23 |
24 | To parse text to UCCA graphs, use [TUPA, the UCCA parser](http://www.cs.huji.ac.il/~danielh/tupa).
25 |
26 |
27 | Authors
28 | ------
29 | * Amit Beka: amit.beka@gmail.com
30 | * Daniel Hershcovich: danielh@cs.huji.ac.il
31 |
32 |
33 | License
34 | -------
35 | This package is licensed under the GPLv3 or later license.
36 |
37 | [Build status (Travis CI)](https://travis-ci.org/danielhers/ucca)
38 | [Build status (AppVeyor)](https://ci.appveyor.com/project/danielh/ucca)
39 | [Documentation](http://ucca.readthedocs.io/en/latest/)
40 | [PyPI package](https://badge.fury.io/py/UCCA)
41 |
--------------------------------------------------------------------------------
/ucca/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/ucca/__init__.py
--------------------------------------------------------------------------------
/ucca/ioutil.py:
--------------------------------------------------------------------------------
1 | """Input/output utility functions for UCCA scripts."""
2 | import os
3 | import sys
4 | import time
5 | from collections import defaultdict
6 | from glob import glob
7 | from itertools import filterfalse, chain
8 | from xml.etree.ElementTree import ParseError
9 |
10 | from ucca.convert import file2passage, passage2file, from_text, to_text, split2segments
11 | from ucca.core import Passage
12 |
13 | DEFAULT_LANG = "en"
14 | DEFAULT_ATTEMPTS = 3
15 | DEFAULT_DELAY = 5
16 |
17 |
18 | class LazyLoadedPassages:
19 | """
20 | Iterable interface to Passage objects that loads files on-the-go and can be iterated more than once
21 | """
22 | def __init__(self, files, sentences=False, paragraphs=False, converters=None, lang=DEFAULT_LANG,
23 | attempts=DEFAULT_ATTEMPTS, delay=DEFAULT_DELAY):
24 | self.files = files
25 | self.sentences = sentences
26 | self.paragraphs = paragraphs
27 | self.split = self.sentences or self.paragraphs
28 | self.converters = defaultdict(lambda: from_text) if converters is None else converters
29 | self.lang = lang
30 | self.attempts = attempts
31 | self.delay = delay
32 | self._files_iter = None
33 | self._split_iter = None
34 | self._file_handle = None
35 |
36 | def __iter__(self):
37 | self._files_iter = iter(self.files)
38 | self._split_iter = None
39 | self._file_handle = None
40 | return self
41 |
42 | def __next__(self):
43 | while True:
44 | passage = self._next_passage()
45 | if passage is not None:
46 | return passage
47 |
48 | def _next_passage(self):
49 | passage = None
50 | if self._split_iter is None:
51 | try:
52 | file = next(self._files_iter)
53 | except StopIteration: # Finished iteration
54 | raise
55 | if isinstance(file, Passage): # Not really a file, but a Passage
56 | passage = file
57 | else: # A file
58 | attempts = self.attempts
59 | while not os.path.exists(file):
60 | if attempts == 0:
61 | print("File not found: %s" % file, file=sys.stderr)
62 | return None
63 | print("Failed reading %s, trying %d more times..." % (file, attempts), file=sys.stderr)
64 | time.sleep(self.delay)
65 | attempts -= 1
66 | try:
67 | passage = file2passage(file) # XML or binary format
68 | except (IOError, ParseError) as e: # Failed to read as passage file
69 | base, ext = os.path.splitext(os.path.basename(file))
70 | converter = self.converters.get(ext.lstrip("."))
71 | if converter is None:
72 | raise IOError("Could not read %s file. Try adding '.txt' suffix: '%s'" % (ext, file)) from e
73 | self._file_handle = open(file, encoding="utf-8")
74 | self._split_iter = iter(converter(chain(self._file_handle, [""]), passage_id=base, lang=self.lang))
75 | if self.split:
76 | if self._split_iter is None:
77 | self._split_iter = (passage,)
78 | self._split_iter = iter(s for p in self._split_iter for s in
79 | split2segments(p, is_sentences=self.sentences, lang=self.lang))
80 | if self._split_iter is not None: # Either set before or initialized now
81 | try:
82 | passage = next(self._split_iter)
83 | except StopIteration: # Finished this converter
84 | self._split_iter = None
85 | if self._file_handle is not None:
86 | self._file_handle.close()
87 | self._file_handle = None
88 | return None
89 | return passage
90 |
91 | # The following three methods are implemented to support shuffle;
92 | # note files are shuffled but there is no shuffling within files, as it would not be efficient.
93 | # Note also the inconsistency because these access the files while __iter__ accesses individual passages.
94 | def __len__(self):
95 | return len(self.files)
96 |
97 | def __getitem__(self, i):
98 | return self.files[i]
99 |
100 | def __setitem__(self, i, value):
101 | self.files[i] = value
102 |
103 | def __bool__(self):
104 | return bool(self.files)
105 |
106 |
107 | def resolve_patterns(filename_patterns):
108 | for pattern in [filename_patterns] if isinstance(filename_patterns, str) else filename_patterns:
109 | yield from sorted(glob(pattern)) or [pattern]
110 |
111 |
112 | def get_passages(filename_patterns, **kwargs):
113 | for filenames in resolve_patterns(filename_patterns):
114 | yield from read_files_and_dirs(filenames, **kwargs)
115 |
116 |
117 | def gen_files(files_and_dirs):
118 | """
119 | :param files_and_dirs: iterable of files and/or directories to look in
120 | :return: all files given, plus any files directly under any directory given
121 | """
122 | for file_or_dir in [files_and_dirs] if isinstance(files_and_dirs, str) else files_and_dirs:
123 | if os.path.isdir(file_or_dir):
124 | yield from filterfalse(os.path.isdir, (os.path.join(file_or_dir, f)
125 | for f in sorted(os.listdir(file_or_dir))))
126 | else:
127 | yield file_or_dir
128 |
129 |
130 | def read_files_and_dirs(files_and_dirs, sentences=False, paragraphs=False, converters=None, lang=DEFAULT_LANG,
131 | attempts=DEFAULT_ATTEMPTS, delay=DEFAULT_DELAY):
132 | """
133 | :param files_and_dirs: iterable of files and/or directories to look in
134 | :param sentences: whether to split to sentences
135 | :param paragraphs: whether to split to paragraphs
136 | :param converters: dict of input format converters to use based on the file extension
137 | :param lang: language to use for tokenization model
138 | :param attempts: number of times to try reading a file before giving up
139 | :param delay: number of seconds to wait before subsequent attempts to read a file
140 | :return: lazy-loaded passages from all files given, plus any files directly under any directory given
141 | """
142 | return LazyLoadedPassages(list(gen_files(files_and_dirs)), sentences=sentences, paragraphs=paragraphs,
143 | converters=converters, lang=lang, attempts=attempts, delay=delay)
144 |
145 |
146 | def write_passage(passage, output_format=None, binary=False, outdir=".", prefix="", converter=None, verbose=True,
147 | append=False, basename=None):
148 | """
149 | Write a given UCCA passage in any format.
150 | :param passage: Passage object to write
151 | :param output_format: filename suffix (if given "ucca", suffix will be ".pickle" or ".xml" depending on `binary')
152 | :param binary: save in pickle format with ".pickle" suffix
153 | :param outdir: output directory, should exist already
154 | :param prefix: string to prepend to output filename
155 | :param converter: function to apply to passage before saving (if output_format is not "ucca"/"pickle"/"xml"),
156 | returning iterable of strings, each corresponding to an output line
157 | :param verbose: print "Writing passage" message
158 | :param append: if using converter, append to output file rather than creating a new file
159 | :param basename: use this instead of `passage.ID' for the output filename
160 | :return: path of created output file
161 | """
162 | os.makedirs(outdir, exist_ok=True)
163 | suffix = output_format if output_format and output_format != "ucca" else ("pickle" if binary else "xml")
164 | outfile = os.path.join(outdir, prefix + (basename or passage.ID) + "." + suffix)
165 | if verbose:
166 | print("%s '%s'..." % ("Appending to" if append else "Writing passage", outfile))
167 | if output_format is None or output_format in ("ucca", "pickle", "xml"):
168 | passage2file(passage, outfile, binary=binary)
169 | else:
170 | with open(outfile, "a" if append else "w", encoding="utf-8") as f:
171 | f.writelines(map("{}\n".format, (converter or to_text)(passage)))
172 | return outfile
173 |
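A hedged usage sketch for the helpers above (the glob pattern and output directory are made up; everything else follows the signatures documented in this file):

    from ucca.ioutil import get_passages, write_passage

    # lazily read passages from files matching a pattern, split into sentences,
    # and write each one back out in pickle format under out/
    for passage in get_passages("data/*.xml", sentences=True):
        write_passage(passage, binary=True, outdir="out", verbose=False)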
--------------------------------------------------------------------------------
/ucca/layer0.py:
--------------------------------------------------------------------------------
1 | """Encapsulates all word and punctuation symbols layer.
2 |
3 | Layer 0 is the basic layer for all the UCCA annotation, as it includes the
4 | actual words and punctuation marks found in the :class:`core`.Passage.
5 |
6 | Layer 0 has only one type of node, :class:`Terminal`. This is a subtype of
7 | :class:`core`.Node, and can have one of two tags: Word or Punctuation.
8 |
9 | """
10 |
11 | from ucca import core
12 |
13 | LAYER_ID = '0'
14 |
15 |
16 | class NodeTags:
17 | Punct = 'Punctuation'
18 | Word = 'Word'
19 | __init__ = None
20 |
21 |
22 | ATTRIB_KEYS = ('text', 'paragraph', 'paragraph_position')
23 |
24 |
25 | class Terminal(core.Node):
26 | """Layer 0 Node type, represents a word or a punctuation mark.
27 |
28 | Terminals are :class:`core`.Node objects which represent a word or
29 | a punctuation mark in the :class:`core`.Passage object. They are immutable,
30 | as they shouldn't be changed throughout their use and have no children.
31 | Hence, they can be compared and hashed, unlike other core.Node subclasses.
32 |
33 | Attributes:
34 | ID: the unique ID of each Terminal is its global position in the
35 | Passage, e.g. ID=0.4 is the 4th Terminal in the :class:`Passage`.
36 | tag: from NodeTags
37 | layer: '0' (LAYER_ID)
38 | attrib: returns a copy of the attribute dictionary, so changing it
39 | will not affect the Terminal object
40 | text: text of the Terminal, whether punctuation or a word
41 | position: global position of the Terminal in the passage, starting at 1
42 | paragraph: which paragraph the Terminal belongs to, starting at 1
43 | para_pos: the position of the Terminal in the paragraph,
44 | starting at 1 (per paragraph).
45 | punct: whether the Terminal is a punctuation mark (boolean)
46 |
47 | """
48 |
49 | @property
50 | def text(self):
51 | return self.attrib['text']
52 |
53 | @property
54 | def position(self):
55 | # the format of ID is LAYER_ID + ID separator + position
56 | return int(self.ID[len(LAYER_ID) + len(core.Node.ID_SEPARATOR):])
57 |
58 | @property
59 | def para_pos(self):
60 | return self.attrib['paragraph_position']
61 |
62 | @property
63 | def paragraph(self):
64 | return self.attrib['paragraph']
65 |
66 | @property
67 | def tok(self):
68 | try:
69 | return self.layer.extra["doc"][self.paragraph - 1][self.para_pos - 1]
70 | except (KeyError, IndexError):
71 | return None
72 |
73 | def get_annotation(self, attr, as_array=False):
74 | return attr(self.tok[attr.value]) if as_array else self.extra.get(attr.key)
75 |
76 | @property
77 | def attrib(self):
78 | return self._attrib.copy()
79 |
80 | @property
81 | def punct(self):
82 | return self.tag == NodeTags.Punct
83 |
84 | def get_terminals(self, punct=True, *args, **kwargs):
85 | """Returns a list containing just this Terminal.
86 |
87 | :param punct: whether to include punctuation Terminals, defaults to True
88 |
89 | :return: a list of :class:`layer0`.Terminal objects
90 | """
91 | del args, kwargs
92 | return [] if self.punct and not punct else [self]
93 |
94 | def equals(self, other, *, ordered=False, **kwargs):
95 | """Equals if the Terminals are of the same Layer, tag, position & text.
96 |
97 | :param other: another Terminal to equal to
98 | :param ordered: unused, here for API conformity.
99 |
100 | :return: True iff the two Terminals are equal.
101 | """
102 | return (self.layer.ID == other.layer.ID and self.text == other.text
103 | and self.position == other.position and self.tag == other.tag
104 | and self.paragraph == other.paragraph
105 | and self.para_pos == other.para_pos)
106 |
107 | def __eq__(self, other):
108 | """Equals if both of the same Passage, Layer, position, tag & text."""
109 | if other.layer.ID != LAYER_ID:
110 | return False
111 | return (self.root == other.root and self.layer.ID == other.layer.ID
112 | and self.position == other.position
113 | and self.text == other.text and self.tag == other.tag
114 | and self.paragraph == other.paragraph
115 | and self.para_pos == other.para_pos)
116 |
117 | def __hash__(self):
118 | """Hashes the Terminals according to its ID and text."""
119 | return hash(self.ID + str(self.text))
120 |
121 | def __str__(self):
122 | return self.text
123 |
124 | # Terminal are immutable (except the extra dictionary which is
125 | # just a temporary playground) and have no children, so enforce it
126 | def add(self, *args, **kwargs):
127 | raise NotImplementedError()
128 |
129 | def remove(self, *args, **kwargs):
130 | raise NotImplementedError()
131 |
132 |
133 | class Layer0(core.Layer):
134 | """Represents the :class:`Terminal` objects layer.
135 |
136 | Attributes:
137 | words: a tuple of only the words (not punctuation) Terminals, ordered
138 | pairs: a tuple of (position, terminal) tuples of all Terminals, ordered
139 |
140 | """
141 |
142 | def __init__(self, root, attrib=None):
143 | super().__init__(ID=LAYER_ID, root=root, attrib=attrib)
144 |
145 | @property
146 | def words(self):
147 | return tuple(x for x in self._all if not x.punct)
148 |
149 | @property
150 | def pairs(self):
151 | return tuple(enumerate(self._all, start=1))
152 |
153 | def by_position(self, pos):
154 | """Returns the Terminals at the position given.
155 |
156 | :param pos: the position of the Terminal object
157 | :return: the Terminal in this position
158 | :raise IndexError: if the position is out of bounds
159 | """
160 | return self._all[pos - 1] # positions start at 1, not 0
161 |
162 | def add_terminal(self, text, punct, paragraph=1):
163 | """Adds the next Terminal at the next available position.
164 |
165 | Creates a :class:`Terminal` object with the next position, assuming that
166 | all positions are filled (no holes).
167 |
168 | :param text: the text of the Terminal
169 | :param punct: boolean, whether it's a punctuation mark
170 | :param paragraph: paragraph number, defaults to 1
171 |
172 | :return: the created Terminal
173 |
174 | :raise DuplicateIdError: if trying to add an already existing Terminal,
175 | caused by un-ordered Terminal positions in the layer
176 | """
177 | position = len(self._all) + 1 # we want positions to start with 1
178 | para_pos = self._all[-1].para_pos + 1 if position > 1 and paragraph == self._all[-1].paragraph else 1
179 | tag = NodeTags.Punct if punct else NodeTags.Word
180 | return Terminal(ID="{}{}{}".format(LAYER_ID, core.Node.ID_SEPARATOR, position),
181 | root=self.root, tag=tag,
182 | attrib={'text': text,
183 | 'paragraph': paragraph,
184 | 'paragraph_position': para_pos})
185 |
186 | def copy(self, other_passage):
187 | """Creates a copied Layer0 object and Terminals in other_passage.
188 |
189 | :param other_passage: the Passage to copy self to
190 |
191 | """
192 | other = Layer0(root=other_passage, attrib=self.attrib.copy())
193 | other.extra = self.extra.copy()
194 | for t in self._all:
195 | copied = other.add_terminal(t.text, t.punct, t.paragraph)
196 | copied.extra = t.extra.copy()
197 |
198 | def docs(self, num_paragraphs=1):
199 | docs = self.extra.setdefault("doc", [[]])
200 | while len(docs) < num_paragraphs:
201 | docs.append([])
202 | return docs
203 |
204 | def doc(self, paragraph):
205 | return self.docs(paragraph)[paragraph - 1]
206 |
207 |
208 | def is_punct(node):
209 | """Returns whether the unit is a layer0 punctuation (for all Units)."""
210 | return node.layer.ID == LAYER_ID and node.punct
211 |
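
A minimal usage sketch of the Layer0 API above (not part of the module itself): it assumes the surrounding ucca package, including core.Passage, is importable, and the passage ID and token texts are purely illustrative.

    from ucca import core, layer0

    passage = core.Passage("wsj_0001.1")       # empty passage, illustrative ID
    l0 = layer0.Layer0(root=passage)           # attach the Terminal layer

    l0.add_terminal("Pierre", punct=False)     # position 1
    l0.add_terminal("Vinken", punct=False)     # position 2
    l0.add_terminal(",", punct=True)           # position 3

    assert l0.by_position(2).text == "Vinken"  # positions are 1-based
    assert [t.text for t in l0.words] == ["Pierre", "Vinken"]  # punctuation excluded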
--------------------------------------------------------------------------------
/validate/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cfmrp/mtool/4cee3a2590d4ec7648408cede05adfaeccc0c698/validate/__init__.py
--------------------------------------------------------------------------------
/validate/amr.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | from validate.utilities import report;
5 |
6 | def test(graph, actions, stream = sys.stderr):
7 | n = 0;
8 | return n;
9 |
10 |
--------------------------------------------------------------------------------
/validate/core.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | import validate.amr;
4 | import validate.eds;
5 | import validate.sdp;
6 | import validate.ucca;
7 | from validate.utilities import report;
8 |
9 |
10 | def test(graph, actions, stream = sys.stderr):
11 | n = 0;
12 | if not isinstance(graph.id, str) or len(graph.id) == 0:
13 | n += 1;
14 | report(graph,
15 | "missing or invalid ‘id’ property",
16 | stream = stream);
17 | if not isinstance(graph.flavor, int) or graph.flavor not in {0, 1, 2}:
18 | n += 1;
19 | report(graph,
20 | "missing or invalid ‘flavor’ property",
21 | stream = stream);
22 | if not isinstance(graph.framework, str) or \
23 | graph.framework not in {"ccd", "dm", "pas", "psd", "ptg", "ud",
24 | "eds", "ucca", "amr", "drg"}:
25 | n += 1;
26 | report(graph,
27 | "missing or invalid ‘framework’ property",
28 | stream = stream);
29 | elif graph.flavor == 0 and \
30 | graph.framework not in {"ccd", "dm", "pas", "psd", "ud"} or \
31 | graph.flavor == 1 and graph.framework not in {"eds", "ptg", "ucca"} or \
32 | graph.flavor == 2 and graph.framework not in {"amr", "drg"}:
33 | n += 1;
34 | report(graph,
35 | "invalid Flavor ({}) framework: ‘{}’"
36 | "".format(graph.flavor, graph.framework), stream = stream);
37 |
38 | if "input" in actions:
39 | if not isinstance(graph.input, str) or len(graph.input) == 0:
40 | n += 1;
41 | report(graph,
42 | "missing or invalid ‘input’ property",
43 | stream = stream);
44 |
45 | l = len(graph.input) if graph.input else 0;
46 | for node in graph.nodes:
47 | if not isinstance(node.id, int):
48 | n += 1;
49 | report(graph,
50 | "invalid identifier",
51 | node = node, stream = stream);
52 | if "anchors" in actions and node.anchors and l:
53 | for anchor in node.anchors:
54 | if anchor["from"] < 0 or anchor["from"] > l \
55 | or anchor["to"] < 0 or anchor["to"] > l \
56 | or anchor["from"] > anchor["to"]:
57 | n += 1;
58 | report(graph,
59 | "invalid anchor: {}".format(anchor),
60 | node = node, stream = stream);
61 |
62 | if "edges" in actions:
63 | #
64 | # the following is most likely redundant: the MRP input codec already has
65 | # to make sure all source and target identifiers actually exist. maybe
66 | # add a type check (int), though?
67 | #
68 | nodes = {node.id: node for node in graph.nodes};
69 | for edge in graph.edges:
70 | if not isinstance(edge.src, int) or edge.src not in nodes:
71 | n += 1;
72 | report(graph,
73 | "invalid source",
74 | edge = edge, stream = stream);
75 | if not isinstance(edge.tgt, int) or edge.tgt not in nodes:
76 | n += 1;
77 | report(graph,
78 | "invalid target",
79 | edge = edge, stream = stream);
80 | num_attrib = len(edge.attributes) if edge.attributes else 0;
81 | num_values = len(edge.values) if edge.values else 0;
82 | if num_attrib != num_values:
83 | n += 1;
84 | report(graph,
85 | "unaligned ‘attributes’ vs. ‘values’",
86 | edge = edge, stream = stream);
87 |
88 | sdp = {"ccd", "dm", "pas", "psd"};
89 | if graph.framework == "amr" and "amr" in actions:
90 | n += validate.amr.test(graph, actions, stream);
91 | elif graph.framework == "eds" and "eds" in actions:
92 | n += validate.eds.test(graph, actions, stream);
93 | elif graph.framework in sdp and (sdp & actions):
94 | n += validate.sdp.test(graph, actions, stream);
95 | elif graph.framework == "ucca" and "ucca" in actions:
96 | n += validate.ucca.test(graph, actions, stream);
97 |
98 | return n;
99 |
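
A hypothetical driver for the checks above (mtool's real entry point is not shown here). The actions set is exactly what test() consults: "input", "anchors" and "edges" switch on the generic checks, while a framework name ("amr", "eds", "ucca", or an SDP flavor such as "dm") switches on the framework-specific tests dispatched at the end.

    import sys

    import validate.core

    def validate_graphs(graphs, stream=sys.stderr):
        """Run the generic and framework-specific checks over decoded graphs."""
        n = 0
        for graph in graphs:
            # enable all generic checks plus the checks for this graph's framework
            actions = {"input", "anchors", "edges", graph.framework}
            n += validate.core.test(graph, actions, stream=stream)
        return n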
--------------------------------------------------------------------------------
/validate/eds.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | from validate.utilities import report;
5 |
6 | def test(graph, actions, stream = sys.stderr):
7 | n = 0;
8 | for node in graph.nodes:
9 | if not isinstance(node.label, str) or len(node.label) == 0:
10 | n += 1;
11 | report(graph,
12 | "missing or invalid label",
13 | node = node, framework = "EDS", stream = stream);
14 | message = None;
15 | if "anchors" in actions:
16 | if not isinstance(node.anchors, list):
17 | message = "missing or invalid anchoring";
18 | elif len(node.anchors) != 1 \
19 | or ("from" not in node.anchors[0] or "to" not in node.anchors[0]):
20 | message = "invalid ‘anchors’ value: {}".format(node.anchors);
21 | if message is not None:
22 | n += 1;
23 | report(graph, message,
24 | node = node, framework = "EDS", stream = stream);
25 | return n;
26 |
27 |
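
For reference, a node passes these checks if it carries a non-empty label and, when "anchors" is among the requested actions, exactly one anchor with both "from" and "to" offsets. Written as a Python literal (predicate name and offsets are illustrative only):

    # an MRP-style EDS node record that would satisfy the checks above
    node = {"id": 0,
            "label": "_dog_n_1",
            "anchors": [{"from": 4, "to": 7}]}   # exactly one {from, to} span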
--------------------------------------------------------------------------------
/validate/sdp.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | from validate.utilities import report;
5 |
6 | def test(graph, actions, stream = sys.stderr):
7 | n = 0;
8 | return n;
9 |
10 |
--------------------------------------------------------------------------------
/validate/ucca.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from validate.utilities import report
4 |
5 | CATEGORIES = {'H', 'A', 'P', 'S', 'D', 'G', 'C', 'E', 'F', 'N', 'R', 'T', 'Q', 'L', 'U'}
6 |
7 |
8 | def is_primary(edge):
9 | for attribute, value in zip(edge.attributes or (), edge.values or ()):
10 | if attribute == "remote" and value != "false":
11 | return False
12 | return True
13 |
14 |
15 | def is_implicit(node):
16 | for prop, value in zip(node.properties or (), node.values or ()):
17 | if prop == "implicit" and value != "false":
18 | return True
19 | return False
20 |
21 |
22 | def test(graph, actions, stream=sys.stderr):
23 | n = 0
24 | for edge in graph.edges:
25 | if not isinstance(edge.lab, str) or len(edge.lab) == 0:
26 | n += 1
27 | report(graph,
28 | "missing or invalid label",
29 | edge=edge, framework="UCCA", stream=stream)
30 | elif edge.lab.upper() not in CATEGORIES:
31 | n += 1
32 | report(graph,
33 | "edge label is not a UCCA category",
34 | edge=edge, framework="UCCA", stream=stream)
35 | if edge.is_loop():
36 | n += 1
37 | report(graph,
38 | "loop edge",
39 | edge=edge, framework="UCCA", stream=stream)
40 | roots = []
41 | for node in graph.nodes:
42 | primary = [edge for edge in node.incoming_edges if is_primary(edge)]
43 | primary_parents = {edge.src for edge in primary}
44 | if not primary:
45 | roots.append(node)
46 | elif len(primary_parents) > 1:
47 | n += 1
48 | report(graph,
49 | "multiple primary parents for node",
50 | node=node, edge=primary[0], framework="UCCA", stream=stream)
51 | if not roots:
52 | n += 1
53 | report(graph,
54 | "no roots in graph",
55 | framework="UCCA", stream=stream)
56 | elif len(roots) > 1:
57 | n += 1
58 | report(graph,
59 | "multiple roots in graph",
60 | node=roots[0], framework="UCCA", stream=stream)
61 | else:
62 | for node in roots:
63 | remotes = [edge for edge in node.incoming_edges if not is_primary(edge)]
64 | if remotes:
65 | n += 1
66 | report(graph,
67 | "root has remote parents",
68 | node=node, edge=remotes[0], framework="UCCA", stream=stream)
69 | for node in graph.nodes:
70 | if node.is_leaf() and not node.anchors and not is_implicit(node):
71 | n += 1
72 | report(graph,
73 | "unanchored non-implicit node",
74 | node=node, framework="UCCA", stream=stream)
75 | return n
76 |
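
A small self-contained illustration of the attribute/value convention that is_primary and is_implicit rely on, using stand-in objects rather than mtool's real Edge and Node classes:

    from types import SimpleNamespace

    from validate.ucca import is_implicit, is_primary

    # a remote UCCA edge carries the attribute "remote" with a non-"false" value
    remote_edge = SimpleNamespace(attributes=["remote"], values=["true"])
    plain_edge = SimpleNamespace(attributes=None, values=None)
    # an implicit node carries the property "implicit" with a non-"false" value
    implicit_node = SimpleNamespace(properties=["implicit"], values=["true"])

    assert not is_primary(remote_edge)   # remote edges are not primary
    assert is_primary(plain_edge)        # no attributes at all: primary
    assert is_implicit(implicit_node)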
--------------------------------------------------------------------------------
/validate/utilities.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | def report(graph, message, node = None, edge = None,
4 | framework = None, level = "E", stream = sys.stderr):
5 | if node is not None:
6 | node = "; node #{}".format(node.id);
7 | else:
8 | node = "";
9 | if edge is not None:
10 | edge = "; edge {} -{}-> {}".format(edge.src, edge.tgt,
11 | edge.lab if edge.lab else "");
12 | else:
13 | edge = "";
14 | if framework is not None:
15 | framework = "{{{}}} ".format(framework);
16 | else:
17 | framework = "";
18 | print("validate(): [{}] {}graph #{}{}{}: {}."
19 | "".format(level, framework, graph.id, node, edge, message),
20 | file = stream);
21 |
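
The diagnostic line produced by report(), reconstructed from the print() call above: for a graph with id "wsj_0001.1" (illustrative), an offending node #3, framework "UCCA" and the default level "E", the call

    report(graph, "unanchored non-implicit node", node=node, framework="UCCA")

writes a single line of the form

    validate(): [E] {UCCA} graph #wsj_0001.1; node #3: unanchored non-implicit node.

to the given stream (sys.stderr unless another stream is passed).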
--------------------------------------------------------------------------------
/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.0.1";
2 |
3 |
--------------------------------------------------------------------------------