├── .dockerignore
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── decomp
├── __init__.py
├── corpus
│ ├── __init__.py
│ └── corpus.py
├── data
│ ├── 1.0
│ │ ├── normalized
│ │ │ ├── document
│ │ │ │ └── annotations
│ │ │ │ │ └── .gitkeep
│ │ │ └── sentence
│ │ │ │ └── annotations
│ │ │ │ ├── factuality.zip
│ │ │ │ ├── genericity.zip
│ │ │ │ ├── protoroles.zip
│ │ │ │ ├── time.zip
│ │ │ │ └── wordsense.zip
│ │ └── raw
│ │ │ └── sentence
│ │ │ └── annotations
│ │ │ ├── factuality.zip
│ │ │ ├── genericity.zip
│ │ │ ├── protoroles.zip
│ │ │ ├── time.zip
│ │ │ └── wordsense.zip
│ ├── 2.0
│ │ ├── normalized
│ │ │ ├── document
│ │ │ │ ├── .gitkeep
│ │ │ │ └── annotations
│ │ │ │ │ ├── .gitkeep
│ │ │ │ │ └── event_structure_mereology.zip
│ │ │ └── sentence
│ │ │ │ └── annotations
│ │ │ │ ├── .gitkeep
│ │ │ │ ├── event_structure_distributivity.zip
│ │ │ │ ├── event_structure_natural_parts.zip
│ │ │ │ ├── factuality.zip
│ │ │ │ ├── genericity.zip
│ │ │ │ ├── protoroles.zip
│ │ │ │ ├── time.zip
│ │ │ │ └── wordsense.zip
│ │ └── raw
│ │ │ ├── document
│ │ │ └── annotations
│ │ │ │ ├── event_structure_mereology.zip
│ │ │ │ └── time.zip
│ │ │ └── sentence
│ │ │ └── annotations
│ │ │ ├── event_structure_distributivity.zip
│ │ │ ├── event_structure_natural_parts.zip
│ │ │ ├── factuality.zip
│ │ │ ├── genericity.zip
│ │ │ ├── protoroles.zip
│ │ │ ├── time.zip
│ │ │ └── wordsense.zip
│ ├── LICENSE
│ └── ud_ids.json
├── graph
│ ├── __init__.py
│ ├── nx.py
│ └── rdf.py
├── semantics
│ ├── __init__.py
│ ├── predpatt.py
│ └── uds
│ │ ├── __init__.py
│ │ ├── annotation.py
│ │ ├── corpus.py
│ │ ├── document.py
│ │ ├── graph.py
│ │ └── metadata.py
├── syntax
│ ├── __init__.py
│ └── dependency.py
└── vis
│ ├── __init__.py
│ └── uds_vis.py
├── docs
├── Makefile
├── README.md
├── make.bat
├── requirements.txt
└── source
│ ├── conf.py
│ ├── data
│ ├── document-graphs.rst
│ ├── index.rst
│ ├── semantic-types.rst
│ ├── sentence-graphs.rst
│ └── syntactic-graphs.rst
│ ├── index.rst
│ ├── install.rst
│ ├── package
│ ├── decomp.corpus.corpus.rst
│ ├── decomp.corpus.rst
│ ├── decomp.graph.nx.rst
│ ├── decomp.graph.rdf.rst
│ ├── decomp.graph.rst
│ ├── decomp.semantics.predpatt.rst
│ ├── decomp.semantics.rst
│ ├── decomp.semantics.uds.annotation.rst
│ ├── decomp.semantics.uds.corpus.rst
│ ├── decomp.semantics.uds.document.rst
│ ├── decomp.semantics.uds.graph.rst
│ ├── decomp.semantics.uds.metadata.rst
│ ├── decomp.semantics.uds.rst
│ ├── decomp.syntax.dependency.rst
│ ├── decomp.syntax.rst
│ ├── decomp.vis.rst
│ ├── decomp.vis.uds_vis.rst
│ └── index.rst
│ └── tutorial
│ ├── assets
│ ├── vis_genericity_no_syntax.png
│ ├── vis_no_protoroles_no_syntax.png
│ ├── vis_no_protoroles_syntax.png
│ ├── vis_no_syntax.png
│ ├── vis_node_props_no_syntax.png
│ ├── vis_node_props_syntax.png
│ ├── vis_protoroles_no_syntax.png
│ ├── vis_protoroles_syntax.png
│ └── vis_syntax.png
│ ├── index.rst
│ ├── querying.rst
│ ├── quick-start.rst
│ ├── reading.rst
│ ├── serializing.rst
│ └── visualization.rst
├── requirements.txt
├── setup.py
├── tests
├── README.md
├── conftest.py
├── data
│ ├── normalized_edge_document_annotation.json
│ ├── normalized_edge_sentence_annotation.json
│ ├── normalized_node_document_annotation.json
│ ├── normalized_node_sentence_annotation.json
│ ├── raw_edge_sentence_annotation.json
│ ├── raw_edge_sentence_annotators.json
│ ├── raw_edge_sentence_annotators.txt
│ ├── raw_node_sentence_annotation.json
│ ├── raw_node_sentence_annotators.txt
│ ├── rawtree.conllu
│ └── vis_data.json
├── requirements.txt
├── test_dependency.py
├── test_predpatt.py
├── test_uds_annotation.py
├── test_uds_corpus.py
├── test_uds_document.py
├── test_uds_graph.py
├── test_uds_metadata.py
└── test_vis.py
└── uds-graph.png
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.6
2 |
3 | WORKDIR /usr/src/decomp
4 |
5 | COPY . .
6 |
7 | RUN pip install --no-cache-dir -r requirements.txt && \
8 | pip install --no-cache-dir . && \
9 | python -c "from decomp import UDSCorpus; UDSCorpus()"
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Aaron Steven White
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include decomp/ *
2 | recursive-include docs/ *
3 | recursive-include tests/ *
4 | include requirements.txt
5 | include README.md
6 | include LICENSE
7 | include Dockerfile
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | [Decomp](https://github.com/decompositional-semantics-initiative/decomp)
4 | is a toolkit for working with the [Universal Decompositional Semantics
5 | (UDS) dataset](http://decomp.io), which is a collection of directed
6 | acyclic semantic graphs with real-valued node and edge attributes
7 | pointing into [Universal
8 | Dependencies](https://universaldependencies.org/) syntactic dependency
9 | trees.
10 |
11 | 
12 |
13 | The toolkit is built on top of
14 | [NetworkX](https://github.com/networkx/networkx) and
15 | [RDFLib](https://github.com/RDFLib/rdflib) making it straightforward to:
16 |
17 | - read the UDS dataset from its native JSON format
18 | - query both the syntactic and semantic subgraphs of UDS (as well as
19 | pointers between them) using SPARQL 1.1 queries
20 | - serialize UDS graphs to many common formats, such as
21 | [Notation3](https://www.w3.org/TeamSubmission/n3/),
22 | [N-Triples](https://www.w3.org/TR/n-triples/),
23 | [turtle](https://www.w3.org/TeamSubmission/turtle/), and
24 | [JSON-LD](https://json-ld.org/), as well as any other format
25 | supported by NetworkX
26 |
27 | The toolkit was built by [Aaron Steven
28 | White](http://aaronstevenwhite.io/) and is maintained by the
29 | [Decompositional Semantics Initiative](http://decomp.io/). The UDS
30 | dataset was constructed from annotations collected by the
31 | [Decompositional Semantics Initiative](http://decomp.io/).
32 |
33 | # Documentation
34 |
35 | The [full documentation for the
36 | package](https://decomp.readthedocs.io/en/latest/index.html) is hosted
37 | at [Read the Docs](https://readthedocs.org/).
38 |
39 | # Citation
40 |
41 | If you make use of the dataset and/or toolkit in your research, we ask
42 | that you please cite the following paper in addition to the paper that
43 | introduces the underlying dataset(s) on which UDS is based.
44 |
45 | > White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. [The Universal Decompositional Semantics Dataset and Decomp Toolkit](https://www.aclweb.org/anthology/2020.lrec-1.699/). In Proceedings of The 12th Language Resources and Evaluation Conference, 5698–5707. Marseille, France: European Language Resources Association.
46 |
47 | ```latex
48 | @inproceedings{white-etal-2020-universal,
49 | title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit",
50 | author = "White, Aaron Steven and
51 | Stengel-Eskin, Elias and
52 | Vashishtha, Siddharth and
53 | Govindarajan, Venkata Subrahmanyan and
54 | Reisinger, Dee Ann and
55 | Vieira, Tim and
56 | Sakaguchi, Keisuke and
57 | Zhang, Sheng and
58 | Ferraro, Francis and
59 | Rudinger, Rachel and
60 | Rawlins, Kyle and
61 | Van Durme, Benjamin",
62 | booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference",
63 | month = may,
64 | year = "2020",
65 | address = "Marseille, France",
66 | publisher = "European Language Resources Association",
67 | url = "https://www.aclweb.org/anthology/2020.lrec-1.699",
68 | pages = "5698--5707",
69 | ISBN = "979-10-95546-34-4",
70 | }
71 | ```
72 |
73 | # License
74 |
75 | Everything besides the contents of `decomp/data` are covered by the
76 | MIT License contained at the same directory level as this README. All
77 | contents of `decomp/data` are covered by the CC-BY-SA 4.0 license
78 | contained in that directory.
79 |
80 | # Installation
81 |
82 | The most painless way to get started quickly is to use the included
83 | barebones Python 3.6-based Dockerfile. To build the image and start a
84 | python interactive prompt, use:
85 |
86 | ```bash
87 | git clone https://github.com/decompositional-semantics-initiative/decomp.git
88 | cd decomp
89 | docker build -t decomp .
90 | docker run -it decomp python
91 | ```
92 |
93 | If you prefer to install directly to your local environment, simply
94 | use `pip`.
95 |
96 | ```bash
97 | pip install --user git+https://github.com/decompositional-semantics-initiative/decomp.git
98 | ```
99 |
100 | You can also clone and use the included `setup.py`.
101 |
102 | ```bash
103 | git clone https://github.com/decompositional-semantics-initiative/decomp.git
104 | cd decomp
105 | pip install --user --no-cache-dir -r ./requirements.txt
106 | python setup.py install
107 | ```
108 |
109 | If you would like to install the package for the purposes of
110 | development, use:
111 |
112 | ```bash
113 | git clone https://github.com/decompositional-semantics-initiative/decomp.git
114 | cd decomp
115 | pip install --user --no-cache-dir -r ./requirements.txt
116 | python setup.py develop
117 | ```
118 |
119 | # Quick Start
120 |
121 | The UDS corpus can be read by directly importing it.
122 |
123 | ```python
124 | from decomp import UDSCorpus
125 |
126 | uds = UDSCorpus()
127 | ```
128 |
129 | This imports a `UDSCorpus` object `uds`, which contains all graphs
130 | across all splits in the data. If you would like a corpus, e.g.,
131 | containing only a particular split, see other loading options in [the
132 | tutorial on reading the
133 | corpus](https://decomp.readthedocs.io/en/latest/tutorial/reading.html)
134 | for details.
135 |
136 | The first time you read UDS, it will take several minutes to complete
137 | while the dataset is built from the [Universal Dependencies English Web
138 | Treebank](https://github.com/UniversalDependencies/UD_English-EWT),
139 | which is not shipped with the package (but is downloaded automatically
140 | on import in the background), and the [UDS
141 | annotations](http://decomp.io/data/), which are shipped with the
142 | package. Subsequent uses will be faster, since the dataset is cached on
143 | build.
144 |
145 | `UDSGraph` objects in the corpus can be accessed using standard
146 | dictionary getters or iteration. For instance, to get the UDS graph
147 | corresponding to the 12th sentence in `en-ud-train.conllu`, you can
148 | use:
149 |
150 | ``` python
151 | uds["ewt-train-12"]
152 | ```
153 |
154 | More generally, `UDSCorpus` objects behave like dictionaries. For
155 | example, to print all the graph identifiers in the corpus (e.g.
156 | `"ewt-train-12"`), you can use:
157 |
158 | ``` python
159 | for graphid in uds:
160 | print(graphid)
161 | ```
162 |
163 | Similarly, to print all the graph identifiers in the corpus (e.g.
164 | "ewt-in-12") along with the corresponding sentence, you can use:
165 |
166 | ``` python
167 | for graphid, graph in uds.items():
168 | print(graphid)
169 | print(graph.sentence)
170 | ```
171 |
172 | A list of graph identifiers can also be accessed via the `graphids`
173 | attribute of the UDSCorpus. A mapping from these identifiers and the
174 | corresponding graph can be accessed via the `graphs` attribute.
175 |
176 | ``` python
177 | # a list of the graph identifiers in the corpus
178 | uds.graphids
179 |
180 | # a dictionary mapping the graph identifiers to the
181 | # corresponding graph
182 | uds.graphs
183 | ```
184 |
185 | There are various instance attributes and methods for accessing nodes,
186 | edges, and their attributes in the UDS graphs. For example, to get a
187 | dictionary mapping identifiers for syntax nodes in the UDS graph to
188 | their attributes, you can use:
189 |
190 | ``` python
191 | uds["ewt-train-12"].syntax_nodes
192 | ```
193 |
194 | To get a dictionary mapping identifiers for semantics nodes in the UDS
195 | graph to their attributes, you can use:
196 |
197 | ``` python
198 | uds["ewt-train-12"].semantics_nodes
199 | ```
200 |
201 | To get a dictionary mapping identifiers for semantics edges (tuples of
202 | node identifiers) in the UDS graph to their attributes, you can use:
203 |
204 | ``` python
205 | uds["ewt-train-12"].semantics_edges()
206 | ```
207 |
208 | To get a dictionary mapping identifiers for semantics edges (tuples of
209 | node identifiers) in the UDS graph involving the predicate headed by the
210 | 7th token to their attributes, you can use:
211 |
212 | ``` python
213 | uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7')
214 | ```
215 |
216 | To get a dictionary mapping identifiers for syntax edges (tuples of node
217 | identifiers) in the UDS graph to their attributes, you can use:
218 |
219 | ``` python
220 | uds["ewt-train-12"].syntax_edges()
221 | ```
222 |
223 | And to get a dictionary mapping identifiers for syntax edges (tuples of
224 | node identifiers) in the UDS graph involving the node for the 7th token
225 | to their attributes, you can use:
226 |
227 | ``` python
228 | uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7')
229 | ```
230 |
231 | There are also methods for accessing relationships between semantics and
232 | syntax nodes. For example, to get a tuple pairing the ordinal position
233 | of the head syntax node in the UDS graph for the predicate
234 | headed by the 7th token in the corresponding sentence with a list of the
235 | form and lemma attributes for that token, you can use:
236 |
237 | ``` python
238 | uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
239 | ```
240 |
241 | And if you want the same information for every token in the span, you
242 | can use:
243 |
244 | ``` python
245 | uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
246 | ```
247 |
248 | This will return a dictionary mapping ordinal position for syntax nodes
249 | in the UDS graph that make up the predicate headed by the 7th token in
250 | the corresponding sentence to a list of the form and lemma attributes
251 | for the corresponding tokens.
252 |
253 | More complicated queries of the UDS graph can be performed using the
254 | `query` method, which accepts arbitrary SPARQL 1.1 queries. See [the
255 | tutorial on querying the
256 | corpus](https://decomp.readthedocs.io/en/latest/tutorial/querying.html)
257 | for details.
258 |
--------------------------------------------------------------------------------
/decomp/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from pkg_resources import resource_filename
4 | from logging import basicConfig, DEBUG
5 |
6 | DATA_DIR = resource_filename('decomp', 'data/')
7 | basicConfig(filename=os.path.join(DATA_DIR, 'build.log'),
8 | filemode='w',
9 | level=DEBUG)
10 |
11 | from .semantics.uds import UDSCorpus
12 | from .semantics.uds import NormalizedUDSAnnotation
13 | from .semantics.uds import RawUDSAnnotation
14 |
--------------------------------------------------------------------------------
/decomp/corpus/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for defining abstract corpus readers"""
2 |
3 | from .corpus import *
4 |
--------------------------------------------------------------------------------
/decomp/corpus/corpus.py:
--------------------------------------------------------------------------------
1 | """Module for defining abstract graph corpus readers"""
2 |
3 | from abc import ABCMeta, abstractmethod
4 |
5 | from random import sample
6 | from logging import warning
7 | from typing import Dict, List, Tuple, Iterable, Hashable, Any, TypeVar
8 |
9 | InGraph = TypeVar('InGraph') # the input graph type
10 | OutGraph = TypeVar('OutGraph') # the output graph type
11 |
12 |
13 | class Corpus(metaclass=ABCMeta):
14 | """Container for graphs
15 |
16 | Parameters
17 | ----------
18 | graphs_raw
19 | a sequence of graphs in a format that the graphbuilder for a
20 | subclass of this abstract class can process
21 | """
22 |
23 | def __init__(self, graphs_raw: Iterable[InGraph]):
24 | self._graphs_raw = graphs_raw
25 | self._build_graphs()
26 |
27 | def __iter__(self) -> Iterable[Hashable]:
28 | return iter(self._graphs)
29 |
30 | def items(self) -> Iterable[Tuple[Hashable, OutGraph]]:
31 | """Dictionary-like iterator for (graphid, graph) pairs"""
32 | return self._graphs.items()
33 |
34 | def __getitem__(self, k: Hashable) -> Any:
35 | return self._graphs[k]
36 |
37 | def __contains__(self, k: Hashable) -> bool:
38 | return k in self._graphs
39 |
40 | def __len__(self) -> int:
41 | return len(self._graphs)
42 |
43 | def _build_graphs(self) -> None:
44 | self._graphs = {}
45 |
46 | for graphid, rawgraph in self._graphs_raw.items():
47 | try:
48 | self._graphs[graphid] = self._graphbuilder(graphid, rawgraph)
49 | except ValueError:
50 | warning(graphid+' has no or multiple root nodes')
51 | except RecursionError:
52 | warning(graphid+' has loops')
53 |
54 | @abstractmethod
55 | def _graphbuilder(self,
56 | graphid: Hashable,
57 | rawgraph: InGraph) -> OutGraph:
58 | raise NotImplementedError
59 |
60 | @property
61 | def graphs(self) -> Dict[Hashable, OutGraph]:
62 | """the graphs in corpus"""
63 | return self._graphs
64 |
65 | @property
66 | def graphids(self) -> List[Hashable]:
67 | """The graph ids in corpus"""
68 |
69 | return list(self._graphs)
70 |
71 | @property
72 | def ngraphs(self) -> int:
73 | """Number of graphs in corpus"""
74 |
75 | return len(self._graphs)
76 |
77 | def sample(self, k: int) -> Dict[Hashable, OutGraph]:
78 | """Sample k graphs without replacement
79 |
80 | Parameters
81 | ----------
82 | k
83 | the number of graphs to sample
84 | """
85 |
86 | return {tid: self._graphs[tid]
87 | for tid
88 | in sample(self._graphs.keys(), k=k)}
89 |
--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/document/annotations/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/document/annotations/.gitkeep
--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/factuality.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/genericity.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/protoroles.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/time.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/wordsense.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/factuality.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/genericity.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/protoroles.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/time.zip
--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/wordsense.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/document/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/.gitkeep
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/document/annotations/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/annotations/.gitkeep
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/document/annotations/event_structure_mereology.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/annotations/event_structure_mereology.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/.gitkeep
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/event_structure_distributivity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/event_structure_distributivity.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/event_structure_natural_parts.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/event_structure_natural_parts.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/factuality.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/genericity.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/protoroles.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/time.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/wordsense.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/document/annotations/event_structure_mereology.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/document/annotations/event_structure_mereology.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/document/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/document/annotations/time.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/event_structure_distributivity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/event_structure_distributivity.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/event_structure_natural_parts.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/event_structure_natural_parts.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/factuality.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/genericity.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/protoroles.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/time.zip
--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/wordsense.zip
--------------------------------------------------------------------------------
/decomp/graph/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for converting between NetworkX and RDFLib graphs"""
2 |
3 | from .rdf import RDFConverter
4 | from .nx import NXConverter
5 |
--------------------------------------------------------------------------------
/decomp/graph/nx.py:
--------------------------------------------------------------------------------
1 | """Module for converting from networkx to RDF"""
2 |
3 | from networkx import DiGraph, to_dict_of_dicts
4 | from rdflib import Graph, URIRef, Literal
5 |
6 |
class NXConverter:
    """A converter from RDFLib graphs to NetworkX digraphs

    Conversion is not yet implemented: :meth:`rdf_to_networkx`
    currently raises ``NotImplementedError`` after constructing the
    converter. (A stale commented-out draft implementation, copied from
    ``RDFConverter`` and referencing class attributes that do not exist
    on this class, has been removed.)

    Parameters
    ----------
    rdfgraph
        the RDFLib graph to convert
    """

    def __init__(self, rdfgraph: Graph):
        # the digraph under construction and the source RDF graph
        self.nxgraph = DiGraph()
        self.rdfgraph = rdfgraph

    @classmethod
    def rdf_to_networkx(cls, rdfgraph: Graph) -> DiGraph:
        """Convert an RDFLib graph to a NetworkX digraph

        Parameters
        ----------
        rdfgraph
            the RDFLib graph to convert

        Raises
        ------
        NotImplementedError
            always; RDF-to-NetworkX conversion is not yet supported
        """

        converter = cls(rdfgraph)

        raise NotImplementedError
--------------------------------------------------------------------------------
/decomp/graph/rdf.py:
--------------------------------------------------------------------------------
1 | """Module for converting from networkx to RDF"""
2 |
3 | from networkx import DiGraph, to_dict_of_dicts
4 | from rdflib import Graph, URIRef, Literal
5 |
6 |
class RDFConverter:
    """A converter between NetworkX digraphs and RDFLib graphs

    Parameters
    ----------
    nxgraph
        the graph to convert
    """

    # Class-level caches mapping string identifiers to URIRef objects.
    # NOTE(review): these are shared by all instances and persist for the
    # lifetime of the process, so SUBSPACES, PROPERTIES, and VALUES
    # accumulate entries across conversions — confirm this is intended.
    SUBSPACES = {}
    PROPERTIES = {'domain': URIRef('domain'),
                  'type': URIRef('type'),
                  'subspace': URIRef('subspace'),
                  'confidence': URIRef('confidence')}
    VALUES = {}

    def __init__(self, nxgraph: DiGraph):
        self.nxgraph = nxgraph
        self.rdfgraph = Graph()
        # per-instance cache mapping node (and edge) ids to URIRefs
        self.nodes = {}

    @classmethod
    def networkx_to_rdf(cls, nxgraph: DiGraph) -> Graph:
        """Convert a NetworkX digraph to an RDFLib graph

        Parameters
        ----------
        nxgraph
            the NetworkX graph to convert
        """

        converter = cls(nxgraph)

        nxdict = to_dict_of_dicts(nxgraph)

        # walk the adjacency structure; both endpoints of an edge are
        # converted before the edge itself so their URIRefs exist
        for nodeid1, edgedict in nxdict.items():
            converter._add_node_attributes(nodeid1)
            for nodeid2 in edgedict:
                converter._add_node_attributes(nodeid2)
                converter._add_edge_attributes(nodeid1, nodeid2)

        return converter.rdfgraph

    def _add_node_attributes(self, nodeid):
        """Add triples for all attributes of one node to the RDF graph"""
        self._construct_node(nodeid)

        self._add_attributes(nodeid,
                             self.nxgraph.nodes[nodeid].items())


    def _add_edge_attributes(self, nodeid1, nodeid2):
        """Add triples for all attributes of one edge to the RDF graph"""
        edgeid = self._construct_edge(nodeid1, nodeid2)
        edgetup = (nodeid1, nodeid2)

        self._add_attributes(edgeid,
                             self.nxgraph.edges[edgetup].items())


    def _add_attributes(self, nid, attributes):
        """Construct and add triples for a node's or edge's attributes

        Parameters
        ----------
        nid
            the node or edge identifier the attributes attach to
        attributes
            iterable of (attribute id, value) pairs; dict-valued
            attributes are treated as annotation subspaces whose inner
            keys are properties
        """
        triples = []

        for attrid1, attrs1 in attributes:
            if not isinstance(attrs1, dict):
                # flat attribute; list/tuple values have no RDF encoding
                if isinstance(attrs1, list) or isinstance(attrs1, tuple):
                    errmsg = 'Cannot convert list- or tuple-valued' +\
                             ' attributes to RDF'
                    raise ValueError(errmsg)

                triples += self._construct_property(nid,
                                                    attrid1,
                                                    attrs1)

            else:
                # dict-valued attribute: attrid1 names the subspace
                for attrid2, attrs2 in attrs1.items():
                    triples += self._construct_property(nid,
                                                        attrid2,
                                                        attrs2,
                                                        attrid1)

        for t in triples:
            self.rdfgraph.add(t)

    def _construct_node(self, nodeid):
        """Cache a URIRef for a node id (idempotent)"""
        if nodeid not in self.nodes:
            self.nodes[nodeid] = URIRef(nodeid)

    def _construct_edge(self, nodeid1, nodeid2):
        """Add (once) the triple linking two nodes; return the edge id

        The edge id is '<node1>%%<node2>' and is cached in self.nodes
        alongside node URIRefs.
        """
        edgeid = nodeid1 + '%%' + nodeid2

        if edgeid not in self.nodes:
            node1 = self.nodes[nodeid1]
            node2 = self.nodes[nodeid2]

            self.nodes[edgeid] = URIRef(edgeid)
            triple = (node1, self.nodes[edgeid], node2)

            self.rdfgraph.add(triple)

            return edgeid

        else:
            return edgeid

    def _construct_property(self, nodeid, propid, val,
                            subspaceid=None):
        """Build the triples encoding one property of a node or edge

        Parameters
        ----------
        nodeid
            the node or edge id carrying the property
        propid
            the property identifier
        val
            the property value; for subspace properties this is a dict
            with 'value' and 'confidence' fields
        subspaceid
            the annotation subspace the property belongs to, if any
        """

        c = self.__class__

        if isinstance(val, dict) and subspaceid is not None:
            # We currently do not support querying on raw UDS
            # annotations, all of which have dict-valued 'value'
            # and 'confidence' fields.
            if isinstance(val['value'], dict) or isinstance(val['confidence'], dict):
                raise TypeError('Attempted query of graph with raw properties. Querying '\
                                'graphs with raw properties is prohibited.')
            triples = c._construct_subspace(subspaceid, propid)
            triples += [(self.nodes[nodeid],
                         c.PROPERTIES[propid],
                         Literal(val['value'])),
                        (self.nodes[nodeid],
                         c.PROPERTIES[propid+'-confidence'],
                         Literal(val['confidence']))]

        elif propid in ['domain', 'type']:
            # domain/type values are shared URIRefs rather than literals
            if val not in c.VALUES:
                c.VALUES[val] = URIRef(val)

            triples = [(self.nodes[nodeid],
                        c.PROPERTIES[propid],
                        c.VALUES[val])]

        else:
            if propid not in c.PROPERTIES:
                c.PROPERTIES[propid] = URIRef(propid)

            triples = [(self.nodes[nodeid],
                        c.PROPERTIES[propid],
                        Literal(val))]

        return triples

    @classmethod
    def _construct_subspace(cls, subspaceid, propid):
        """Build triples tying a property and its confidence to a subspace"""
        if subspaceid not in cls.SUBSPACES:
            cls.SUBSPACES[subspaceid] = URIRef(subspaceid)

        if propid not in cls.PROPERTIES:
            cls.PROPERTIES[propid] = URIRef(propid)
            cls.PROPERTIES[propid+'-confidence'] = URIRef(propid+'-confidence')

        return [(cls.PROPERTIES[propid],
                 cls.PROPERTIES['subspace'],
                 cls.SUBSPACES[subspaceid]),
                (cls.PROPERTIES[propid+'-confidence'],
                 cls.PROPERTIES['subspace'],
                 cls.SUBSPACES[subspaceid]),
                (cls.PROPERTIES[propid],
                 cls.PROPERTIES['confidence'],
                 cls.PROPERTIES[propid+'-confidence'])]
--------------------------------------------------------------------------------
/decomp/semantics/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for representing PredPatt and UDS graphs
3 |
4 | This module represents PredPatt and UDS graphs using networkx. It
5 | incorporates the dependency parse-based graphs from the syntax module
6 | as subgraphs.
7 | """
8 |
--------------------------------------------------------------------------------
/decomp/semantics/predpatt.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=W0221
2 | # pylint: disable=R0903
3 | # pylint: disable=R1704
4 | """Module for converting PredPatt objects to networkx digraphs"""
5 |
6 | from os.path import basename, splitext
7 | from typing import Tuple, Hashable, TextIO, Optional, Union
8 | from networkx import DiGraph
9 | from predpatt import load_conllu, PredPatt, PredPattOpts
10 | from ..corpus import Corpus
11 | from ..syntax.dependency import CoNLLDependencyTreeCorpus
12 |
# default PredPatt extraction options: resolve relative clauses (borrowing
# an argument for them) and cut, but do not resolve conjunctions
DEFAULT_PREDPATT_OPTIONS = PredPattOpts(resolve_relcl=True,
                                        borrow_arg_for_relcl=True,
                                        resolve_conj=False,
                                        cut=True)
17 |
18 |
class PredPattCorpus(Corpus):
    """Container for predpatt graphs"""

    def _graphbuilder(self,
                      graphid: Hashable,
                      predpatt_depgraph: Tuple[PredPatt, DiGraph]) -> DiGraph:
        """Build a single PredPatt graph

        Parameters
        ----------
        graphid
            an identifier for the graph
        predpatt_depgraph
            a pairing of the predpatt for a dependency parse and the graph
            representing that dependency parse
        """

        predpatt, depgraph = predpatt_depgraph

        return PredPattGraphBuilder.from_predpatt(predpatt, depgraph, graphid)

    @classmethod
    def from_conll(cls,
                   corpus: Union[str, TextIO],
                   name: str = 'ewt',
                   options: Optional[PredPattOpts] = None) -> 'PredPattCorpus':
        """Load a CoNLL dependency corpus and apply predpatt

        Parameters
        ----------
        corpus
            (path to) a .conllu file, the CoNLL data as a string, or an
            open file-like object containing CoNLL data
        name
            the name of the corpus; used in constructing treeids
        options
            options for predpatt extraction

        Raises
        ------
        ValueError
            if PredPatt cannot parse the provided CoNLL data
        """

        options = DEFAULT_PREDPATT_OPTIONS if options is None else options

        corp_is_str = isinstance(corpus, str)

        if corp_is_str and splitext(basename(corpus))[1] == '.conllu':
            # a path to a .conllu file: read its contents
            with open(corpus) as infile:
                data = infile.read()

        elif corp_is_str:
            # the CoNLL data itself, passed as a string
            data = corpus

        else:
            # an open file-like object
            data = corpus.read()

        # load the CoNLL dependency parses as graphs, dropping blank
        # lines and comment lines within each sentence block
        ud_corp = {name+'-'+str(i+1): [line.split()
                                       for line in block.split('\n')
                                       if len(line) > 0
                                       if line[0] != '#']
                   for i, block in enumerate(data.split('\n\n'))}
        ud_corp = CoNLLDependencyTreeCorpus(ud_corp)

        # extract the predpatt for those dependency parses
        try:
            predpatt = {name+'-'+sid.split('_')[1]: PredPatt(ud_parse,
                                                             opts=options)
                        for sid, ud_parse in load_conllu(data)}

        except ValueError as err:
            errmsg = 'PredPatt was unable to parse the CoNLL you provided.' +\
                     ' This is likely due to using a version of UD that is' +\
                     ' incompatible with PredPatt. Use of version 1.2 is' +\
                     ' suggested.'

            # chain the original error so the underlying parse failure
            # remains visible in the traceback
            raise ValueError(errmsg) from err

        return cls({n: (pp, ud_corp[n])
                    for n, pp in predpatt.items()})
94 |
95 |
class PredPattGraphBuilder:
    """A predpatt graph builder"""

    @classmethod
    def from_predpatt(cls,
                      predpatt: PredPatt,
                      depgraph: DiGraph,
                      graphid: str = '') -> DiGraph:
        """Build a DiGraph from a PredPatt object and another DiGraph

        Parameters
        ----------
        predpatt
            the predpatt extraction for the dependency parse
        depgraph
            the dependency graph
        graphid
            the tree identifier; will be a prefix of all node
            identifiers
        """
        # handle null graphids
        graphid = graphid+'-' if graphid else ''

        # initialize the predpatt graph
        # predpattgraph = DiGraph(predpatt=predpatt)
        predpattgraph = DiGraph()
        predpattgraph.name = graphid.strip('-')

        # include all of the syntax edges in the original dependendency graph
        predpattgraph.add_nodes_from([(n, attr)
                                      for n, attr in depgraph.nodes.items()])
        predpattgraph.add_edges_from([(n1, n2, attr)
                                      for (n1, n2), attr
                                      in depgraph.edges.items()])

        # add links between predicate nodes and syntax nodes
        predpattgraph.add_edges_from([edge
                                      for event in predpatt.events
                                      for edge
                                      in cls._instantiation_edges(graphid,
                                                                  event,
                                                                  'pred')])

        # add links between argument nodes and syntax nodes
        edges = [edge
                 for event in predpatt.events
                 for arg in event.arguments
                 for edge
                 in cls._instantiation_edges(graphid, arg, 'arg')]

        predpattgraph.add_edges_from(edges)

        # add links between predicate nodes and argument nodes; the
        # final argument flags arguments whose position coincides with
        # an extracted predicate (clausal arguments)
        edges = [edge
                 for event in predpatt.events
                 for arg in event.arguments
                 for edge in cls._predarg_edges(graphid, event, arg,
                                                arg.position
                                                in [e.position
                                                    for e
                                                    in predpatt.events])]

        predpattgraph.add_edges_from(edges)

        # mark that all the semantic nodes just added were from predpatt
        # this is done to distinguish them from nodes added through annotations
        for node in predpattgraph.nodes:
            if 'semantics' in node:
                predpattgraph.nodes[node]['domain'] = 'semantics'
                predpattgraph.nodes[node]['frompredpatt'] = True

                if 'arg' in node:
                    predpattgraph.nodes[node]['type'] = 'argument'
                elif 'pred' in node:
                    predpattgraph.nodes[node]['type'] = 'predicate'

        return predpattgraph

    @staticmethod
    def _instantiation_edges(graphid, node, typ):
        """Edges linking a semantics node to the syntax tokens it spans

        The head token gets an interface edge of type 'head'; every
        other token in the span gets type 'nonhead'.
        """
        parent_id = graphid+'semantics-'+typ+'-'+str(node.position+1)
        child_head_token_id = graphid+'syntax-'+str(node.position+1)
        child_span_token_ids = [graphid+'syntax-'+str(tok.position+1)
                                for tok in node.tokens
                                if child_head_token_id !=
                                graphid+'syntax-'+str(tok.position+1)]

        return [(parent_id, child_head_token_id,
                 {'domain': 'interface',
                  'type': 'head'})] +\
               [(parent_id, tokid, {'domain': 'interface',
                                    'type': 'nonhead'})
                for tokid in child_span_token_ids]

    @staticmethod
    def _predarg_edges(graphid, parent_node, child_node, pred_child):
        """Edges from a predicate node to one of its argument nodes

        When the argument is itself an extracted predicate
        (``pred_child``), an additional 'head' edge links the argument
        node to the corresponding predicate node.
        """
        parent_id = graphid+'semantics-pred-'+str(parent_node.position+1)
        child_id = graphid+'semantics-arg-'+str(child_node.position+1)

        if pred_child:
            child_id_pred = graphid +\
                            'semantics-pred-' +\
                            str(child_node.position+1)
            return [(parent_id,
                     child_id,
                     {'domain': 'semantics',
                      'type': 'dependency',
                      'frompredpatt': True})] +\
                   [(child_id,
                     child_id_pred,
                     {'domain': 'semantics',
                      'type': 'head',
                      'frompredpatt': True})]

        return [(parent_id,
                 child_id,
                 {'domain': 'semantics',
                  'type': 'dependency',
                  'frompredpatt': True})]
--------------------------------------------------------------------------------
/decomp/semantics/uds/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for representing UDS corpora, documents, graphs, and annotations."""
2 |
3 | from .corpus import UDSCorpus
4 | from .document import UDSDocument
5 | from .graph import UDSDocumentGraph
6 | from .graph import UDSSentenceGraph
7 | from .annotation import RawUDSAnnotation
8 | from .annotation import NormalizedUDSAnnotation
9 |
--------------------------------------------------------------------------------
/decomp/semantics/uds/document.py:
--------------------------------------------------------------------------------
1 | """Module for representing UDS documents."""
2 |
3 | import re
4 |
5 | from typing import Optional, Any
6 | from typing import Dict
7 |
8 | from memoized_property import memoized_property
9 | from networkx import DiGraph
10 | from .graph import UDSSentenceGraph, UDSDocumentGraph
11 |
12 |
class UDSDocument:
    """A Universal Decompositional Semantics document

    Parameters
    ----------
    sentence_graphs
        the UDSSentenceGraphs associated with each sentence in the document
    sentence_ids
        the UD sentence IDs for each graph
    name
        the name of the document (i.e. the UD document ID)
    genre
        the genre of the document (e.g. `weblog`)
    timestamp
        the timestamp of the UD document on which this UDSDocument is based
    doc_graph
        the NetworkX DiGraph for the document. If not provided, this will be
        initialized without edges from sentence_graphs
    """
    def __init__(self, sentence_graphs: Dict[str, UDSSentenceGraph],
                 sentence_ids: Dict[str, str], name: str, genre: str,
                 timestamp: Optional[str] = None, doc_graph: Optional[UDSDocumentGraph] = None):
        self.sentence_graphs = {}
        self.sentence_ids = {}
        self.name = name
        self.genre = genre
        self.timestamp = timestamp

        # Initialize the document-level graph
        if doc_graph:
            self.document_graph = doc_graph
        else:
            self.document_graph = UDSDocumentGraph(DiGraph(), name)

        # Initialize the sentence-level graphs
        self.add_sentence_graphs(sentence_graphs, sentence_ids)

    def to_dict(self) -> Dict:
        """Convert the graph to a dictionary"""
        return self.document_graph.to_dict()

    @classmethod
    def from_dict(cls, document: Dict[str, Dict], sentence_graphs: Dict[str, UDSSentenceGraph],
                  sentence_ids: Dict[str, str], name: str = 'UDS') -> 'UDSDocument':
        """Construct a UDSDocument from a dictionary

        Since only the document graphs are serialized, the sentence
        graphs must also be provided to this method call in order
        to properly associate them with their documents.

        Parameters
        ----------
        document
            a dictionary constructed by networkx.adjacency_data,
            containing the graph for the document
        sentence_graphs
            a dictionary containing (possibly a superset of) the
            sentence-level graphs for the sentences in the document
        sentence_ids
            a dictionary containing (possibly a superset of) the
            UD sentence IDs for each graph
        name
            identifier to append to the beginning of node ids
        """
        document_graph = UDSDocumentGraph.from_dict(document, name)
        sent_graph_names = set(map(lambda node: node['semantics']['graph'], document['nodes']))
        sent_graphs = {}
        sent_ids = {}
        for gname in sent_graph_names:
            sentence_graphs[gname].document_id = name
            sentence_graphs[gname].sentence_id = sentence_ids[gname]
            sent_graphs[gname] = sentence_graphs[gname]
            sent_ids[gname] = sentence_ids[gname]
        # the genre is encoded as the first hyphen-separated field of
        # the document name (e.g. 'weblog' in 'weblog-...')
        genre = name.split('-')[0]
        timestamp = cls._get_timestamp_from_document_name(name)
        return cls(sent_graphs, sent_ids, name, genre, timestamp, document_graph)

    @staticmethod
    def _get_timestamp_from_document_name(document_name):
        """Extract an 8-digit date + 6-digit time stamp from a name

        Returns the matched substring (e.g. '20040815_123456') or None
        when the name contains no such timestamp.
        """
        # raw string: '\d' in a plain string literal is an invalid
        # escape sequence (SyntaxWarning on modern Python)
        timestamp = re.search(r'\d{8}_?\d{6}', document_name)
        return timestamp[0] if timestamp else None

    def add_sentence_graphs(self, sentence_graphs: Dict[str, UDSSentenceGraph],
                            sentence_ids: Dict[str, str]) -> None:
        """Add additional sentences to a document

        Parameters
        ----------
        sentence_graphs
            a dictionary containing the sentence-level graphs
            for the sentences in the document
        sentence_ids
            a dictionary containing the UD sentence IDs for each graph
        """
        for gname, graph in sentence_graphs.items():
            sentence_graphs[gname].sentence_id = sentence_ids[gname]
            sentence_graphs[gname].document_id = self.name
            self.sentence_graphs[gname] = graph
            self.sentence_ids[gname] = sentence_ids[gname]
            # mirror each semantics node as a document-level node that
            # points back to its sentence-level counterpart
            for node_name, node in graph.semantics_nodes.items():
                semantics = {'graph': gname, 'node': node_name}
                document_node_name = node_name.replace('semantics', 'document')
                self.document_graph.graph.add_node(document_node_name,
                                domain='document', type=node['type'],
                                frompredpatt=False, semantics=semantics)

    def add_annotation(self, node_attrs: Dict[str, Dict[str, Any]],
                       edge_attrs: Dict[str, Dict[str, Any]]) -> None:
        """Add node or edge annotations to the document-level graph

        Parameters
        ----------
        node_attrs
            the node annotations to be added
        edge_attrs
            the edge annotations to be added
        """
        self.document_graph.add_annotation(node_attrs, edge_attrs, self.sentence_ids)

    def semantics_node(self, document_node: str) -> Dict[str, Dict]:
        """The semantics node for a given document node

        Parameters
        ----------
        document_node
            the document domain node whose semantics node is to be
            retrieved
        """
        semantics = self.document_graph.nodes[document_node]['semantics']
        semantics_node = self.sentence_graphs[semantics['graph']].semantics_nodes[semantics['node']]
        return {semantics['node']: semantics_node}

    @memoized_property
    def text(self) -> str:
        """The document text (sentences joined in graph-name order)"""
        return ' '.join([sent_graph.sentence for gname, sent_graph in sorted(self.sentence_graphs.items())])
151 |
--------------------------------------------------------------------------------
/decomp/syntax/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for representing CoNLL dependency tree corpora
3 |
4 | This module provides readers for corpora represented using
5 | conll-formatted dependency parses. All dependency parses are read in
6 | as networkx graphs. These graphs become subgraphs of the PredPatt and
7 | UDS graphs in the semantics module.
8 | """
9 |
--------------------------------------------------------------------------------
/decomp/syntax/dependency.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=R1717
2 | # pylint: disable=R0903
3 | """Module for building/containing dependency trees from CoNLL"""
4 |
5 | from typing import List
6 | from numpy import array
7 | from networkx import DiGraph
8 | from ..corpus import Corpus
9 |
# column layouts of a CoNLL-U ('u') and CoNLL-X ('x') token row
CONLL_HEAD = {'u': ['id', 'form', 'lemma', 'upos', 'xpos',
                    'feats', 'head', 'deprel', 'deps', 'misc'],
              'x': ['id', 'form', 'lemma', 'cpostag', 'postag',
                    'feats', 'head', 'deprel', 'phead', 'pdeprel']}

# column indices of the fields copied onto graph nodes, per spec
CONLL_NODE_ATTRS = {'u': {k: CONLL_HEAD['u'].index(k)
                          for k in ['form', 'lemma', 'upos', 'xpos', 'feats']},
                    'x': {k: CONLL_HEAD['x'].index(k)
                          for k in ['form', 'lemma', 'cpostag',
                                    'postag', 'feats']}}

# column indices of the fields copied onto graph edges, per spec
CONLL_EDGE_ATTRS = {'u': {k: CONLL_HEAD['u'].index(k)
                          for k in ['deprel']},
                    'x': {k: CONLL_HEAD['x'].index(k)
                          for k in ['deprel']}}
25 |
26 |
class CoNLLDependencyTreeCorpus(Corpus):
    """Corpus of dependency trees read from CoNLL-U data

    Attributes
    ----------
    graphs
        trees constructed from annotated sentences
    graphids
        ids for trees constructed from annotated sentences
    ngraphs
        number of graphs in corpus
    """

    def _graphbuilder(self, graphid: str, rawgraph: str):
        """Construct one dependency tree graph from a raw CoNLL block"""
        builder = DependencyGraphBuilder
        return builder.from_conll(rawgraph, graphid)
42 |
43 |
class DependencyGraphBuilder:
    """A dependency graph builder"""

    @classmethod
    def from_conll(cls,
                   conll: List[List[str]],
                   treeid: str = '',
                   spec: str = 'u') -> DiGraph:
        """Build DiGraph from a CoNLL representation

        Parameters
        ----------
        conll
            conll representation
        treeid
            a unique identifier for the tree
        spec
            the specification to assume of the conll representation
            ("u" or "x")
        """

        # handle null treeids
        treeid = treeid+'-' if treeid else ''

        # initialize the dependency graph, stashing the raw rows on it
        depgraph = DiGraph(conll=array(conll))
        depgraph.name = treeid.strip('-')

        # populate graph with one node per token row
        depgraph.add_nodes_from([cls._conll_node_attrs(treeid, row, spec)
                                 for row in conll])

        # add the synthetic root node (position 0)
        depgraph.add_node(treeid+'root-0',
                          position=0,
                          domain='root',
                          type='root')

        # connect nodes with head -> dependent edges
        depgraph.add_edges_from([cls._conll_edge_attrs(treeid, row, spec)
                                 for row in conll])

        return depgraph

    @staticmethod
    def _conll_node_attrs(treeid, row, spec):
        """Build the (node id, attribute dict) pair for one token row"""
        node_id = row[0]

        node_attrs = {'domain': 'syntax',
                      'type': 'token',
                      'position': int(node_id)}
        other_attrs = {}

        for attr, idx in CONLL_NODE_ATTRS[spec].items():
            # convert features into a dictionary ('_' means no features)
            if attr == 'feats':
                if row[idx] != '_':
                    feat_split = row[idx].split('|')
                    # split on the first '=' only, so feature values
                    # that themselves contain '=' do not raise
                    other_attrs = dict([kv.split('=', 1)
                                        for kv in feat_split])

            else:
                node_attrs[attr] = row[idx]

        node_attrs = dict(node_attrs, **other_attrs)

        return (treeid+'syntax-'+node_id, node_attrs)

    @staticmethod
    def _conll_edge_attrs(treeid, row, spec):
        """Build the (parent id, child id, attrs) triple for one row"""
        child_id = treeid+'syntax-'+row[0]

        parent_position = row[CONLL_HEAD[spec].index('head')]

        # a head position of '0' marks attachment to the root
        if parent_position == '0':
            parent_id = treeid+'root-0'
        else:
            parent_id = treeid+'syntax-'+parent_position

        edge_attrs = {attr: row[idx]
                      for attr, idx in CONLL_EDGE_ATTRS[spec].items()}

        edge_attrs['domain'] = 'syntax'
        edge_attrs['type'] = 'dependency'

        return (parent_id, child_id, edge_attrs)
--------------------------------------------------------------------------------
/decomp/vis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/vis/__init__.py
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # Decomp documentation
2 |
To build the documentation, you will need Sphinx along with the napoleon and autodoc-typehints extensions and the Read the Docs theme:
4 |
5 | ```bash
6 | pip install --user sphinx==3.1.2 sphinxcontrib-napoleon sphinx-autodoc-typehints sphinx_rtd_theme
7 | ```
8 |
9 | Then, while in this directory, use:
10 |
11 | ```bash
12 | make clean
13 | make html
14 | ```
15 |
16 | To view the built documentation, start a python http server with:
17 |
18 |
19 | ```bash
20 | python3 -m http.server
21 | ```
22 |
23 | Then, navigate to [http://localhost:8000/build/html/](http://localhost:8000/build/html/) in your browser.
24 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx>=3.0.0
2 | sphinxcontrib-napoleon
3 | sphinx-autodoc-typehints
4 | sphinx_rtd_theme
https://github.com/decompositional-semantics-initiative/decomp/tarball/master#egg=decomp
6 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../../decomp/'))


# -- Project information -----------------------------------------------------

project = 'Decomp'
copyright = '2020, Aaron Steven White'
author = 'Aaron Steven White'

# The full version, including alpha/beta/rc tags
# NOTE(review): keep in sync with the installed package version
release = '0.2.2'

# Changes root document from contents.rst to index.rst
master_doc = 'index'

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinxcontrib.napoleon', # MUST be loaded before typehints
    'sphinx_autodoc_typehints'
]

# Napoleon settings
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = False
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True
napoleon_use_keyword = True
napoleon_custom_sections = None

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
--------------------------------------------------------------------------------
/docs/source/data/document-graphs.rst:
--------------------------------------------------------------------------------
1 | Universal Decompositional Document Graphs
2 | =========================================
3 |
4 | The semantic graphs that form the third layer of annotation represent
5 | document-level relations. These graphs contain a node for each node in
6 | the document's constituent sentence-level graphs along with a pointer
7 | from the document-level node to the sentence-level node. Unlike the
8 | sentence-level graphs, they are not produced by PredPatt, so whether
9 | any two nodes in a document-level graph are joined by an edge is
10 | determined by whether the relation between the two nodes is annotated
11 | in some UDS dataset.
12 |
13 | At minimum, each of these nodes has the following attributes:
14 |
15 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
16 |
17 | - ``domain`` (``str``): the subgraph this node is part of (always ``document``)
18 | - ``type`` (``str``): the type of object corresponding to this node in the ``semantics`` domain (either ``predicate`` or ``argument``)
19 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``False``, although the corresponding ``semantics`` node will have this set as ``True``)
20 | - ``semantics`` (``dict``): a two-item dictionary containing information about the corresponding ``semantics`` node. The first item, ``graph``, indicates the sentence-level graph that the semantics node comes from. The second item, ``node``, contains the name of the node.
21 |
22 | Document graphs are initialized without edges, which are created dynamically
23 | when edge attribute annotations are added. These edges may span nodes
24 | associated with different sentences within a document and may connect not
25 | only predicates to arguments, but predicates to predicates and arguments to
26 | arguments. Any annotations that are provided that cross document boundaries
27 | will be automatically filtered out. Finally, beyond the attributes provided
28 | by annotations, each edge will also contain all but the last of the core
29 | set of node attributes listed above.
30 |
31 | The `UDSDocumentGraph`_ object is wrapped by a `UDSDocument`_, which
32 | holds additional metadata associated with the document, data relating
33 | to its constituent sentences (and their graphs), and methods for
34 | interacting with it. Finally, it should be noted that querying on
35 | document graphs is not currently supported.
36 |
37 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument
38 |
--------------------------------------------------------------------------------
/docs/source/data/index.rst:
--------------------------------------------------------------------------------
1 | Dataset Reference
2 | =================
3 |
4 | The Universal Decompositional Semantics (UDS) dataset consists of four
5 | layers of annotations built on top of the `English Web Treebank`_
6 | (EWT).
7 |
8 | .. toctree::
9 | :maxdepth: 2
10 | :caption: Contents:
11 |
12 | syntactic-graphs
13 | sentence-graphs
14 | document-graphs
15 | semantic-types
16 |
17 | .. _English Web Treebank: https://catalog.ldc.upenn.edu/LDC2012T13
18 |
19 | Each layer contains pointers directly to the previous layer.
20 |
--------------------------------------------------------------------------------
/docs/source/data/semantic-types.rst:
--------------------------------------------------------------------------------
1 | `Universal Decompositional Semantic`_ Types
2 | ===========================================
3 |
4 | .. _Universal Decompositional Semantic: http://decomp.io/
5 |
6 | PredPatt makes very coarse-grained typing distinctions—between
7 | predicate and argument nodes, on the one hand, and between dependency
8 | and head edges, on the other. UDS provides ultra fine-grained typing
9 | distinctions, represented as collections of real-valued
10 | attributes. The union of all node and edge attributes defined in UDS
11 | determines the *UDS type space*; any proper subset determines a *UDS
12 | type subspace*.
13 |
14 | UDS attributes are derived from crowd-sourced annotations of the heads
15 | or spans corresponding to predicates and/or arguments and are
16 | represented in the dataset as node and/or edge attributes. It is
17 | important to note that, though all nodes and edges in the semantics
18 | domain have a ``type`` attribute, UDS does not afford any special
19 | status to these types. That is, the only thing that UDS "sees" are the
20 | nodes and edges in the semantics domain. The set of nodes and edges
21 | visible to UDS is a superset of those associated with PredPatt
22 | predicates and their arguments.
23 |
24 | There are currently five node type subspaces annotated on
25 | nodes in sentence-level graphs.
26 |
27 | - `Factuality`_ (``factuality``)
28 | - `Genericity`_ (``genericity``)
29 | - `Time`_ (``time``)
30 | - `Entity type`_ (``wordsense``)
31 | - `Event structure`_ (``event_structure``)
32 |
33 | There are currently two edge type subspaces annotated on
34 | edges in sentence-level graphs.
35 |
36 | - `Semantic Proto-Roles`_ (``protoroles``)
37 | - `Event structure`_ (``event_structure``)
38 |
39 | There are currently (starting in UDS2.0) two edge type subspaces
40 | annotated on edges in document-level graphs.
41 |
42 | - `Time`_ (``time``)
43 | - `Event structure`_ (``event_structure``)
44 |
45 | Each subspace key lies at the same level as the ``type`` attribute and
46 | maps to a dictionary value. This dictionary maps from attribute keys
47 | (see *Attributes* in each section below) to dictionaries that always
48 | have two keys ``value`` and ``confidence``. See the below paper for
49 | information on how these are derived from the underlying dataset.
50 |
51 | Two versions of these annotations are currently available: one
52 | containing the raw annotator data (``"raw"``) and the other containing
53 | normalized data (``"normalized"``). In the former case, both the
54 | ``value`` and ``confidence`` fields described above map to
55 | dictionaries keyed on (anonymized) annotator IDs, where the
56 | corresponding value contains that annotator's response (for the
57 | ``value`` dictionary) or confidence (for the ``confidence``
58 | dictionary). In the latter case, the ``value`` and ``confidence``
59 | fields map to single, normalized value and confidence scores,
60 | respectively.
61 |
62 | For more information on the normalization used to produce the
63 | normalized annotations, see:
64 |
65 | White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. `The Universal Decompositional Semantics Dataset and Decomp Toolkit`_. *Proceedings of The 12th Language Resources and Evaluation Conference*, 5698–5707. Marseille, France: European Language Resources Association.
66 |
67 |
68 | .. _The Universal Decompositional Semantics Dataset and Decomp Toolkit: https://www.aclweb.org/anthology/2020.lrec-1.699/
69 |
70 | .. code-block:: latex
71 |
72 | @inproceedings{white-etal-2020-universal,
73 | title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit",
74 | author = "White, Aaron Steven and
75 | Stengel-Eskin, Elias and
76 | Vashishtha, Siddharth and
77 | Govindarajan, Venkata Subrahmanyan and
78 | Reisinger, Dee Ann and
79 | Vieira, Tim and
80 | Sakaguchi, Keisuke and
81 | Zhang, Sheng and
82 | Ferraro, Francis and
83 | Rudinger, Rachel and
84 | Rawlins, Kyle and
85 | Van Durme, Benjamin",
86 | booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference",
87 | month = may,
88 | year = "2020",
89 | address = "Marseille, France",
90 | publisher = "European Language Resources Association",
91 | url = "https://www.aclweb.org/anthology/2020.lrec-1.699",
92 | pages = "5698--5707",
93 | ISBN = "979-10-95546-34-4",
94 | }
95 |
96 |
97 | Information about each subspace can be found below. Unless otherwise
98 | specified the properties in a particular subspace remain constant
99 | across the raw and normalized formats.
100 |
101 | Factuality
102 | ----------
103 |
104 | **Project page**
105 |
106 | `http://decomp.io/projects/factuality/`_
107 |
108 | **Sentence-level attributes**
109 |
110 | ``factual``
111 |
112 | **First UDS version**
113 |
114 | 1.0
115 |
116 | **References**
117 |
118 | White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016.
119 |
120 |
121 | Rudinger, R., White, A.S., & B. Van Durme. 2018. `Neural models of factuality`_. *Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)*, pages 731–744. New Orleans, Louisiana, June 1-6, 2018.
122 |
123 | .. _Neural models of factuality: https://www.aclweb.org/anthology/N18-1067
124 | .. _Universal Decompositional Semantics on Universal Dependencies: https://www.aclweb.org/anthology/D16-1177
125 |
126 | .. code-block:: latex
127 |
128 | @inproceedings{white-etal-2016-universal,
129 | title = "Universal Decompositional Semantics on {U}niversal {D}ependencies",
130 | author = "White, Aaron Steven and
131 | Reisinger, Dee Ann and
132 | Sakaguchi, Keisuke and
133 | Vieira, Tim and
134 | Zhang, Sheng and
135 | Rudinger, Rachel and
136 | Rawlins, Kyle and
137 | Van Durme, Benjamin",
138 | booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing",
139 | month = nov,
140 | year = "2016",
141 | address = "Austin, Texas",
142 | publisher = "Association for Computational Linguistics",
143 | url = "https://www.aclweb.org/anthology/D16-1177",
144 | doi = "10.18653/v1/D16-1177",
145 | pages = "1713--1723",
146 | }
147 |
148 | @inproceedings{rudinger-etal-2018-neural-models,
149 | title = "Neural Models of Factuality",
150 | author = "Rudinger, Rachel and
151 | White, Aaron Steven and
152 | Van Durme, Benjamin",
153 | booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)",
154 | month = jun,
155 | year = "2018",
156 | address = "New Orleans, Louisiana",
157 | publisher = "Association for Computational Linguistics",
158 | url = "https://www.aclweb.org/anthology/N18-1067",
159 | doi = "10.18653/v1/N18-1067",
160 | pages = "731--744",
161 | }
162 |
163 |
164 | Genericity
165 | ----------
166 |
167 | **Project page**
168 |
169 | `http://decomp.io/projects/genericity/`_
170 |
171 | **Sentence-level attributes**
172 |
173 | ``arg-particular``, ``arg-kind``, ``arg-abstract``, ``pred-particular``, ``pred-dynamic``, ``pred-hypothetical``
174 |
175 | **First UDS version**
176 |
177 | 1.0
178 |
179 | **References**
180 |
181 | Govindarajan, V.S., B. Van Durme, & A.S. White. 2019. `Decomposing Generalization: Models of Generic, Habitual, and Episodic Statements`_. Transactions of the Association for Computational Linguistics.
182 |
183 | .. _Decomposing Generalization\: Models of Generic, Habitual, and Episodic Statements: https://www.aclweb.org/anthology/Q19-1035
184 |
185 | .. code-block:: latex
186 |
187 | @article{govindarajan-etal-2019-decomposing,
188 | title = "Decomposing Generalization: Models of Generic, Habitual, and Episodic Statements",
189 | author = "Govindarajan, Venkata and
190 | Van Durme, Benjamin and
191 | White, Aaron Steven",
192 | journal = "Transactions of the Association for Computational Linguistics",
193 | volume = "7",
194 | month = mar,
195 | year = "2019",
196 | url = "https://www.aclweb.org/anthology/Q19-1035",
197 | doi = "10.1162/tacl_a_00285",
198 | pages = "501--517"
199 | }
200 |
201 |
202 | Time
203 | ----
204 |
205 | **Project page**
206 |
207 | `http://decomp.io/projects/time/`_
208 |
209 | **Sentence-level attributes**
210 |
211 | *normalized*
212 |
213 | ``dur-hours``, ``dur-instant``, ``dur-forever``, ``dur-weeks``, ``dur-days``, ``dur-months``, ``dur-years``, ``dur-centuries``, ``dur-seconds``, ``dur-minutes``, ``dur-decades``
214 |
215 | *raw*
216 |
217 | ``duration``
218 |
219 |
220 | **Document-level attributes**
221 |
222 | *raw*
223 |
224 | ``rel-start1``, ``rel-start2``, ``rel-end1``, ``rel-end2``
225 |
226 | **First UDS version**
227 |
228 | 1.0 (sentence-level), 2.0 (document-level)
229 |
230 | **References**
231 |
232 | Vashishtha, S., B. Van Durme, & A.S. White. 2019. `Fine-Grained Temporal Relation Extraction`_. *Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019)*, 2906—2919. Florence, Italy, July 29-31, 2019.
233 |
234 |
235 | .. _Fine-Grained Temporal Relation Extraction: https://www.aclweb.org/anthology/P19-1280
236 |
237 | .. code-block:: latex
238 |
239 | @inproceedings{vashishtha-etal-2019-fine,
240 | title = "Fine-Grained Temporal Relation Extraction",
241 | author = "Vashishtha, Siddharth and
242 | Van Durme, Benjamin and
243 | White, Aaron Steven",
244 | booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
245 | month = jul,
246 | year = "2019",
247 | address = "Florence, Italy",
248 | publisher = "Association for Computational Linguistics",
249 | url = "https://www.aclweb.org/anthology/P19-1280",
250 | doi = "10.18653/v1/P19-1280",
251 | pages = "2906--2919"
252 | }
253 |
254 |
255 | **Notes**
256 |
257 | 1. The Time dataset has different formats for raw and normalized annotations. The duration attributes from the normalized version are each assigned an ordinal value in the raw version (in ascending order of duration), which is assigned to the single attribute ``duration``.
258 | 2. The document-level relation annotations are *only* available in the raw format and only starting in UDS2.0.
259 |
260 | Entity type
261 | -----------
262 |
263 | **Project page**
264 |
265 | `http://decomp.io/projects/word-sense/`_
266 |
267 | **Sentence-level attributes**
268 |
269 | ``supersense-noun.shape``, ``supersense-noun.process``, ``supersense-noun.relation``, ``supersense-noun.communication``, ``supersense-noun.time``, ``supersense-noun.plant``, ``supersense-noun.phenomenon``, ``supersense-noun.animal``, ``supersense-noun.state``, ``supersense-noun.substance``, ``supersense-noun.person``, ``supersense-noun.possession``, ``supersense-noun.Tops``, ``supersense-noun.object``, ``supersense-noun.event``, ``supersense-noun.artifact``, ``supersense-noun.act``, ``supersense-noun.body``, ``supersense-noun.attribute``, ``supersense-noun.quantity``, ``supersense-noun.motive``, ``supersense-noun.location``, ``supersense-noun.cognition``, ``supersense-noun.group``, ``supersense-noun.food``, ``supersense-noun.feeling``
270 |
271 | **First UDS version**
272 |
273 | 1.0
274 |
275 | **Notes**
276 |
277 | 1. The key is called ``wordsense`` because the normalized annotations come from UDS-Word Sense (v1.0).
278 |
279 | **References**
280 |
281 | White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016.
282 |
283 | .. code-block:: latex
284 |
285 | @inproceedings{white-etal-2016-universal,
286 | title = "Universal Decompositional Semantics on {U}niversal {D}ependencies",
287 | author = "White, Aaron Steven and
288 | Reisinger, Dee Ann and
289 | Sakaguchi, Keisuke and
290 | Vieira, Tim and
291 | Zhang, Sheng and
292 | Rudinger, Rachel and
293 | Rawlins, Kyle and
294 | Van Durme, Benjamin",
295 | booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing",
296 | month = nov,
297 | year = "2016",
298 | address = "Austin, Texas",
299 | publisher = "Association for Computational Linguistics",
300 | url = "https://www.aclweb.org/anthology/D16-1177",
301 | doi = "10.18653/v1/D16-1177",
302 | pages = "1713--1723",
303 | }
304 |
305 |
306 | Semantic Proto-Roles
307 | --------------------
308 |
309 | **Project page**
310 |
311 | `http://decomp.io/projects/semantic-proto-roles/`_
312 |
313 | **Sentence-level attributes**
314 |
315 | ``was_used``, ``purpose``, ``partitive``, ``location``, ``instigation``, ``existed_after``, ``time``, ``awareness``, ``change_of_location``, ``manner``, ``sentient``, ``was_for_benefit``, ``change_of_state_continuous``, ``existed_during``, ``change_of_possession``, ``existed_before``, ``volition``, ``change_of_state``
316 |
317 | **References**
318 |
319 | Reisinger, D., R. Rudinger, F. Ferraro, C. Harman, K. Rawlins, & B. Van Durme. (2015). `Semantic Proto-Roles`_. *Transactions of the Association for Computational Linguistics 3*:475–488.
320 |
321 | White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016.
322 |
323 | .. _Semantic Proto-Roles: https://www.aclweb.org/anthology/Q15-1034
324 |
325 | .. code-block:: latex
326 |
327 | @article{reisinger-etal-2015-semantic,
328 | title = "Semantic Proto-Roles",
329 | author = "Reisinger, Dee Ann and
330 | Rudinger, Rachel and
331 | Ferraro, Francis and
332 | Harman, Craig and
333 | Rawlins, Kyle and
334 | Van Durme, Benjamin",
335 | journal = "Transactions of the Association for Computational Linguistics",
336 | volume = "3",
337 | year = "2015",
338 | url = "https://www.aclweb.org/anthology/Q15-1034",
339 | doi = "10.1162/tacl_a_00152",
340 | pages = "475--488",
341 | }
342 |
343 | @inproceedings{white-etal-2016-universal,
344 | title = "Universal Decompositional Semantics on {U}niversal {D}ependencies",
345 | author = "White, Aaron Steven and
346 | Reisinger, Dee Ann and
347 | Sakaguchi, Keisuke and
348 | Vieira, Tim and
349 | Zhang, Sheng and
350 | Rudinger, Rachel and
351 | Rawlins, Kyle and
352 | Van Durme, Benjamin",
353 | booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing",
354 | month = nov,
355 | year = "2016",
356 | address = "Austin, Texas",
357 | publisher = "Association for Computational Linguistics",
358 | url = "https://www.aclweb.org/anthology/D16-1177",
359 | doi = "10.18653/v1/D16-1177",
360 | pages = "1713--1723",
361 | }
362 |
363 |
364 | Event structure
365 | ---------------
366 |
367 | **Project page**
368 |
369 | `http://decomp.io/projects/event-structure/`_
370 |
371 | **Sentence-level attributes**
372 |
373 | *normalized*
374 |
375 |
376 | ``distributive``, ``dynamic``, ``natural_parts``, ``part_similarity``, ``telic``, ``avg_part_duration_lbound-centuries``, ``avg_part_duration_ubound-centuries``, ``situation_duration_lbound-centuries``, ``situation_duration_ubound-centuries``, ``avg_part_duration_lbound-days``, ``avg_part_duration_ubound-days``, ``situation_duration_lbound-days``, ``situation_duration_ubound-days``, ``avg_part_duration_lbound-decades``, ``avg_part_duration_ubound-decades``, ``situation_duration_lbound-decades``, ``situation_duration_ubound-decades``, ``avg_part_duration_lbound-forever``, ``avg_part_duration_ubound-forever``, ``situation_duration_lbound-forever``, ``situation_duration_ubound-forever``, ``avg_part_duration_lbound-fractions_of_a_second``, ``avg_part_duration_ubound-fractions_of_a_second``, ``situation_duration_lbound-fractions_of_a_second``, ``situation_duration_ubound-fractions_of_a_second``, ``avg_part_duration_lbound-hours``, ``avg_part_duration_ubound-hours``, ``situation_duration_lbound-hours``, ``situation_duration_ubound-hours``, ``avg_part_duration_lbound-instant``, ``avg_part_duration_ubound-instant``, ``situation_duration_lbound-instant``, ``situation_duration_ubound-instant``, ``avg_part_duration_lbound-minutes``, ``avg_part_duration_ubound-minutes``, ``situation_duration_lbound-minutes``, ``situation_duration_ubound-minutes``, ``avg_part_duration_lbound-months``, ``avg_part_duration_ubound-months``, ``situation_duration_lbound-months``, ``situation_duration_ubound-months``, ``avg_part_duration_lbound-seconds``, ``avg_part_duration_ubound-seconds``, ``situation_duration_lbound-seconds``, ``situation_duration_ubound-seconds``, ``avg_part_duration_lbound-weeks``, ``avg_part_duration_ubound-weeks``, ``situation_duration_lbound-weeks``, ``situation_duration_ubound-weeks``, ``avg_part_duration_lbound-years``, ``avg_part_duration_ubound-years``, ``situation_duration_lbound-years``, ``situation_duration_ubound-years``
377 |
378 | *raw*
379 |
380 | ``dynamic``, ``natural_parts``, ``part_similarity``, ``telic``, ``avg_part_duration_lbound``, ``avg_part_duration_ubound``, ``situation_duration_lbound``, ``situation_duration_ubound``
381 |
382 |
383 | **Document-level attributes**
384 |
385 | ``pred1_contains_pred2``, ``pred2_contains_pred1``
386 |
387 | **First UDS version**
388 |
389 | 2.0
390 |
391 | **Notes**
392 |
393 | 1. Whether ``dynamic``, ``situation_duration_lbound``, and ``situation_duration_ubound`` are answered or ``part_similarity``, ``avg_part_duration_lbound``, and ``avg_part_duration_ubound`` are answered is dependent on the answer an annotator gives to ``natural_parts``. Thus, not all node attributes will necessarily be present on all nodes.
394 |
395 | **References**
396 |
397 | Gantt, W., L. Glass, & A.S. White. 2021. `Decomposing and Recomposing Event Structure`_. arXiv:2103.10387 [cs.CL].
398 |
399 |
400 | .. _Decomposing and Recomposing Event Structure: https://arxiv.org/abs/2103.10387
401 |
402 | .. code-block:: latex
403 |
404 | @misc{gantt2021decomposing,
405 | title={Decomposing and Recomposing Event Structure},
406 | author={William Gantt and Lelia Glass and Aaron Steven White},
407 | year={2021},
408 | eprint={2103.10387},
409 | archivePrefix={arXiv},
410 | primaryClass={cs.CL}
411 | }
412 |
413 |
414 |
415 |
--------------------------------------------------------------------------------
/docs/source/data/sentence-graphs.rst:
--------------------------------------------------------------------------------
1 | `PredPatt`_ Sentence Graphs
2 | ===========================
3 |
4 | .. _PredPatt: https://github.com/hltcoe/PredPatt
5 |
6 | The semantic graphs that form the second layer of annotation in the
7 | dataset are produced by the PredPatt_ system. PredPatt takes as input
8 | a UD parse for a single sentence and produces a set of predicates and
9 | set of arguments of each predicate in that sentence. Both predicates
10 | and arguments are associated with a single head token in the sentence
11 | as well as a set of tokens that make up the predicate or argument (its
12 | span). Predicate or argument spans may be trivial in only containing
13 | the head token.
14 |
15 | For example, given the dependency parse for the sentence *Chris gave
16 | the book to Pat .*, PredPatt produces the following.
17 |
18 | ::
19 |
20 | ?a gave ?b to ?c
21 | ?a: Chris
22 | ?b: the book
23 | ?c: Pat
24 |
25 | Assuming UD's 1-indexation, the single predicate in this sentence
26 | (*gave...to*) has a head at position 2 and a span over positions {2,
27 | 5}. This predicate has three arguments, one headed by *Chris* at
28 | position 1, with span over position {1}; one headed by *book* at
29 | position 4, with span over positions {3, 4}; and one headed by *Pat*
30 | at position 6, with span over position {6}.
31 |
32 | See the `PredPatt documentation tests`_ for examples.
33 |
34 | .. _PredPatt documentation tests: https://github.com/hltcoe/PredPatt/blob/master/doc/DOCTEST.md
35 |
36 | Each predicate and argument produced by PredPatt is associated with a
37 | node in a digraph with identifier
38 | ``ewt-SPLIT-SENTNUM-semantics-TYPE-HEADTOKNUM``, where ``TYPE`` is
39 | always either ``pred`` or ``arg`` and ``HEADTOKNUM`` is the ordinal
40 | position of the head token within the sentence (1-indexed, following
41 | the convention in UD-EWT). At minimum, each such node has the
42 | following attributes.
43 |
44 | - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``)
45 | - ``type`` (``str``): the type of the object in the particular domain (either ``predicate`` or ``argument``)
46 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``)
47 |
48 | Predicate and argument nodes produced by PredPatt furthermore always
49 | have at least one outgoing *instance* edge that points to nodes in the
50 | syntax domain that correspond to the associated span of the predicate
51 | or argument. At minimum, each such edge has the following attributes.
52 |
53 | - ``domain`` (``str``): the subgraph this node is part of (always ``interface``)
54 | - ``type`` (``str``): the type of the object in the particular domain (either ``head`` or ``nonhead``)
55 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``)
56 |
57 | Because PredPatt produces a unique head for each predicate and
58 | argument, there is always exactly one instance edge of type ``head``
59 | from any particular node in the semantics domain. There may or may not
60 | be instance edges of type ``nonhead``.
61 |
62 | In addition to instance edges, predicate nodes always have exactly one
63 | outgoing edge connecting them to each of the nodes corresponding to
64 | their arguments. At minimum, each such edge has the following
65 | attributes.
66 |
67 | - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``)
68 | - ``type`` (``str``): the type of the object in the particular domain (always ``dependency``)
69 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``)
70 |
71 | There is one special case where an argument node has an outgoing edge
72 | that points to a predicate node: clausal subordination.
73 |
74 | For example, given the dependency parse for the sentence *Gene thought
75 | that Chris gave the book to Pat .*, PredPatt produces the following.
76 |
77 | ::
78 |
79 | ?a thinks ?b
80 | ?a: Gene
81 | ?b: SOMETHING := that Chris gave the book to Pat
82 |
83 | ?a gave ?b to ?c
84 | ?a: Chris
85 | ?b: the book
86 | ?c: Pat
87 |
88 | In this case, the second argument of the predicate headed by *thinks*
89 | is the argument *that Chris gave the book to Pat*, which is headed by
90 | *gave*. This argument is associated with a node of type ``argument``
91 | with span over positions {3, 4, 5, 6, 7, 8, 9} and identifier
92 | ``ewt-SPLIT-SENTNUM-semantics-arg-5``. In addition, there is a
93 | predicate headed by *gave*. This predicate is associated with a node
94 | with span over positions {5, 8} and identifier
95 | ``ewt-SPLIT-SENTNUM-semantics-pred-5``. Node
96 | ``ewt-SPLIT-SENTNUM-semantics-arg-5`` then has an outgoing edge
97 | pointing to ``ewt-SPLIT-SENTNUM-semantics-pred-5``. At minimum, each
98 | such edge has the following attributes.
99 |
100 | - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``)
101 | - ``type`` (``str``): the type of the object in the particular domain (always ``head``)
102 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``)
103 |
104 | The ``type`` attribute in this case has the same value as instance
105 | edges, but crucially the ``domain`` attribute is distinct. In the case
106 | of instance edges, it is ``interface`` and in the case of clausal
107 | subordination, it is ``semantics``. This matters when making queries
108 | against the graph.
109 |
110 | If the ``frompredpatt`` attribute has value ``True``, it is guaranteed
111 | that the only semantics edges of type ``head`` are ones that involve
112 | clausal subordination like the above. This is not guaranteed for nodes
113 | for which the ``frompredpatt`` attribute has value ``False``.
114 |
115 | Every semantic graph contains at least four additional *performative*
116 | nodes that are not produced by PredPatt (and thus, for which the
117 | ``frompredpatt`` attribute has value ``False``).
118 |
119 | - ``ewt-SPLIT-SENTNUM-semantics-arg-0``: an argument node representing the entire sentence in the same way complement clauses are represented
120 | - ``ewt-SPLIT-SENTNUM-semantics-pred-root``: a predicate node representing the author's production of the entire sentence directed at the addressee
121 | - ``ewt-SPLIT-SENTNUM-semantics-arg-speaker``: an argument node representing the author
122 | - ``ewt-SPLIT-SENTNUM-semantics-arg-addressee``: an argument node representing the addressee
123 |
124 | All of these nodes have a ``domain`` attribute with value ``semantics``. Unlike nodes associated with PredPatt predicates and arguments, ``ewt-SPLIT-SENTNUM-semantics-pred-root``, ``ewt-SPLIT-SENTNUM-semantics-arg-speaker``, and ``ewt-SPLIT-SENTNUM-semantics-arg-addressee`` have no instance edges connecting them to syntactic nodes. In contrast, ``ewt-SPLIT-SENTNUM-semantics-arg-0`` has an instance head edge to ``ewt-SPLIT-SENTNUM-root-0``.
125 |
126 | The ``ewt-SPLIT-SENTNUM-semantics-arg-0`` node has semantics head edges to each of the predicate nodes in the graph that are not dominated by any other semantics node. This node, in addition to ``ewt-SPLIT-SENTNUM-semantics-arg-speaker`` and ``ewt-SPLIT-SENTNUM-semantics-arg-addressee``, has a dependency edge to ``ewt-SPLIT-SENTNUM-semantics-pred-root``.
127 |
128 | These nodes are included for purposes of forward compatibility. None of them currently have attributes, but future releases of decomp will include annotations on either them or their edges.
129 |
--------------------------------------------------------------------------------
/docs/source/data/syntactic-graphs.rst:
--------------------------------------------------------------------------------
1 | `Universal Dependencies`_ Syntactic Graphs
2 | ==========================================
3 |
4 | .. _Universal Dependencies: https://universaldependencies.org/
5 |
6 | The syntactic graphs that form the first layer of annotation in the dataset come from gold UD dependency parses provided in the UD-EWT_ treebank, which contains sentences from the Linguistic Data Consortium's constituency parsed EWT_. UD-EWT has predefined training (``train``), development (``dev``), and test (``test``) data in corresponding files in `CoNLL-U format`_: ``en_ewt-ud-train.conllu``, ``en_ewt-ud-dev.conllu``, and ``en_ewt-ud-test.conllu``. Henceforth, ``SPLIT`` ranges over ``train``, ``dev``, and ``test``.
7 |
8 | .. _UD-EWT: https://github.com/UniversalDependencies/UD_English-EWT
9 | .. _EWT: https://catalog.ldc.upenn.edu/LDC2012T13
10 | .. _CoNLL-U format: https://universaldependencies.org/format.html
11 |
12 | In UDS, each dependency parsed sentence in UD-EWT is represented as a rooted_ `directed graph`_ (digraph). Each graph's identifier takes the form ``ewt-SPLIT-SENTNUM``, where ``SENTNUM`` is the ordinal position (1-indexed) of the sentence within ``en_ewt-ud-SPLIT.conllu``.
13 |
14 | .. _rooted: https://en.wikipedia.org/wiki/Rooted_graph
15 | .. _directed graph: https://en.wikipedia.org/wiki/Directed_graph
16 |
17 | Each token in a sentence is associated with a node with identifier ``ewt-SPLIT-SENTNUM-syntax-TOKNUM``, where ``TOKNUM`` is the token's ordinal position within the sentence (1-indexed, following the convention in UD-EWT). At minimum, each node has the following attributes.
18 |
19 | - ``position`` (``int``): the ordinal position (``TOKNUM``) of that node as an integer (again, 1-indexed)
20 | - ``domain`` (``str``): the subgraph this node is part of (always ``syntax``)
21 | - ``type`` (``str``): the type of the object in the particular domain (always ``token``)
22 | - ``form`` (``str``): the actual token
23 | - ``lemma`` (``str``): the lemma corresponding to the actual token
24 | - ``upos`` (``str``): the UD part-of-speech tag
25 | - ``xpos`` (``str``): the Penn TreeBank part-of-speech tag
26 | - any attribute found in the features column of the CoNLL-U
27 |
28 | For information about the values ``upos``, ``xpos``, and the attributes contained in the features column can take on, see the `UD Guidelines`_.
29 |
30 | .. _UD Guidelines: https://universaldependencies.org/guidelines.html
31 |
32 | Each graph also has a special root node with identifier ``ewt-SPLIT-SENTNUM-root-0``. This node always has a ``position`` attribute set to ``0`` and ``domain`` and ``type`` attributes set to ``root``.
33 |
34 | Edges within the graph represent the grammatical relations (dependencies) annotated in UD-EWT. These dependencies are always represented as directed edges pointing from the head to the dependent. At minimum, each edge has the following attributes.
35 |
 36 | - ``domain`` (``str``): the subgraph this edge is part of (always ``syntax``)
37 | - ``type`` (``str``): the type of the object in the particular domain (always ``dependency``)
38 | - ``deprel`` (``str``): the UD dependency relation tag
39 |
40 | For information about the values ``deprel`` can take on, see the `UD Guidelines`_.
41 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | Decomp: A toolkit for decompositional semantics
2 | ===============================================
3 |
4 | Decomp_ is a toolkit for working with the `Universal Decompositional
5 | Semantics (UDS) dataset`_, which is a collection of directed acyclic
6 | semantic graphs with real-valued node and edge attributes pointing
7 | into `Universal Dependencies`_ syntactic dependency trees.
8 |
9 | The toolkit is built on top of NetworkX_ and RDFLib_ making it
10 | straightforward to:
11 |
12 | - read the UDS dataset from its native JSON format
13 | - query both the syntactic and semantic subgraphs of UDS (as well as
14 | pointers between them) using SPARQL 1.1 queries
15 | - serialize UDS graphs to many common formats, such as Notation3_,
16 | N-Triples_, turtle_, and JSON-LD_, as well as any other format
17 | supported by NetworkX
18 |
19 | The toolkit was built by `Aaron Steven White`_ and is maintained by
20 | the `Decompositional Semantics Initiative`_. The UDS dataset was
21 | constructed from annotations collected by the `Decompositional
22 | Semantics Initiative`_.
23 |
24 | If you use either UDS or Decomp in your research, we ask that you cite the following paper:
25 |
26 | White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. `The Universal Decompositional Semantics Dataset and Decomp Toolkit`_. *Proceedings of The 12th Language Resources and Evaluation Conference*, 5698–5707. Marseille, France: European Language Resources Association.
27 |
28 | .. code-block:: latex
29 |
30 | @inproceedings{white-etal-2020-universal,
31 | title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit",
32 | author = "White, Aaron Steven and
33 | Stengel-Eskin, Elias and
34 | Vashishtha, Siddharth and
35 | Govindarajan, Venkata Subrahmanyan and
36 | Reisinger, Dee Ann and
37 | Vieira, Tim and
38 | Sakaguchi, Keisuke and
39 | Zhang, Sheng and
40 | Ferraro, Francis and
41 | Rudinger, Rachel and
42 | Rawlins, Kyle and
43 | Van Durme, Benjamin",
44 | booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference",
45 | month = may,
46 | year = "2020",
47 | address = "Marseille, France",
48 | publisher = "European Language Resources Association",
49 | url = "https://www.aclweb.org/anthology/2020.lrec-1.699",
50 | pages = "5698--5707",
51 | ISBN = "979-10-95546-34-4",
52 | }
53 |
54 |
55 | .. _Decomp: https://github.com/decompositional-semantics-initiative/decomp
56 | .. _Universal Decompositional Semantics (UDS) dataset: http://decomp.io
57 | .. _Universal Dependencies: https://universaldependencies.org/
58 | .. _NetworkX: https://github.com/networkx/networkx
59 | .. _RDFLib: https://github.com/RDFLib/rdflib
60 | .. _matplotlib: https://matplotlib.org/
61 | .. _D3: https://d3js.org/
62 | .. _Notation3: https://www.w3.org/TeamSubmission/n3/
63 | .. _N-Triples: https://www.w3.org/TR/n-triples/
64 | .. _turtle: https://www.w3.org/TeamSubmission/turtle/
65 | .. _JSON-LD: https://json-ld.org/
66 | .. _Aaron Steven White: http://aaronstevenwhite.io/
67 | .. _Decompositional Semantics Initiative: http://decomp.io/
68 | .. _The Universal Decompositional Semantics Dataset and Decomp Toolkit: https://www.aclweb.org/anthology/2020.lrec-1.699/
69 |
70 | .. toctree::
71 | :maxdepth: 2
72 | :caption: Contents:
73 |
74 | install
75 | tutorial/index
76 | data/index
77 | package/index
78 |
79 |
80 | Indices and tables
81 | ==================
82 |
83 | * :ref:`genindex`
84 | * :ref:`modindex`
85 | * :ref:`search`
86 |
--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
1 | .. _install:
2 |
3 | ============
4 | Installation
5 | ============
6 |
7 | The most painless way to get started quickly is to use the included
8 | barebones Python 3.6-based Dockerfile. To build the image and start a
9 | python interactive prompt, use:
10 |
11 | .. code-block:: bash
12 |
 13 |   git clone https://github.com/decompositional-semantics-initiative/decomp.git
14 | cd decomp
15 | docker build -t decomp .
16 | docker run -it decomp python
17 |
18 | A jupyter notebook can then be opened in the standard way.
19 |
20 | Decomp can also be installed to a local environment using ``pip``.
21 |
22 | .. code-block:: bash
23 |
 24 |   pip install git+https://github.com/decompositional-semantics-initiative/decomp.git
25 |
26 |
27 | As an alternative to ``pip`` you can clone the decomp repository and use the included ``setup.py`` with the ``install`` flag.
28 |
29 | .. code-block:: bash
30 |
31 | git clone https://github.com/decompositional-semantics-initiative/decomp.git
32 | cd decomp
33 | pip install --user --no-cache-dir -r ./requirements.txt
34 | python setup.py install
35 |
36 |
37 | If you would like to install the package for the purposes of development, you can use the included ``setup.py`` with the ``develop`` flag.
38 |
39 | .. code-block:: bash
40 |
41 | git clone https://github.com/decompositional-semantics-initiative/decomp.git
42 | cd decomp
43 | pip install --user --no-cache-dir -r ./requirements.txt
44 | python setup.py develop
45 |
46 |
47 | If you have trouble installing via setup.py or pip on OS X Mojave, adding the following environment variables may help.
48 |
49 | .. code-block:: bash
50 |
51 | CXXFLAGS=-stdlib=libc++ CFLAGS=-stdlib=libc++ python setup.py install
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.corpus.corpus.rst:
--------------------------------------------------------------------------------
1 | decomp.corpus.corpus
2 | ====================
3 |
4 | .. automodule:: decomp.corpus.corpus
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.corpus.rst:
--------------------------------------------------------------------------------
1 | decomp.corpus
2 | =============
3 |
4 | .. automodule:: decomp.corpus
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 | .. toctree::
10 | decomp.corpus.corpus
11 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.graph.nx.rst:
--------------------------------------------------------------------------------
1 | decomp.graph.nx
2 | ===============
3 |
4 | .. automodule:: decomp.graph.nx
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.graph.rdf.rst:
--------------------------------------------------------------------------------
1 | decomp.graph.rdf
2 | ================
3 |
4 | .. automodule:: decomp.graph.rdf
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.graph.rst:
--------------------------------------------------------------------------------
1 | decomp.graph
2 | =============
3 |
4 | .. automodule:: decomp.graph
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 | .. toctree::
10 | decomp.graph.rdf
11 | decomp.graph.nx
12 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.predpatt.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.predpatt
2 | =========================
3 |
4 | .. automodule:: decomp.semantics.predpatt
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics
2 | ================
3 |
4 | .. automodule:: decomp.semantics
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 | .. toctree::
10 | decomp.semantics.predpatt
11 | decomp.semantics.uds
12 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.annotation.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.annotation
2 | ===============================
3 |
4 | .. automodule:: decomp.semantics.uds.annotation
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.corpus.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.corpus
2 | ===========================
3 |
4 | .. automodule:: decomp.semantics.uds.corpus
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.document.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.document
2 | =============================
3 |
4 | .. automodule:: decomp.semantics.uds.document
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.graph.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.graph
2 | ==========================
3 |
4 | .. automodule:: decomp.semantics.uds.graph
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.metadata.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.metadata
2 | =============================
3 |
4 | .. automodule:: decomp.semantics.uds.metadata
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds
2 | ====================
3 |
4 | .. automodule:: decomp.semantics.uds
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 | .. toctree::
10 | decomp.semantics.uds.corpus
11 | decomp.semantics.uds.document
12 | decomp.semantics.uds.graph
13 | decomp.semantics.uds.annotation
14 | decomp.semantics.uds.metadata
15 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.syntax.dependency.rst:
--------------------------------------------------------------------------------
1 | decomp.syntax.dependency
2 | ========================
3 |
4 | .. automodule:: decomp.syntax.dependency
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.syntax.rst:
--------------------------------------------------------------------------------
1 | decomp.syntax
2 | =============
3 |
4 | .. automodule:: decomp.syntax
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 | .. toctree::
10 | decomp.syntax.dependency
11 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.vis.rst:
--------------------------------------------------------------------------------
1 | decomp.vis
2 | =============
3 |
4 | .. automodule:: decomp.vis
5 | :members:
6 | :undoc-members:
7 | :show-inheritance:
8 |
9 | .. toctree::
10 | decomp.vis.uds_vis
11 |
--------------------------------------------------------------------------------
/docs/source/package/decomp.vis.uds_vis.rst:
--------------------------------------------------------------------------------
1 | decomp.vis.uds_vis
2 | ==================
3 |
4 | .. automodule:: decomp.vis.uds_vis
5 | :members:
6 |
--------------------------------------------------------------------------------
/docs/source/package/index.rst:
--------------------------------------------------------------------------------
1 | Package Reference
2 | =================
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | decomp.syntax
8 | decomp.semantics
9 | decomp.corpus
10 | decomp.graph
11 | decomp.vis
12 |
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_genericity_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_genericity_no_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_no_protoroles_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_protoroles_no_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_no_protoroles_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_protoroles_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_node_props_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_node_props_no_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_node_props_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_node_props_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_protoroles_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_protoroles_no_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_protoroles_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_protoroles_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_syntax.png
--------------------------------------------------------------------------------
/docs/source/tutorial/index.rst:
--------------------------------------------------------------------------------
1 | Tutorials
2 | =========
3 |
  4 | If you have not already :doc:`installed <../install>` the decomp
5 | package, follow those instructions before continuing the tutorial.
6 |
7 | .. toctree::
8 | :maxdepth: 2
9 | :caption: Contents:
10 |
11 | quick-start
12 | reading
13 | querying
14 | serializing
15 | visualization
16 |
--------------------------------------------------------------------------------
/docs/source/tutorial/querying.rst:
--------------------------------------------------------------------------------
1 | Querying UDS Graphs
2 | ===================
3 |
4 | Decomp provides a rich array of methods for querying UDS graphs: both
5 | pre-compiled and user-specified. Arbitrary user-specified graph
6 | queries can be performed using the `UDSSentenceGraph.query`_ instance
7 | method. This method accepts arbitrary SPARQL 1.1 queries, either as
8 | strings or as precompiled `Query`_ objects built using RDFlib's
9 | `prepareQuery`_.
10 |
11 | .. _UDSSentenceGraph.query: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.query
12 | .. _Query: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.html#rdflib.plugins.sparql.sparql.Query
13 | .. _prepareQuery: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.html?highlight=preparequery#rdflib.plugins.sparql.processor.prepareQuery
14 |
15 |
16 | **NOTE:** Querying is not currently supported for document-level graphs
17 | (`UDSDocumentGraph`_ objects) or for sentence-level graphs that contain
18 | raw annotations (`RawUDSDataset`_).
19 |
20 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
21 | .. _RawUDSDataset: ../package/decomp.semantics.uds.html#decomp.semantics.uds.RawUDSDataset
22 |
23 | Pre-compiled queries
24 | --------------------
25 |
26 | For many use cases, the various instance attributes and methods for
27 | accessing nodes, edges, and their attributes in the UDS graphs will
28 | likely be sufficient; there is no need to use ``query``. For
29 | example, to get a dictionary mapping identifiers for syntax nodes in
30 | the UDS graph to their attributes, you can use:
31 |
32 | .. code-block:: python
33 |
34 | uds["ewt-train-12"].syntax_nodes
35 |
36 | To get a dictionary mapping identifiers for semantics nodes in the UDS
37 | graph to their attributes, you can use:
38 |
39 | .. code-block:: python
40 |
41 | uds["ewt-train-12"].semantics_nodes
42 |
43 | To get a dictionary mapping identifiers for semantics edges (tuples of
44 | node identifiers) in the UDS graph to their attributes, you can use:
45 |
46 | .. code-block:: python
47 |
48 | uds["ewt-train-12"].semantics_edges()
49 |
50 | To get a dictionary mapping identifiers for semantics edges (tuples of
51 | node identifiers) in the UDS graph involving the predicate headed by
52 | the 7th token to their attributes, you can use:
53 |
54 | .. code-block:: python
55 |
56 | uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7')
57 |
58 | To get a dictionary mapping identifiers for syntax edges (tuples of
59 | node identifiers) in the UDS graph to their attributes, you can use:
60 |
61 | .. code-block:: python
62 |
63 | uds["ewt-train-12"].syntax_edges()
64 |
65 | And to get a dictionary mapping identifiers for syntax edges (tuples
66 | of node identifiers) in the UDS graph involving the node for the 7th
67 | token to their attributes, you can use:
68 |
69 | .. code-block:: python
70 |
71 | uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7')
72 |
73 |
74 | There are also methods for accessing relationships between semantics
 75 | and syntax nodes. For example, to get a tuple containing the ordinal
 76 | position of the head syntax node in the UDS graph corresponding to the
 77 | predicate headed by the 7th token in the corresponding sentence, along with a
 78 | list of the form and lemma attributes for that token, you can use:
79 |
80 | .. code-block:: python
81 |
82 | uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
83 |
84 | And if you want the same information for every token in the span, you
85 | can use:
86 |
87 | .. code-block:: python
88 |
89 | uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
90 |
91 | This will return a dictionary mapping ordinal position for syntax
 92 | nodes in the UDS graph that make up the predicate headed by the 7th
93 | token in the corresponding sentence to a list of the form and lemma
94 | attributes for the corresponding tokens.
95 |
96 | Custom queries
97 | --------------
98 |
99 | Where the above methods generally turn out to be insufficient is in
 100 | selecting nodes and edges on the basis of (combinations of) their
 101 | attributes. This is where having the full power of SPARQL comes in
102 | handy. This power comes with substantial slow downs in the speed of
103 | queries, however, so if you can do a query without using SPARQL you
104 | should try to.
105 |
106 | For example, if you were interested in extracting only predicates
107 | referring to events that likely happened and likely lasted for
108 | minutes, you could use:
109 |
110 | .. code-block:: python
111 |
112 | querystr = """
113 | SELECT ?pred
 114 | WHERE { ?pred <domain> <semantics> ;
 115 |               <type> <predicate> ;
 116 |               <factual> ?factual ;
 117 |               <dur-minutes> ?duration
118 | FILTER ( ?factual > 0 && ?duration > 0 )
119 | }
120 | """
121 |
122 | results = {gid: graph.query(querystr, query_type='node', cache_rdf=False)
123 | for gid, graph in uds.items()}
124 |
125 | Or more tersely (but equivalently):
126 |
127 | .. code-block:: python
128 |
129 | results = uds.query(querystr, query_type='node', cache_rdf=False)
130 |
131 | Note that the ``query_type`` parameter is set to ``'node'``. This
132 | setting means that a dictionary mapping node identifiers to node
133 | attribute values will be returned. If no such query type is passed, an
134 | RDFLib `Result`_ object will be returned, which you will need to
135 | postprocess yourself. This is necessary if, for instance, you are
136 | making a ``CONSTRUCT``, ``ASK``, or ``DESCRIBE`` query.
137 |
138 | Also, note that the ``cache_rdf`` parameter is set to ``False``. This is a
139 | memory-saving measure, as ``UDSSentenceGraph.query`` implicitly builds an RDF
140 | graph on the backend, and these graphs can be quite large. Leaving
141 | ``cache_rdf`` at its defaults of ``True`` will substantially speed up
142 | later queries at the expense of sometimes substantial memory costs.
143 |
144 | .. _Result: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.query.Result
145 |
146 | Constraints can also make reference to node and edge attributes of
147 | other nodes. For instance, if you were interested in extracting all
148 | predicates referring to events that are likely spatiotemporally
149 | delimited and have at least one spatiotemporally delimited participant
150 | that was volitional in the event, you could use:
151 |
152 | .. code-block:: python
153 |
154 | querystr = """
155 | SELECT DISTINCT ?node
156 | WHERE { ?node ?edge ?arg ;
 157 |               <domain> <semantics> ;
 158 |               <type> <predicate> ;
 159 |               <pred-particular> ?predparticular
 160 | FILTER ( ?predparticular > 0 ) .
 161 |         ?arg  <domain> <semantics> ;
 162 |               <type> <argument> ;
 163 |               <arg-particular> ?argparticular
 164 | FILTER ( ?argparticular > 0 ) .
 165 |         ?edge <volition> ?volition
166 | FILTER ( ?volition > 0 ) .
167 | }
168 | """
169 |
170 | results = uds.query(querystr, query_type='node', cache_rdf=False)
171 |
172 | Disjunctive constraints are also possible. For instance, for the last
173 | query, if you were interested in either volitional or sentient
174 | arguments, you could use:
175 |
176 | .. code-block:: python
177 |
178 | querystr = """
179 | SELECT DISTINCT ?node
180 | WHERE { ?node ?edge ?arg ;
181 | ;
182 | ;
183 | ?predparticular
184 | FILTER ( ?predparticular > 0 ) .
185 | ?arg ;
186 | ;
187 | ?argparticular
188 | FILTER ( ?argparticular > 0 ) .
189 | { ?edge ?volition
190 | FILTER ( ?volition > 0 )
191 | } UNION
192 | { ?edge ?sentient
193 | FILTER ( ?sentient > 0 )
194 | }
195 | }
196 | """
197 |
198 | results = uds.query(querystr, query_type='node', cache_rdf=False)
199 |
200 | Beyond returning node attributes based on complex constraints, you can
201 | also return edge attributes. For instance, for the last query, if you
202 | were interested in all the attributes of edges connecting predicates
203 | and arguments satisfying the constraints of the last query, you could
204 | simply change which variable is bound by ``SELECT`` and set
205 | ``query_type`` to ``'edge'``.
206 |
207 | .. code-block:: python
208 |
209 | querystr = """
210 | SELECT ?edge
211 | WHERE { ?node ?edge ?arg ;
212 | ;
213 | ;
214 | ?predparticular
215 | FILTER ( ?predparticular > 0 ) .
216 | ?arg ;
217 | ;
218 | ?argparticular
219 | FILTER ( ?argparticular > 0 ) .
220 | { ?edge ?volition
221 | FILTER ( ?volition > 0 )
222 | } UNION
223 | { ?edge ?sentient
224 | FILTER ( ?sentient > 0 )
225 | }
226 | }
227 | """
228 |
229 | results = uds.query(querystr, query_type='edge', cache_rdf=False)
230 |
--------------------------------------------------------------------------------
/docs/source/tutorial/quick-start.rst:
--------------------------------------------------------------------------------
1 | Quick Start
2 | ===========
3 |
4 | To read the Universal Decompositional Semantics (UDS) dataset, use:
5 |
6 | .. code-block:: python
7 |
8 | from decomp import UDSCorpus
9 |
10 | uds = UDSCorpus()
11 |
12 | This imports a `UDSCorpus`_ object ``uds``, which contains all
13 | graphs across all splits in the data. If you would like a corpus,
14 | e.g., containing only a particular split, see other loading options in
15 | :doc:`reading`.
16 |
17 | .. _UDSCorpus: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus
18 |
19 | The first time you read UDS, it will take several minutes to
20 | complete while the dataset is built from the `Universal Dependencies
21 | English Web Treebank`_, which is not shipped with the package (but is
22 | downloaded automatically on import in the background), and the `UDS
23 | annotations`_, which are shipped with the package. Subsequent uses
24 | will be faster, since the dataset is cached on build.
25 |
26 | .. _Universal Dependencies English Web Treebank: https://github.com/UniversalDependencies/UD_English-EWT
27 | .. _UDS annotations: http://decomp.io/data/
28 |
29 | `UDSSentenceGraph`_ objects in the corpus can be accessed using standard
30 | dictionary getters or iteration. For instance, to get the UDS graph
 31 | corresponding to the 12th sentence in ``en_ewt-ud-train.conllu``, you can
32 | use:
33 |
34 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
35 |
36 | .. code-block:: python
37 |
38 | uds["ewt-train-12"]
39 |
40 | To access documents (`UDSDocument`_ objects, each of which has an associated
41 | `UDSDocumentGraph`_), you can use:
42 |
43 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument
44 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
45 |
46 | .. code-block:: python
47 |
48 | uds.documents["reviews-112579"]
49 |
50 |
51 | To get the associated document graph, use:
52 |
53 | .. code-block:: python
54 |
55 | uds.documents["reviews-112579"].document_graph
56 |
57 |
58 | More generally, ``UDSCorpus`` objects behave like dictionaries. For
59 | example, to print all the sentence-level graph identifiers in the corpus
60 | (e.g. ``"ewt-train-12"``), you can use:
61 |
62 | .. code-block:: python
63 |
64 | for graphid in uds:
65 | print(graphid)
66 |
67 |
68 | To print all the document identifiers in the corpus, which correspond
69 | directly to English Web Treebank file IDs (e.g. ``"reviews-112579"``), you
70 | can use:
71 |
72 | .. code-block:: python
73 |
74 | for documentid in uds.documents:
75 | print(documentid)
76 |
77 |
78 | Similarly, to print all the sentence-level graph identifiers in the corpus
79 | (e.g. ``"ewt-train-12"``) along with the corresponding sentence, you can use:
80 |
81 | .. code-block:: python
82 |
83 | for graphid, graph in uds.items():
84 | print(graphid)
85 | print(graph.sentence)
86 |
87 |
88 | Likewise, the following will print all document identifiers, along with each
89 | document's entire text:
90 |
91 | .. code-block:: python
92 |
93 | for documentid, document in uds.documents.items():
94 | print(documentid)
95 | print(document.text)
96 |
97 |
98 | A list of sentence-level graph identifiers can also be accessed via the
99 | ``graphids`` attribute of the UDSCorpus. A mapping from these identifiers
100 | and the corresponding graph can be accessed via the ``graphs`` attribute.
101 |
102 | .. code-block:: python
103 |
104 | # a list of the sentence-level graph identifiers in the corpus
105 | uds.graphids
106 |
107 | # a dictionary mapping the sentence-level
108 | # graph identifiers to the corresponding graph
109 | uds.graphs
110 |
111 |
112 | A list of document identifiers can also be accessed via the ``document_ids``
113 | attribute of the UDSCorpus:
114 |
115 | .. code-block:: python
116 |
117 | uds.document_ids
118 |
119 |
120 | For sentence-level graphs, there are various instance attributes and
121 | methods for accessing nodes, edges, and their attributes in the UDS
122 | sentence-level graphs. For example, to get a dictionary mapping identifiers for syntax nodes in a sentence-level graph to their attributes, you can use:
123 |
124 | .. code-block:: python
125 |
126 | uds["ewt-train-12"].syntax_nodes
127 |
128 | To get a dictionary mapping identifiers for semantics nodes in the UDS
129 | graph to their attributes, you can use:
130 |
131 | .. code-block:: python
132 |
133 | uds["ewt-train-12"].semantics_nodes
134 |
135 | To get a dictionary mapping identifiers for semantics edges (tuples of
136 | node identifiers) in the UDS graph to their attributes, you can use:
137 |
138 | .. code-block:: python
139 |
140 | uds["ewt-train-12"].semantics_edges()
141 |
142 | To get a dictionary mapping identifiers for semantics edges (tuples of
143 | node identifiers) in the UDS graph involving the predicate headed by
144 | the 7th token to their attributes, you can use:
145 |
146 | .. code-block:: python
147 |
148 | uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7')
149 |
150 | To get a dictionary mapping identifiers for syntax edges (tuples of
151 | node identifiers) in the UDS graph to their attributes, you can use:
152 |
153 | .. code-block:: python
154 |
155 | uds["ewt-train-12"].syntax_edges()
156 |
157 | And to get a dictionary mapping identifiers for syntax edges (tuples
158 | of node identifiers) in the UDS graph involving the node for the 7th
159 | token to their attributes, you can use:
160 |
161 | .. code-block:: python
162 |
163 | uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7')
164 |
165 |
166 | There are also methods for accessing relationships between semantics
167 | and syntax nodes. For example, to get a tuple of the ordinal
168 | position of the head syntax node in the UDS graph for the
169 | predicate headed by the 7th token in the corresponding sentence,
170 | along with a list of the form and lemma attributes for that token, you can use:
171 |
172 | .. code-block:: python
173 |
174 | uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
175 |
176 | And if you want the same information for every token in the span, you
177 | can use:
178 |
179 | .. code-block:: python
180 |
181 | uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
182 |
183 | This will return a dictionary mapping the ordinal position of syntax
184 | nodes in the UDS graph that make up the predicate headed by the 7th
185 | token in the corresponding sentence to a list of the form and lemma
186 | attributes for the corresponding tokens.
187 |
188 | More complicated queries of a sentence-level UDS graph can be performed
189 | using the ``query`` method, which accepts arbitrary SPARQL 1.1 queries. See
190 | :doc:`querying` for details.
191 |
192 | Queries on document-level graphs are not currently supported. However, each
193 | `UDSDocument`_ does contain a number of useful attributes, including its ``genre``
194 | (corresponding to the English Web Treebank subcorpus); its ``text`` (as
195 | demonstrated above); its ``timestamp``; the ``sentence_ids`` of its
196 | constituent sentences; and the sentence-level graphs (``sentence_graphs``)
197 | associated with those sentences. Additionally, one can also look up the
198 | semantics node associated with a particular node in the document graph via
199 | the `semantics_node`_ instance method.
200 |
201 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument
202 | .. _semantics_node: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument.semantics_node
203 |
204 |
205 | Lastly, iterables for the nodes and edges of a document-level graph may be
206 | accessed as follows:
207 |
208 |
209 | .. code-block:: python
210 |
211 | uds.documents["reviews-112579"].document_graph.nodes
212 | uds.documents["reviews-112579"].document_graph.edges
213 |
214 |
215 | Unlike the nodes and edges in a sentence-level graph, the ones in a document-
216 | level graph all share a common (``document``) domain. By default, document
217 | graphs are initialized without edges and with one node for each semantics node
218 | in the sentence-level graphs associated with the constituent sentences. Edges
219 | may be added by supplying annotations (see :doc:`reading`).
220 |
--------------------------------------------------------------------------------
/docs/source/tutorial/reading.rst:
--------------------------------------------------------------------------------
1 | Reading the UDS dataset
2 | =======================
3 |
4 | The most straightforward way to read the Universal Decompositional
5 | Semantics (UDS) dataset is to import it.
6 |
7 | .. code-block:: python
8 |
9 | from decomp import UDSCorpus
10 |
11 | uds = UDSCorpus()
12 |
13 | This loads a `UDSCorpus`_ object ``uds``, which contains all
14 | graphs across all splits in the data.
15 |
16 | .. _UDSCorpus: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus
17 |
18 | As noted in :doc:`quick-start`, the first time you do read UDS, it
19 | will take several minutes to complete while the dataset is built from
20 | the `Universal Dependencies English Web Treebank`_ (UD-EWT), which is not
21 | shipped with the package (but is downloaded automatically on import in
22 | the background), and the `UDS annotations`_, which are shipped with
23 | the package as package data. Normalized annotations are loaded by default.
24 | To load raw annotations, specify ``"raw"`` as the argument to the UDSCorpus
25 | ``annotation_format`` keyword argument as follows:
26 |
27 | .. code-block:: python
28 |
29 | from decomp import UDSCorpus
30 |
31 | uds = UDSCorpus(annotation_format="raw")
32 |
33 | (See `Adding annotations`_ below for more detail on annotation types.)
34 | Subsequent uses of the corpus will be faster after the initial build,
35 | since the built dataset is cached.
36 |
37 | .. _Universal Dependencies English Web Treebank: https://github.com/UniversalDependencies/UD_English-EWT
38 | .. _UDS annotations: http://decomp.io/data/
39 |
40 | Standard splits
41 | ---------------
42 |
43 | If you would rather read only the graphs in the training, development,
44 | or test split, you can do that by specifying the ``split`` parameter
45 | of ``UDSCorpus``.
46 |
47 | .. code-block:: python
48 |
49 | from decomp import UDSCorpus
50 |
51 | # read the train split of the UDS corpus
52 | uds_train = UDSCorpus(split='train')
53 |
54 | Adding annotations
55 | ------------------
56 |
57 | Additional annotations beyond the standard UDS annotations can be
58 | added using this method by passing a list of `UDSAnnotation`_
59 | objects. These annotations can be added at two levels: the sentence level
60 | and the document level. Sentence-level annotations contain attributes of
61 | `UDSSentenceGraph`_ nodes or edges. Document-level annotations contain
62 | attributes for `UDSDocumentGraph`_ nodes or edges. Document-level
63 | edge annotations may relate nodes associated with different sentences
64 | in a document, although they are added as annotations only to
65 | the appropriate `UDSDocumentGraph`_.
66 |
67 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
68 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
69 | .. _UDSAnnotation: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSAnnotation
70 |
71 | Sentence-level and document-level annotations share the same two in-memory
72 | representations: ``RawUDSDataset`` and ``NormalizedUDSDataset``. The former
73 | may have multiple annotations for the same node or edge attribute, while the
74 | latter must have only a single annotation. Both are loaded from
75 | JSON-formatted files, but differ in the expected format (see the
76 | `from_json`_ methods of each class for formatting guidelines). For example,
77 | if you have some additional *normalized* sentence-level annotations in a file
78 | ``new_annotations.json``, those can be added to the existing UDS annotations
79 | using:
80 |
81 | .. _NormalizedUDSDataset: ../package/decomp.semantics.uds.html#decomp.semantics.uds.NormalizedUDSDataset
82 | .. _from_json: ../package/decomp.semantics.uds.html#decomp.semantics.uds.NormalizedUDSDataset.from_json
83 |
84 | .. code-block:: python
85 |
86 | from decomp import NormalizedUDSDataset
87 |
88 | # read annotations
89 | new_annotations = [NormalizedUDSDataset.from_json("new_annotations.json")]
90 |
91 | # read the train split of the UDS corpus and append new annotations
92 | uds_train_plus = UDSCorpus(split='train', sentence_annotations=new_annotations)
93 |
94 | If instead you wished to add *raw* annotations (and supposing those
95 | annotations were still in "new_annotations.json"), you would do the following:
96 |
97 | .. code-block:: python
98 |
99 | from decomp import RawUDSDataset
100 |
101 | # read annotations
102 | new_annotations = [RawUDSDataset.from_json("new_annotations.json")]
103 |
104 | # read the train split of the UDS corpus and append new annotations
105 | uds_train_plus = UDSCorpus(split='train', sentence_annotations=new_annotations,
106 | annotation_format="raw")
107 |
108 | If ``new_annotations.json`` contained document-level annotations
109 | you would pass ``new_annotations.json`` to the constructor keyword
110 | argument ``document_annotations`` instead of to ``sentence_annotations``.
111 | Importantly, these annotations are added *in addition* to the existing
112 | UDS annotations that ship with the toolkit. You do not need to add these
113 | manually.
114 |
115 | Finally, it should be noted that querying is currently **not** supported
116 | for document-level graphs or for sentence-level graphs containing raw
117 | annotations.
118 |
119 | Reading from an alternative location
120 | ------------------------------------
121 |
122 | If you would like to read the dataset from an alternative
123 | location—e.g. if you have serialized the dataset to JSON, using the
124 | `to_json`_ instance method—this can be accomplished using
125 | ``UDSCorpus`` class methods (see :doc:`serializing` for more
126 | information on serialization). For example, if you serialize
127 | ``uds_train`` to the files ``uds-ewt-sentences-train.json`` (for
128 | sentences) and ``uds-ewt-documents-train.json`` (for the documents),
129 | you can read it back into memory using:
130 |
131 | .. _to_json: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus.to_json
132 |
133 | .. code-block:: python
134 |
135 | # serialize uds_train to JSON
136 | uds_train.to_json("uds-ewt-sentences-train.json", "uds-ewt-documents-train.json")
137 |
138 | # read JSON serialized uds_train
139 | uds_train = UDSCorpus.from_json("uds-ewt-sentences-train.json", "uds-ewt-documents-train.json")
140 |
141 | Rebuilding the corpus
142 | ---------------------
143 |
144 | If you would like to rebuild the corpus from the UD-EWT CoNLL files
145 | and some set of JSON-formatted annotation files, you can use the
146 | analogous `from_conll`_ class method. Importantly, unlike the
147 | standard instance initialization described above, the UDS annotations
148 | are *not* automatically added. For example, if ``en-ud-train.conllu``
149 | is in the current working directory and you have already loaded
150 | ``new_annotations`` as above, a corpus containing only those
151 | annotations (without the UDS annotations) can be loaded using:
152 |
153 | .. _from_conll: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus.from_conll
154 |
155 | .. code-block:: python
156 |
157 | # read the train split of the UD corpus and append new annotations
158 | uds_train_annotated = UDSCorpus.from_conll("en-ud-train.conllu", sentence_annotations=new_annotations)
159 |
160 | This also means that if you only want the semantic graphs as implied
161 | by PredPatt (without annotations), you can use the ``from_conll``
162 | class method to load them.
163 |
164 | .. code-block:: python
165 |
166 | # read the train split of the UD corpus
167 | ud_train = UDSCorpus.from_conll("en-ud-train.conllu")
168 |
169 | Note that, because PredPatt is used for predicate-argument extraction,
170 | only versions of UD-EWT that are compatible with PredPatt can be used
171 | here. Version 1.2 is suggested.
172 |
173 | Though other serialization formats are available (see
174 | :doc:`serializing`), these formats are not yet supported for reading.
175 |
--------------------------------------------------------------------------------
/docs/source/tutorial/serializing.rst:
--------------------------------------------------------------------------------
1 | Serializing the UDS dataset
2 | ===========================
3 |
4 | The canonical serialization format for the Universal Decompositional
5 | Semantics (UDS) dataset is JSON. Sentence- and document-level graphs
6 | are serialized separately. For example, if you wanted to serialize
7 | the entire UDS dataset to the files ``uds-sentence.json`` (for
8 | sentences) and ``uds-document.json`` (for documents), you would use:
9 |
10 | .. code-block:: python
11 |
12 | from decomp import uds
13 |
14 | uds.to_json("uds-sentence.json", "uds-document.json")
15 |
16 | The particular format is based directly on the `adjacency_data`_
17 | method implemented in `NetworkX`_.
18 |
19 | .. _adjacency_data: https://networkx.github.io/documentation/stable/reference/readwrite/generated/networkx.readwrite.json_graph.adjacency_data.html#networkx.readwrite.json_graph.adjacency_data
20 | .. _NetworkX: https://github.com/networkx/networkx
21 |
22 | For the sentence-level graphs only, in addition to this JSON format,
23 | any serialization format supported by `RDFLib`_ can also be used by
24 | accessing the `rdf`_ attribute of each `UDSSentenceGraph`_ object.
25 | This attribute exposes an `rdflib.graph.Graph`_ object, which implements
26 | a `serialize`_ method. By default, this method outputs rdf/xml. The
27 | ``format`` parameter can also be set to ``'n3'``, ``'turtle'``,
28 | ``'nt'``, ``'pretty-xml'``, ``'trix'``, ``'trig'``, or ``'nquads'``;
29 | and additional formats, such as JSON-LD, can be supported by installing
30 | plugins for RDFLib.
31 |
32 | .. _serialize: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.graph.Graph.serialize
33 | .. _rdf: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.rdf
34 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
35 | .. _rdflib.graph.Graph: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#graph-module
36 |
37 | Before considering serialization to such a format, be aware that only
38 | the JSON format mentioned above can be read by the
39 | toolkit. Additionally, note that if your aim is to query the graphs in
40 | the corpus, this can be done using the `query`_ instance method in
41 | ``UDSSentenceGraph``. See :doc:`querying` for details.
42 |
43 | .. _RDFLib: https://github.com/RDFLib/rdflib
44 | .. _query: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.query
45 |
--------------------------------------------------------------------------------
/docs/source/tutorial/visualization.rst:
--------------------------------------------------------------------------------
1 | Visualizing UDS Graphs
2 | ======================
3 |
4 | Decomp comes with a built-in interactive visualization tool using the `UDSVisualization`_ object. This object visualizes a `UDSSentenceGraph`_.
5 |
6 | .. _UDSVisualization: ../package/decomp.vis.uds_vis.html#decomp.vis.uds_vis.UDSVisualization
7 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
8 |
9 | A visualization (which is based on `Dash`_) is served to your local browser via port 8050 (e.g. `http://localhost:8050`).
10 | The following snippet visualizes the first graph in the dev split:
11 |
12 | .. _Dash: https://dash.plotly.com
13 |
14 |
15 | .. code-block:: python
16 |
17 | graph = uds["ewt-dev-1"]
18 | vis = UDSVisualization(graph)
19 | vis.serve()
20 |
21 | The browser window will look like this:
22 |
23 | .. image:: assets/vis_no_syntax.png
24 |
25 | Black edges indicate edges in the semantic graph, while gray arrows are instance edges between semantics and syntax nodes.
26 | Thick gray arrows indicate the syntactic head of a semantic argument or predicate.
27 | Semantics nodes have a thick outline when they are annotated with decomp properties.
28 | Hovering over such a node will reveal the annotations in a pop-out window.
29 |
30 | .. image:: assets/vis_node_props_no_syntax.png
31 |
32 | Similarly, yellow boxes on edges indicate protorole annotations, and can be hovered over to reveal their values.
33 |
34 | .. image:: assets/vis_protoroles_no_syntax.png
35 |
36 | Using the checkboxes at the top left, annotation subspaces can be selected and de-selected.
37 | If all the annotations for a node or edge are de-selected, it will become non-bolded or disappear
38 |
39 | .. image:: assets/vis_no_protoroles_no_syntax.png
40 |
41 |
42 | Several options can be supplied to a visualization via arguments. For example, we can visualize the syntactic parse along with the semantic parse by setting
43 |
44 | .. code-block:: python
45 |
46 | vis = UDSVisualization(graph, add_syntax_edges = True)
47 |
48 | which results in the following visualization.
49 |
50 |
51 | .. image:: assets/vis_syntax.png
52 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.22.0
2 | networkx>=2.5.1
3 | memoized_property==1.0.3
4 | typing==3.6.2
5 | rdflib==4.2.2
6 | setuptools>=52.0.0
7 | numpy>=1.16.4
8 | pyparsing==2.2.0
9 | overrides==3.1.0
10 | http://github.com/hltcoe/PredPatt/tarball/master#egg=predpatt
11 | dash[testing]==1.9.1
12 | selenium==3.141.0
13 | jsonpickle==1.4.1
14 | pytest==6.2.2
15 | matplotlib==3.2.1
16 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# Packaging configuration for the decomp toolkit.
from setuptools import find_packages, setup

setup(name='decomp',
      version='0.2.2',
      # NOTE: the backslash continuation embeds the following line's leading
      # whitespace inside the description string; reformat with care.
      description='Toolkit for working with Universal\
                  Decompositional Semantics graphs',
      url='https://decomp.io/',
      author='Aaron Steven White',
      author_email='aaron.white@rochester.edu',
      license='MIT',
      # include every package under the repository root (decomp, decomp.corpus, ...)
      packages=find_packages(),
      package_dir={'decomp': 'decomp'},
      # NOTE(review): the 'data/*' glob does not recurse into subdirectories
      # (data/1.0/..., data/2.0/... hold the annotation zips); nested package
      # data appears to rely on MANIFEST.in + include_package_data=True below
      # — confirm the built wheel actually contains the zips.
      package_data={'decomp': ['data/*']},
      install_requires=['requests==2.22.0',
                        'networkx>=2.5.1',
                        'memoized_property==1.0.3',
                        'overrides==3.1.0',
                        # NOTE(review): the PyPI 'typing' backport can shadow
                        # the stdlib module on Python >= 3.7 — verify this pin
                        # is still needed.
                        'typing==3.6.2',
                        'rdflib==4.2.2',
                        'setuptools>=52.0.0',
                        'numpy>=1.16.4',
                        'pyparsing==2.2.0',
                        # PredPatt is installed straight from its GitHub tarball
                        'predpatt @ http://github.com/hltcoe/PredPatt/tarball/master#egg=predpatt'],
      test_suite='nose.collector',
      tests_require=['nose'],
      include_package_data=True,
      zip_safe=False)
28 |
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | This directory contains the tests for the [Decomp
2 | toolkit](https://github.com/decompositional-semantics-initiative/decomp). These
3 | tests use the [`pytest` framework](https://docs.pytest.org/).
4 |
5 | # Installation
6 |
7 | To run the tests in this directory, ensure that both the toolkit and
8 | `pytest` are installed.
9 |
10 | ```bash
11 | pip install --user pytest==6.0.* git+git://github.com/decompositional-semantics-initiative/decomp.git
12 | ```
13 |
14 | # Running the test suite
15 |
16 | The entire test suite can be run from the root directory of the
17 | toolkit installation using:
18 |
19 | ```bash
20 | pytest
21 | ```
22 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import os
4 |
5 | from decomp.semantics.uds.annotation import NormalizedUDSAnnotation
6 | from decomp.semantics.uds.annotation import RawUDSAnnotation
7 |
def pytest_configure(config):
    """Register the custom ``slow`` marker so pytest recognizes it."""
    marker_line = "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    config.addinivalue_line("markers", marker_line)
12 |
def pytest_addoption(parser):
    """Expose a ``--runslow`` command-line flag (off by default)."""
    parser.addoption("--runslow",
                     action="store_true",
                     default=False,
                     help="run slow tests")
17 |
def pytest_collection_modifyitems(config, items):
    """Attach a skip marker to ``slow`` tests unless ``--runslow`` was given."""
    if config.getoption("--runslow"):
        # the user explicitly asked for slow tests: leave collection untouched
        return

    skip_marker = pytest.mark.skip(reason="need --runslow option to run")

    for slow_item in (it for it in items if "slow" in it.keywords):
        slow_item.add_marker(skip_marker)
28 |
@pytest.fixture
def test_dir():
    """Absolute path of the directory containing this test module."""
    this_file = os.path.abspath(__file__)
    return os.path.dirname(this_file)
32 |
@pytest.fixture
def test_data_dir(test_dir):
    """Path of the ``data/`` directory holding the JSON test fixtures."""
    data_dir = os.path.join(test_dir, 'data/')
    return data_dir
36 |
37 |
@pytest.fixture
def normalized_node_sentence_annotation(test_data_dir):
    """Raw JSON text of the normalized node-level sentence annotation fixture."""
    json_path = os.path.join(test_data_dir,
                             'normalized_node_sentence_annotation.json')

    with open(json_path) as json_file:
        return json_file.read()
44 |
@pytest.fixture
def normalized_edge_sentence_annotation(test_data_dir):
    """Raw JSON text of the normalized edge-level sentence annotation fixture."""
    json_path = os.path.join(test_data_dir,
                             'normalized_edge_sentence_annotation.json')

    with open(json_path) as json_file:
        return json_file.read()
51 |
@pytest.fixture
def normalized_sentence_annotations(normalized_node_sentence_annotation,
                                    normalized_edge_sentence_annotation):
    """Pair of (node, edge) ``NormalizedUDSAnnotation`` objects parsed from JSON."""
    return tuple(NormalizedUDSAnnotation.from_json(blob)
                 for blob in (normalized_node_sentence_annotation,
                              normalized_edge_sentence_annotation))
59 |
@pytest.fixture
def raw_node_sentence_annotation(test_data_dir):
    """Raw JSON text of the raw node-level sentence annotation fixture."""
    json_path = os.path.join(test_data_dir,
                             'raw_node_sentence_annotation.json')

    with open(json_path) as json_file:
        return json_file.read()
66 |
@pytest.fixture
def raw_edge_sentence_annotation(test_data_dir):
    """Raw JSON text of the raw edge-level sentence annotation fixture."""
    json_path = os.path.join(test_data_dir,
                             'raw_edge_sentence_annotation.json')

    with open(json_path) as json_file:
        return json_file.read()
73 |
@pytest.fixture
def raw_sentence_annotations(raw_node_sentence_annotation,
                             raw_edge_sentence_annotation):
    """Pair of (node, edge) ``RawUDSAnnotation`` objects parsed from JSON."""
    return tuple(RawUDSAnnotation.from_json(blob)
                 for blob in (raw_node_sentence_annotation,
                              raw_edge_sentence_annotation))
81 |
--------------------------------------------------------------------------------
/tests/data/normalized_edge_document_annotation.json:
--------------------------------------------------------------------------------
1 | {"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-20%%ewt-train-7192-document-arg-2": {"protoroles": {"instigation": {"confidence": 1.0, "value": -0.0}, "change_of_possession": {"confidence": 1.0, "value": -0.0}, "existed_before": {"confidence": 0.6796, "value": 0.0111}, "was_for_benefit": {"confidence": 1.0, "value": -0.0}, "change_of_state_continuous": {"confidence": 0.1675, "value": 0.0032}, "change_of_state": {"confidence": 0.1675, "value": 0.0032}, "volition": {"confidence": 1.0, "value": -0.0}, "change_of_location": {"confidence": 1.0, "value": -0.0}, "partitive": {"confidence": 0.564, "value": -0.0941}, "existed_during": {"confidence": 1.0, "value": 1.3421}, "existed_after": {"confidence": 0.6796, "value": 0.0111}, "awareness": {"confidence": 1.0, "value": -0.0}, "sentient": {"confidence": 1.0, "value": -0.9348}, "was_used": {"confidence": 0.564, "value": -0.0}}}, "ewt-train-7192-document-pred-25%%ewt-train-7191-document-arg-18": {"protoroles": {"instigation": {"confidence": 1.0, "value": 1.3557}, "change_of_possession": {"confidence": 0.7724, "value": -0.0}, "existed_before": {"confidence": 1.0, "value": 1.3527}, "was_for_benefit": {"confidence": 0.1976, "value": -0.0504}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.2067, "value": -0.0548}, "volition": {"confidence": 1.0, "value": 1.3545}, "change_of_location": {"confidence": 0.272, "value": -0.0922}, "partitive": {"confidence": 0.1148, "value": -0.0018}, "existed_during": {"confidence": 1.0, "value": 1.3557}, "existed_after": {"confidence": 1.0, "value": 1.3527}, "awareness": {"confidence": 1.0, "value": 1.3526}, "sentient": {"confidence": 1.0, "value": 1.354}, "was_used": {"confidence": 0.4373, "value": -0.0207}}}, "ewt-train-7192-document-pred-20%%ewt-train-7190-document-arg-3": {"protoroles": {"instigation": {"confidence": 1.0, "value": -1.5074}, "change_of_possession": {"confidence": 1.0, "value": -0.3909}, 
"existed_before": {"confidence": 1.0, "value": 1.3954}, "was_for_benefit": {"confidence": 0.3418, "value": 0.0008}, "change_of_state_continuous": {"confidence": 0.0791, "value": -0.0351}, "change_of_state": {"confidence": 0.3333, "value": -0.0085}, "volition": {"confidence": 1.0, "value": -0.3909}, "change_of_location": {"confidence": 0.1395, "value": -0.0549}, "partitive": {"confidence": 0.0791, "value": -0.1354}, "existed_during": {"confidence": 1.0, "value": 1.3959}, "existed_after": {"confidence": 0.6567, "value": 0.124}, "awareness": {"confidence": 0.1395, "value": -0.0549}, "sentient": {"confidence": 1.0, "value": -1.508}, "was_used": {"confidence": 0.3333, "value": -0.0085}}}}}
2 |
--------------------------------------------------------------------------------
/tests/data/normalized_edge_sentence_annotation.json:
--------------------------------------------------------------------------------
1 | {"metadata": {"protoroles": {"awareness": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_location": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_possession": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_state": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_state_continuous": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_after": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_before": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_during": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "instigation": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "location": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "manner": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "partitive": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "purpose": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "sentient": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "time": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "volition": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "was_for_benefit": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "was_used": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}}}, "data": {"tree1": {"tree1-semantics-pred-11%%tree1-semantics-arg-13": {"protoroles": {"instigation": {"confidence": 1.0, "value": -0.0}, "change_of_possession": {"confidence": 1.0, "value": -0.0}, "existed_before": {"confidence": 0.6796, "value": 0.0111}, "was_for_benefit": {"confidence": 1.0, "value": -0.0}, "change_of_state_continuous": {"confidence": 0.1675, "value": 0.0032}, "change_of_state": 
{"confidence": 0.1675, "value": 0.0032}, "volition": {"confidence": 1.0, "value": -0.0}, "change_of_location": {"confidence": 1.0, "value": -0.0}, "partitive": {"confidence": 0.564, "value": -0.0941}, "existed_during": {"confidence": 1.0, "value": 1.3421}, "existed_after": {"confidence": 0.6796, "value": 0.0111}, "awareness": {"confidence": 1.0, "value": -0.0}, "sentient": {"confidence": 1.0, "value": -0.9348}, "was_used": {"confidence": 0.564, "value": -0.0}}}, "tree1-semantics-pred-7%%tree1-semantics-arg-3": {"protoroles": {"instigation": {"confidence": 1.0, "value": 1.3557}, "change_of_possession": {"confidence": 0.7724, "value": -0.0}, "existed_before": {"confidence": 1.0, "value": 1.3527}, "was_for_benefit": {"confidence": 0.1976, "value": -0.0504}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.2067, "value": -0.0548}, "volition": {"confidence": 1.0, "value": 1.3545}, "change_of_location": {"confidence": 0.272, "value": -0.0922}, "partitive": {"confidence": 0.1148, "value": -0.0018}, "existed_during": {"confidence": 1.0, "value": 1.3557}, "existed_after": {"confidence": 1.0, "value": 1.3527}, "awareness": {"confidence": 1.0, "value": 1.3526}, "sentient": {"confidence": 1.0, "value": 1.354}, "was_used": {"confidence": 0.4373, "value": -0.0207}}}, "tree1-semantics-pred-11%%tree1-semantics-arg-9": {"protoroles": {"instigation": {"confidence": 1.0, "value": -1.5074}, "change_of_possession": {"confidence": 1.0, "value": -0.3909}, "existed_before": {"confidence": 1.0, "value": 1.3954}, "was_for_benefit": {"confidence": 0.3418, "value": 0.0008}, "change_of_state_continuous": {"confidence": 0.0791, "value": -0.0351}, "change_of_state": {"confidence": 0.3333, "value": -0.0085}, "volition": {"confidence": 1.0, "value": -0.3909}, "change_of_location": {"confidence": 0.1395, "value": -0.0549}, "partitive": {"confidence": 0.0791, "value": -0.1354}, "existed_during": {"confidence": 1.0, "value": 1.3959}, 
"existed_after": {"confidence": 0.6567, "value": 0.124}, "awareness": {"confidence": 0.1395, "value": -0.0549}, "sentient": {"confidence": 1.0, "value": -1.508}, "was_used": {"confidence": 0.3333, "value": -0.0085}}}}}}
2 |
--------------------------------------------------------------------------------
/tests/data/normalized_node_document_annotation.json:
--------------------------------------------------------------------------------
1 | {"answers-20111105112131AA6gIX6_ans": {"ewt-train-7189-document-arg-2": {"genericity": {"arg-kind": {"confidence": 1.0, "value": 1.1619}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-pred-25": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.54}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "ewt-train-7191-document-arg-18": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.5399}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "ewt-train-7192-document-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": -1.5399}, "pred-hypothetical": {"confidence": 1.0, "value": 0.7748}, "pred-particular": {"confidence": 1.0, "value": -1.54}}}, "ewt-train-7194-document-arg-13": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7194-document-arg-1": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-arg-2": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}}}
2 |
--------------------------------------------------------------------------------
/tests/data/normalized_node_sentence_annotation.json:
--------------------------------------------------------------------------------
1 | {"metadata": {"genericity": {"pred-dynamic": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "pred-hypothetical": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "pred-particular": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-abstract": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-kind": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-particular": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}}}, "data": {"tree1": {"tree1-semantics-arg-15": {"genericity": {"arg-kind": {"confidence": 1.0, "value": 1.1619}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.54}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "tree1-semantics-arg-3": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.5399}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": -1.5399}, "pred-hypothetical": {"confidence": 1.0, "value": 0.7748}, "pred-particular": {"confidence": 1.0, "value": -1.54}}}, "tree1-semantics-arg-23": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-arg-9": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": 
{"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-arg-13": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}}}}
2 |
--------------------------------------------------------------------------------
/tests/data/raw_edge_sentence_annotators.txt:
--------------------------------------------------------------------------------
1 | protoroles-annotator-0
2 | protoroles-annotator-1
3 | protoroles-annotator-10
4 | protoroles-annotator-11
5 | protoroles-annotator-12
6 | protoroles-annotator-13
7 | protoroles-annotator-14
8 | protoroles-annotator-15
9 | protoroles-annotator-16
10 | protoroles-annotator-17
11 | protoroles-annotator-18
12 | protoroles-annotator-19
13 | protoroles-annotator-2
14 | protoroles-annotator-20
15 | protoroles-annotator-21
16 | protoroles-annotator-22
17 | protoroles-annotator-23
18 | protoroles-annotator-24
19 | protoroles-annotator-25
20 | protoroles-annotator-26
21 | protoroles-annotator-27
22 | protoroles-annotator-28
23 | protoroles-annotator-29
24 | protoroles-annotator-3
25 | protoroles-annotator-30
26 | protoroles-annotator-31
27 | protoroles-annotator-32
28 | protoroles-annotator-33
29 | protoroles-annotator-34
30 | protoroles-annotator-35
31 | protoroles-annotator-36
32 | protoroles-annotator-37
33 | protoroles-annotator-38
34 | protoroles-annotator-39
35 | protoroles-annotator-4
36 | protoroles-annotator-40
37 | protoroles-annotator-41
38 | protoroles-annotator-42
39 | protoroles-annotator-43
40 | protoroles-annotator-44
41 | protoroles-annotator-45
42 | protoroles-annotator-5
43 | protoroles-annotator-6
44 | protoroles-annotator-7
45 | protoroles-annotator-8
46 | protoroles-annotator-9
47 |
--------------------------------------------------------------------------------
/tests/data/rawtree.conllu:
--------------------------------------------------------------------------------
1 | 1 The the DET DT Definite=Def|PronType=Art 3 det _ _
2 | 2 police police NOUN NN Number=Sing 3 compound _ _
3 | 3 commander commander NOUN NN Number=Sing 7 nsubj _ _
4 | 4 of of ADP IN _ 6 case _ _
5 | 5 Ninevah Ninevah PROPN NNP Number=Sing 6 compound _ _
6 | 6 Province Province PROPN NNP Number=Sing 3 nmod _ _
7 | 7 announced announce VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root _ _
8 | 8 that that SCONJ IN _ 11 mark _ _
9 | 9 bombings bombing NOUN NNS Number=Plur 11 nsubj _ _
10 | 10 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 11 aux _ _
11 | 11 declined decline VERB VBN Tense=Past|VerbForm=Part 7 ccomp _ _
12 | 12 80 80 NUM CD NumType=Card 13 nummod _ _
13 | 13 percent percent NOUN NN Number=Sing 11 dobj _ _
14 | 14 in in ADP IN _ 15 case _ _
15 | 15 Mosul Mosul PROPN NNP Number=Sing 11 nmod _ SpaceAfter=No
16 | 16 , , PUNCT , _ 11 punct _ _
17 | 17 whereas whereas SCONJ IN _ 20 mark _ _
18 | 18 there there PRON EX _ 20 expl _ _
19 | 19 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 20 aux _ _
20 | 20 been be VERB VBN Tense=Past|VerbForm=Part 11 advcl _ _
21 | 21 a a DET DT Definite=Ind|PronType=Art 23 det _ _
22 | 22 big big ADJ JJ Degree=Pos 23 amod _ _
23 | 23 jump jump NOUN NN Number=Sing 20 nsubj _ _
24 | 24 in in ADP IN _ 26 case _ _
25 | 25 the the DET DT Definite=Def|PronType=Art 26 det _ _
26 | 26 number number NOUN NN Number=Sing 23 nmod _ _
27 | 27 of of ADP IN _ 28 case _ _
28 | 28 kidnappings kidnapping NOUN NNS Number=Plur 26 nmod _ SpaceAfter=No
29 | 29 . . PUNCT . _ 7 punct _ _
--------------------------------------------------------------------------------
/tests/data/vis_data.json:
--------------------------------------------------------------------------------
1 | {"directed": true, "multigraph": false, "graph": [["name", "ewt-dev-1"]], "nodes": [{"domain": "syntax", "type": "token", "position": 1, "form": "From", "lemma": "from", "upos": "ADP", "xpos": "IN", "id": "ewt-dev-1-syntax-1"}, {"domain": "syntax", "type": "token", "position": 2, "form": "the", "lemma": "the", "upos": "DET", "xpos": "DT", "Definite": "Def", "PronType": "Art", "id": "ewt-dev-1-syntax-2"}, {"domain": "syntax", "type": "token", "position": 3, "form": "AP", "lemma": "AP", "upos": "PROPN", "xpos": "NNP", "Number": "Sing", "id": "ewt-dev-1-syntax-3"}, {"domain": "syntax", "type": "token", "position": 4, "form": "comes", "lemma": "come", "upos": "VERB", "xpos": "VBZ", "Mood": "Ind", "Number": "Sing", "Person": "3", "Tense": "Pres", "VerbForm": "Fin", "id": "ewt-dev-1-syntax-4"}, {"domain": "syntax", "type": "token", "position": 5, "form": "this", "lemma": "this", "upos": "DET", "xpos": "DT", "Number": "Sing", "PronType": "Dem", "id": "ewt-dev-1-syntax-5"}, {"domain": "syntax", "type": "token", "position": 6, "form": "story", "lemma": "story", "upos": "NOUN", "xpos": "NN", "Number": "Sing", "id": "ewt-dev-1-syntax-6"}, {"domain": "syntax", "type": "token", "position": 7, "form": ":", "lemma": ":", "upos": "PUNCT", "xpos": ":", "id": "ewt-dev-1-syntax-7"}, {"position": 0, "domain": "root", "type": "root", "id": "ewt-dev-1-root-0"}, {"domain": "semantics", "frompredpatt": true, "type": "predicate", "factuality": {"factual": {"confidence": 1.0, "value": 0.967}}, "time": {"dur-weeks": {"confidence": 0.2564, "value": -1.3247}, "dur-decades": {"confidence": 0.2564, "value": -1.1146}, "dur-days": {"confidence": 0.2564, "value": 0.8558}, "dur-hours": {"confidence": 0.2564, "value": 0.9952}, "dur-seconds": {"confidence": 0.2564, "value": 0.8931}, "dur-forever": {"confidence": 0.2564, "value": -1.4626}, "dur-centuries": {"confidence": 0.2564, "value": -1.1688}, "dur-instant": {"confidence": 0.2564, "value": -1.4106}, "dur-years": {"confidence": 0.2564, "value": 
0.9252}, "dur-minutes": {"confidence": 0.2564, "value": -0.9337}, "dur-months": {"confidence": 0.2564, "value": -1.2142}}, "genericity": {"pred-dynamic": {"confidence": 0.627, "value": -0.0469}, "pred-hypothetical": {"confidence": 0.5067, "value": -0.0416}, "pred-particular": {"confidence": 1.0, "value": 1.1753}}, "id": "ewt-dev-1-semantics-pred-4"}, {"domain": "semantics", "frompredpatt": true, "type": "argument", "genericity": {"arg-kind": {"confidence": 1.0, "value": -1.1642}, "arg-abstract": {"confidence": 1.0, "value": -1.1642}, "arg-particular": {"confidence": 1.0, "value": 1.2257}}, "id": "ewt-dev-1-semantics-arg-3"}, {"domain": "semantics", "frompredpatt": true, "type": "argument", "wordsense": {"supersense-noun.object": {"confidence": 1.0, "value": -3.0}, "supersense-noun.Tops": {"confidence": 1.0, "value": -3.0}, "supersense-noun.quantity": {"confidence": 1.0, "value": -3.0}, "supersense-noun.feeling": {"confidence": 1.0, "value": -3.0}, "supersense-noun.food": {"confidence": 1.0, "value": -3.0}, "supersense-noun.shape": {"confidence": 1.0, "value": -3.0}, "supersense-noun.event": {"confidence": 1.0, "value": -3.0}, "supersense-noun.motive": {"confidence": 1.0, "value": -3.0}, "supersense-noun.substance": {"confidence": 1.0, "value": -3.0}, "supersense-noun.time": {"confidence": 1.0, "value": -3.0}, "supersense-noun.person": {"confidence": 1.0, "value": -3.0}, "supersense-noun.process": {"confidence": 1.0, "value": -3.0}, "supersense-noun.attribute": {"confidence": 1.0, "value": -3.0}, "supersense-noun.artifact": {"confidence": 1.0, "value": -1.3996}, "supersense-noun.group": {"confidence": 1.0, "value": -3.0}, "supersense-noun.animal": {"confidence": 1.0, "value": -3.0}, "supersense-noun.location": {"confidence": 1.0, "value": -3.0}, "supersense-noun.plant": {"confidence": 1.0, "value": -3.0}, "supersense-noun.possession": {"confidence": 1.0, "value": -3.0}, "supersense-noun.relation": {"confidence": 1.0, "value": -3.0}, "supersense-noun.phenomenon": 
{"confidence": 1.0, "value": -3.0}, "supersense-noun.cognition": {"confidence": 1.0, "value": -3.0}, "supersense-noun.act": {"confidence": 1.0, "value": -3.0}, "supersense-noun.state": {"confidence": 1.0, "value": -3.0}, "supersense-noun.communication": {"confidence": 1.0, "value": 0.2016}, "supersense-noun.body": {"confidence": 1.0, "value": -3.0}}, "genericity": {"arg-kind": {"confidence": 0.7138, "value": -0.035}, "arg-abstract": {"confidence": 1.0, "value": -1.1685}, "arg-particular": {"confidence": 1.0, "value": 1.2257}}, "id": "ewt-dev-1-semantics-arg-6"}, {"domain": "semantics", "type": "predicate", "frompredpatt": false, "id": "ewt-dev-1-semantics-pred-root"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-0"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-author"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-addressee"}], "adjacency": [[], [], [{"deprel": "case", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-1"}, {"deprel": "det", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-2"}], [{"deprel": "nmod", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-3"}, {"deprel": "nsubj", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-6"}, {"deprel": "punct", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-7"}], [], [{"deprel": "det", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-5"}], [], [{"deprel": "root", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-4"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-4"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-1"}, {"domain": "semantics", "type": "dependency", "frompredpatt": true, "protoroles": {"manner": {"confidence": 1.0, "value": -1.3932}, "location": {"confidence": 1.0, "value": 1.4353}, "time": 
{"confidence": 1.0, "value": -1.3913}, "purpose": {"confidence": 1.0, "value": -1.3941}}, "id": "ewt-dev-1-semantics-arg-3"}, {"domain": "semantics", "type": "dependency", "frompredpatt": true, "protoroles": {"instigation": {"confidence": 0.1128, "value": 0.0458}, "change_of_possession": {"confidence": 0.7669, "value": -0.0561}, "existed_before": {"confidence": 0.1128, "value": 0.1096}, "was_for_benefit": {"confidence": 0.7669, "value": -0.1343}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.7669, "value": -0.1343}, "volition": {"confidence": 0.3073, "value": -0.0}, "change_of_location": {"confidence": 0.7669, "value": -0.0561}, "partitive": {"confidence": 0.5736, "value": -0.2656}, "existed_during": {"confidence": 0.4211, "value": 0.236}, "existed_after": {"confidence": 0.4211, "value": 0.236}, "awareness": {"confidence": 0.7669, "value": -0.0}, "sentient": {"confidence": 0.4612, "value": -0.3556}, "was_used": {"confidence": 0.013, "value": -0.0204}}, "id": "ewt-dev-1-semantics-arg-6"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-3"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-2"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-6"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-5"}], [{"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-0"}, {"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-author"}, {"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-addressee"}], [{"domain": "semantics", "type": "head", "frompredpatt": false, "id": "ewt-dev-1-semantics-pred-4"}, {"domain": "interface", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-root-0"}], [], []]}
--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest==6.0.*
2 |
--------------------------------------------------------------------------------
/tests/test_dependency.py:
--------------------------------------------------------------------------------
1 | from numpy import array
2 | from networkx import DiGraph
3 | from decomp.syntax.dependency import DependencyGraphBuilder, CoNLLDependencyTreeCorpus
4 |
# CoNLL-U fixture: the UD parse used by every test in this module
rawtree = '''1 I I PRON PRP Case=Nom|Number=Sing|Person=1|PronType=Prs 4 nsubj _ _
2 ca can AUX MD VerbForm=Fin 4 aux _ SpaceAfter=No
3 n't not PART RB _ 4 advmod _ _
4 imagine imagine VERB VB VerbForm=Inf 0 root _ _
5 they they PRON PRP Case=Nom|Number=Plur|Person=3|PronType=Prs 6 nsubj _ _
6 wanted want VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 4 ccomp _ _
7 to to PART TO _ 8 mark _ _
8 do do VERB VB VerbForm=Inf 6 xcomp _ _
9 this this PRON DT Number=Sing|PronType=Dem 8 obj _ SpaceAfter=No
10 . . PUNCT . _ 4 punct _ _'''

# surface form of the parse above (unused by the tests; kept for reference)
sentence = "I ca n't imagine they wanted to do this ."

# one list of CoNLL column values per token row
listtree = [l.split() for l in rawtree.split('\n')]
19 |
20 |
def setup_tree():
    """Build the dependency graph for the fixture CoNLL tree."""
    builder = DependencyGraphBuilder()
    return builder.from_conll(listtree, 'tree1')
26 |
27 |
def setup_corpus():
    """Build a two-tree corpus by duplicating the fixture tree."""
    trees = {name: listtree for name in ('tree1', 'tree2')}
    return CoNLLDependencyTreeCorpus(trees)
35 |
36 |
# could use @nose.with_setup
def test_dependency_tree_builder():
    """The builder should reproduce the CoNLL rows as node/edge attributes."""
    tree = setup_tree()

    assert tree.name == 'tree1'
    assert (tree.graph['conll'] == array(listtree)).all()

    print(tree.nodes['tree1-root-0'])
    # test the root
    assert tree.nodes['tree1-root-0'] == {'position': 0,
                                          'domain': 'root',
                                          'type': 'root'}

    # test syntax nodes: node ids are strings ending in the CoNLL row index
    # (e.g. 'tree1-syntax-3'), so compare the numeric suffix as a string.
    # The original compared int(row[0]) to the full id string, which was
    # never equal and silently skipped every attribute assertion.
    for idx, node in tree.nodes.items():
        position = idx.split('-')[-1]
        for row in listtree:
            if row[0] == position:
                assert node['form'] == row[1]
                assert node['lemma'] == row[2]
                assert node['upos'] == row[3]
                assert node['xpos'] == row[4]

    # test edges: the dependent's head column (row[6]) must match the
    # numeric suffix of the parent node id (same int-vs-str fix as above)
    for (idx1, idx2), edge in tree.edges.items():
        parent_position = idx1.split('-')[-1]
        child_position = idx2.split('-')[-1]
        for row in listtree:
            if row[0] == child_position:
                assert row[6] == parent_position
                assert row[7] == edge['deprel']
64 |
65 |
def test_dependency_tree_corpus():
    """A corpus should map string graph ids to DiGraph instances."""
    corpus = setup_corpus()

    for gid, graph in corpus.graphs.items():
        assert isinstance(graph, DiGraph)
    for gid, graph in corpus.items():
        assert isinstance(graph, DiGraph)
    for gid in corpus:
        assert isinstance(gid, str)
72 |
--------------------------------------------------------------------------------
/tests/test_predpatt.py:
--------------------------------------------------------------------------------
1 | from io import StringIO
2 | from networkx import DiGraph
3 | from predpatt import load_conllu, PredPatt, PredPattOpts
4 | from decomp.syntax.dependency import DependencyGraphBuilder
5 | from decomp.semantics.predpatt import PredPattCorpus, PredPattGraphBuilder
6 |
# CoNLL-U fixture: the UD parse used by every test in this module
rawtree = '''1 The the DET DT Definite=Def|PronType=Art 3 det _ _
2 police police NOUN NN Number=Sing 3 compound _ _
3 commander commander NOUN NN Number=Sing 7 nsubj _ _
4 of of ADP IN _ 6 case _ _
5 Ninevah Ninevah PROPN NNP Number=Sing 6 compound _ _
6 Province Province PROPN NNP Number=Sing 3 nmod _ _
7 announced announce VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root _ _
8 that that SCONJ IN _ 11 mark _ _
9 bombings bombing NOUN NNS Number=Plur 11 nsubj _ _
10 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 11 aux _ _
11 declined decline VERB VBN Tense=Past|VerbForm=Part 7 ccomp _ _
12 80 80 NUM CD NumType=Card 13 nummod _ _
13 percent percent NOUN NN Number=Sing 11 dobj _ _
14 in in ADP IN _ 15 case _ _
15 Mosul Mosul PROPN NNP Number=Sing 11 nmod _ SpaceAfter=No
16 , , PUNCT , _ 11 punct _ _
17 whereas whereas SCONJ IN _ 20 mark _ _
18 there there PRON EX _ 20 expl _ _
19 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 20 aux _ _
20 been be VERB VBN Tense=Past|VerbForm=Part 11 advcl _ _
21 a a DET DT Definite=Ind|PronType=Art 23 det _ _
22 big big ADJ JJ Degree=Pos 23 amod _ _
23 jump jump NOUN NN Number=Sing 20 nsubj _ _
24 in in ADP IN _ 26 case _ _
25 the the DET DT Definite=Def|PronType=Art 26 det _ _
26 number number NOUN NN Number=Sing 23 nmod _ _
27 of of ADP IN _ 28 case _ _
28 kidnappings kidnapping NOUN NNS Number=Plur 26 nmod _ SpaceAfter=No
29 . . PUNCT . _ 7 punct _ _'''

# surface form of the parse above (unused by the tests; kept for reference)
sentence = 'The police commander of Ninevah Province announced that bombings had declined 80 percent in Mosul , whereas there had been a big jump in the number of kidnappings .'

# one list of CoNLL column values per token row
listtree = [l.split() for l in rawtree.split('\n')]
40 |
def setup_graph():
    """Build the PredPatt object and its decomp graph for the fixture tree."""
    ud = DependencyGraphBuilder.from_conll(listtree, 'tree1')

    opts = PredPattOpts(resolve_relcl=True,
                        borrow_arg_for_relcl=True,
                        resolve_conj=False,
                        cut=True)
    _, parsed = next(load_conllu(rawtree))
    pp = PredPatt(parsed, opts=opts)

    return pp, PredPattGraphBuilder.from_predpatt(pp, ud, 'tree1')
53 |
def setup_corpus_from_str():
    """Load a corpus directly from the raw CoNLL string."""
    corpus = PredPattCorpus.from_conll(rawtree)
    return corpus
56 |
def setup_corpus_from_io():
    """Load a corpus from a file-like object wrapping the CoNLL string."""
    buffer = StringIO(rawtree)
    return PredPattCorpus.from_conll(buffer)
60 |
## could use @nose.with_setup
def test_predpatt_graph_builder():
    """The PredPatt graph should contain the syntax tree plus semantics nodes."""
    pp, pp_graph = setup_graph()

    assert pp_graph.name == 'tree1'
    assert all(['tree1' in nodeid
                for nodeid in pp_graph.nodes])

    # test syntax nodes
    print(pp_graph.nodes['tree1-root-0'])
    assert pp_graph.nodes['tree1-root-0'] == {'position': 0,
                                              'domain': 'root',
                                              'type': 'root'}

    # node ids end in the CoNLL row index; compare as strings. The original
    # int(row[0]) == idx comparison (int vs. str) was always False and
    # silently skipped every attribute assertion below.
    for idx, node in pp_graph.nodes.items():
        if 'syntax' in idx:
            idx = idx.split('-')[-1]
            for row in listtree:
                if row[0] == idx:
                    assert node['form'] == row[1]
                    assert node['lemma'] == row[2]
                    assert node['upos'] == row[3]
                    assert node['xpos'] == row[4]

    for (idx1, idx2), edge in pp_graph.edges.items():
        if 'syntax' in idx1 and 'syntax' in idx2:
            idx1, idx2 = idx1.split('-')[-1], idx2.split('-')[-1]
            for row in listtree:
                # same int-vs-str fix for the head column (row[6])
                if row[0] == idx2:
                    assert row[6] == idx1
                    assert row[7] == edge['deprel']

    # test semantics nodes
    assert 'tree1-semantics-pred-0' not in pp_graph.nodes
    assert 'tree1-semantics-arg-0' not in pp_graph.nodes

    assert all(['arg' in nodeid or 'pred' in nodeid
                for nodeid in pp_graph.nodes
                if 'semantics' in nodeid])

    assert all(['domain' in pp_graph.nodes[nodeid]
                for nodeid in pp_graph.nodes
                if 'semantics' in nodeid])

    assert all([pp_graph.nodes[nodeid]['domain'] == 'semantics'
                for nodeid in pp_graph.nodes
                if 'semantics' in nodeid])

    assert all(['type' in pp_graph.nodes[nodeid]
                for nodeid in pp_graph.nodes
                if 'semantics' in nodeid])

    assert all([pp_graph.nodes[nodeid]['type'] in ['argument', 'predicate']
                for nodeid in pp_graph.nodes
                if 'semantics' in nodeid])

    # 'arg'/'pred' in the node id must agree with the node's declared type
    assert all([('arg' in nodeid) ==
                (pp_graph.nodes[nodeid]['type'] == 'argument')
                for nodeid in pp_graph.nodes
                if 'semantics' in nodeid])

    assert all([('pred' in nodeid) ==
                (pp_graph.nodes[nodeid]['type'] == 'predicate')
                for nodeid in pp_graph.nodes
                if 'semantics' in nodeid])

    assert all(['arg' not in nodeid and 'pred' not in nodeid
                for nodeid in pp_graph.nodes
                if 'syntax' in nodeid])

    # test argument edges
    assert all([pp_graph.edges[(nodeid2, nodeid1)]['domain'] == 'semantics' and
                pp_graph.edges[(nodeid2, nodeid1)]['type'] == 'dependency'
                for nodeid1, node1 in pp_graph.nodes.items()
                for nodeid2 in pp_graph.nodes
                if 'semantics-arg' in nodeid1
                if 'semantics-pred' in nodeid2
                if (nodeid2, nodeid1) in pp_graph.edges])

    # tests subpredicate edges
    subprededge = ('tree1-semantics-arg-11', 'tree1-semantics-pred-11')
    assert pp_graph.edges[subprededge]['domain'] == 'semantics'
    assert pp_graph.edges[subprededge]['type'] == 'head'

    assert all([(nodeid2, nodeid1) in pp_graph.edges and
                pp_graph.edges[(nodeid2, nodeid1)]['domain'] == 'semantics' and
                pp_graph.edges[(nodeid2, nodeid1)]['type'] == 'head'
                for nodeid1, node1 in pp_graph.nodes.items()
                for nodeid2 in pp_graph.nodes
                if 'semantics-pred' in nodeid1
                if 'semantics-arg' in nodeid2
                if nodeid1.split('-')[-1] == nodeid2.split('-')[-1]])
153 |
def test_predpatt_corpus():
    """Both string and file-like inputs should yield DiGraph corpora."""
    def check(corpus):
        # every graph must be a DiGraph and every id a string
        for gid, graph in corpus.graphs.items():
            assert isinstance(graph, DiGraph)
        for gid, graph in corpus.items():
            assert isinstance(graph, DiGraph)
        for gid in corpus:
            assert isinstance(gid, str)

    check(setup_corpus_from_str())
    check(setup_corpus_from_io())
166 |
--------------------------------------------------------------------------------
/tests/test_uds_annotation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import os, json
4 |
5 | from pprint import pprint
6 |
7 | from decomp.semantics.uds.metadata import UDSAnnotationMetadata
8 | from decomp.semantics.uds.annotation import UDSAnnotation
9 | from decomp.semantics.uds.annotation import NormalizedUDSAnnotation
10 | from decomp.semantics.uds.annotation import RawUDSAnnotation
11 |
class TestUDSAnnotation:
    """UDSAnnotation is abstract and must not be constructed directly."""

    def test_direct_instantiation_of_uds_annotation_fails(self):
        # constructing the abstract base class should raise TypeError
        pytest.raises(TypeError, UDSAnnotation, None)
17 |
class TestNormalizedUDSAnnotation:
    """from_json should round-trip metadata and per-graph attributes."""

    def test_from_json(self,
                       normalized_node_sentence_annotation,
                       normalized_edge_sentence_annotation,
                       normalized_sentence_annotations):
        node_ann, edge_ann = normalized_sentence_annotations
        node_json = json.loads(normalized_node_sentence_annotation)
        edge_json = json.loads(normalized_edge_sentence_annotation)

        # metadata must match what UDSAnnotationMetadata builds from the raw JSON
        assert node_ann.metadata == UDSAnnotationMetadata.from_dict(node_json['metadata'])
        assert edge_ann.metadata == UDSAnnotationMetadata.from_dict(edge_json['metadata'])

        # a node-only annotation carries no edge attributes, and every
        # node's attributes match the parsed JSON
        for n, (node_attrs, edge_attrs) in node_ann.items():
            assert not edge_attrs
            for key, val in node_attrs.items():
                assert node_json['data']['tree1'][key] == val

        # symmetric check for the edge-only annotation; edge keys are
        # serialized in JSON as the two node ids joined by '%%'
        for n, (node_attrs, edge_attrs) in edge_ann.items():
            assert not node_attrs
            for key, val in edge_attrs.items():
                assert edge_json['data']['tree1']['%%'.join(key)] == val
44 |
class TestRawUDSAnnotation:
    """Tests for RawUDSAnnotation: per-annotator (unnormalized) annotations."""

    def test_from_json(self,
                       raw_node_sentence_annotation,
                       raw_edge_sentence_annotation,
                       raw_sentence_annotations):
        # fixtures supply both the parsed annotation objects and the raw
        # JSON strings they were built from
        raw_node_ann, raw_edge_ann = raw_sentence_annotations
        raw_node_ann_direct = json.loads(raw_node_sentence_annotation)
        raw_edge_ann_direct = json.loads(raw_edge_sentence_annotation)

        # metadata must round-trip through UDSAnnotationMetadata.from_dict
        assert raw_node_ann.metadata == UDSAnnotationMetadata.from_dict(raw_node_ann_direct['metadata'])
        assert raw_edge_ann.metadata == UDSAnnotationMetadata.from_dict(raw_edge_ann_direct['metadata'])

        # a node-only annotation must carry no edge attributes ...
        assert all([not edge_attrs
                    for n, (node_attrs, edge_attrs) in raw_node_ann.items()])

        # ... and its node attributes must match the parsed JSON
        assert all([raw_node_ann_direct['data']['tree1'][k] == v
                    for n, (node_attrs, edge_attrs) in raw_node_ann.items()
                    for k, v in node_attrs.items()])

        # symmetric checks for the edge-only annotation; edge keys are
        # serialized in JSON as the two node ids joined by '%%'
        assert all([not node_attrs
                    for n, (node_attrs, edge_attrs) in raw_edge_ann.items()])

        assert all([raw_edge_ann_direct['data']['tree1']['%%'.join(k)] == v
                    for n, (node_attrs, edge_attrs) in raw_edge_ann.items()
                    for k, v in edge_attrs.items()])


    def test_annotators(self, raw_sentence_annotations, test_data_dir):
        # annotators() should return exactly the ids listed in the fixture files
        raw_node_ann, raw_edge_ann = raw_sentence_annotations

        with open(os.path.join(test_data_dir, 'raw_node_sentence_annotators.txt')) as f:
            assert raw_node_ann.annotators() == {line.strip() for line in f}

        with open(os.path.join(test_data_dir, 'raw_edge_sentence_annotators.txt')) as f:
            assert raw_edge_ann.annotators() == {line.strip() for line in f}

    def test_items(self, raw_sentence_annotations):
        raw_node_ann, raw_edge_ann = raw_sentence_annotations

        # verify that items by annotator generator works
        for gid, (node_attrs, edge_attrs) in raw_node_ann.items(annotator_id='genericity-pred-annotator-88'):
            assert gid == 'tree1'
            assert json.dumps(node_attrs) == '{"tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 0, "value": 1}, "pred-hypothetical": {"confidence": 0, "value": 1}, "pred-particular": {"confidence": 0, "value": 1}}}}'
            assert json.dumps(edge_attrs) == '{}'

        # verify that node attribute-only generator works
        for gid, node_attrs in raw_node_ann.items(annotation_type="node",
                                                  annotator_id='genericity-pred-annotator-88'):
            assert gid == 'tree1'
            assert json.dumps(node_attrs) == '{"tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 0, "value": 1}, "pred-hypothetical": {"confidence": 0, "value": 1}, "pred-particular": {"confidence": 0, "value": 1}}}}'

        # generator for edge attributes for the node attribute-only annotation
        # should yield empty results for the graph
        with pytest.raises(ValueError):
            for gid, edge_attrs in raw_node_ann.items(annotation_type="edge",
                                                      annotator_id='genericity-pred-annotator-88'):
                pass

        # verify that edge attribute-only generator works
        for gid, (node_attrs, edge_attrs) in raw_edge_ann.items(annotator_id='protoroles-annotator-14'):
            assert gid == 'tree1'
            assert json.dumps({'%%'.join(e): attrs for e, attrs in edge_attrs.items()}) == '{"tree1-semantics-pred-11%%tree1-semantics-arg-9": {"protoroles": {"awareness": {"confidence": 1, "value": 4}, "change_of_location": {"confidence": 1, "value": 4}, "change_of_possession": {"confidence": 1, "value": 4}, "change_of_state": {"confidence": 1, "value": 4}, "change_of_state_continuous": {"confidence": 1, "value": 4}, "existed_after": {"confidence": 1, "value": 4}, "existed_before": {"confidence": 1, "value": 4}, "existed_during": {"confidence": 1, "value": 4}, "instigation": {"confidence": 1, "value": 4}, "partitive": {"confidence": 1, "value": 4}, "sentient": {"confidence": 1, "value": 4}, "volition": {"confidence": 1, "value": 4}, "was_for_benefit": {"confidence": 1, "value": 4}, "was_used": {"confidence": 1, "value": 4}}}}'

        # generator for node attributes for the edge attribute-only annotation
        # should yield empty results for the graph
        with pytest.raises(ValueError):
            for gid, node_attrs in raw_edge_ann.items(annotation_type="node",
                                                      annotator_id='protoroles-annotator-14'):
                pass
115 |
--------------------------------------------------------------------------------
/tests/test_uds_corpus.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import logging
4 | import pytest
5 |
6 | from glob import glob
7 | from pkg_resources import resource_filename
8 | from decomp.semantics.uds import UDSCorpus
9 |
10 | test_document_name = 'answers-20111105112131AA6gIX6_ans'
11 | test_document_genre = 'answers'
12 | test_document_timestamp = '20111105112131'
13 | test_document_text = 'My dad just does n\'t understand ? Ugh my dad is so stupid ... he just does n\'t understand anything ! I have 5 sisters and so including my mom ... he is the only guy in a house of six females . Now I \'m the youngest and I just got my period so now we all have ours and he thinks it \'s a good thing ? He \'s always like " ohh you must be so happy to finally have yours , I wish I had mine ! " and he is n\'t even joking . I think just living in a house with so many girls is making him go crazy ? Yep , the females are just getting to him ... dads .. Do n\'t blame him please , he feels lonely and wants to show his attention to all of you to look after you , please forgive and sympathy if he miss something . I am sorry for him , he is a good dad'
14 | test_document_sentence_ids = {'ewt-train-7189': 'answers-20111105112131AA6gIX6_ans-0001',
15 | 'ewt-train-7190': 'answers-20111105112131AA6gIX6_ans-0002',
16 | 'ewt-train-7191': 'answers-20111105112131AA6gIX6_ans-0003',
17 | 'ewt-train-7192': 'answers-20111105112131AA6gIX6_ans-0004',
18 | 'ewt-train-7193': 'answers-20111105112131AA6gIX6_ans-0005',
19 | 'ewt-train-7194': 'answers-20111105112131AA6gIX6_ans-0006',
20 | 'ewt-train-7195': 'answers-20111105112131AA6gIX6_ans-0007',
21 | 'ewt-train-7196': 'answers-20111105112131AA6gIX6_ans-0008',
22 | 'ewt-train-7197': 'answers-20111105112131AA6gIX6_ans-0009'}
23 | test_document_node = 'ewt-train-7195-document-pred-7'
24 | test_document_semantics_node_normalized = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics',
25 | 'frompredpatt': True,
26 | 'type': 'predicate',
27 | 'factuality': {'factual': {'confidence': 1.0, 'value': 1.2225}},
28 | 'time': {'dur-weeks': {'confidence': 0.3991, 'value': 0.7263},
29 | 'dur-decades': {'confidence': 0.3991, 'value': -1.378},
30 | 'dur-days': {'confidence': 0.3991, 'value': 0.7498},
31 | 'dur-hours': {'confidence': 0.3991, 'value': -1.1733},
32 | 'dur-seconds': {'confidence': 0.3991, 'value': -1.4243},
33 | 'dur-forever': {'confidence': 0.3991, 'value': -1.2803},
34 | 'dur-centuries': {'confidence': 0.3991, 'value': -1.1213},
35 | 'dur-instant': {'confidence': 0.3991, 'value': -1.3219},
36 | 'dur-years': {'confidence': 0.3991, 'value': -1.1953},
37 | 'dur-minutes': {'confidence': 0.3991, 'value': 0.8558},
38 | 'dur-months': {'confidence': 0.3991, 'value': 0.6852}},
39 | 'genericity': {'pred-dynamic': {'confidence': 1.0, 'value': 1.1508},
40 | 'pred-hypothetical': {'confidence': 1.0, 'value': -1.1583},
41 | 'pred-particular': {'confidence': 1.0, 'value': 1.1508}}}}
# expected raw-format attributes for the test document's predicate node:
# raw annotations keep per-annotator value/confidence maps instead of a
# single aggregated score
test_document_semantics_node_raw = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics', 'frompredpatt': True, 'type': 'predicate', 'factuality': {'factual': {'value': {'factuality-annotator-26': 1, 'factuality-annotator-34': 1}, 'confidence': {'factuality-annotator-26': 4, 'factuality-annotator-34': 4}}}, 'time': {'duration': {'value': {'time-annotator-508': 4, 'time-annotator-619': 6, 'time-annotator-310': 5, 'time-annotator-172': 4, 'time-annotator-448': 5, 'time-annotator-548': 6}, 'confidence': {'time-annotator-508': 2, 'time-annotator-619': 4, 'time-annotator-310': 4, 'time-annotator-172': 4, 'time-annotator-448': 1, 'time-annotator-548': 2}}}, 'genericity': {'pred-dynamic': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-hypothetical': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-particular': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}}}}


# expected corpus-wide totals (sentence graphs and documents)
total_graphs = 16622
total_documents = 1174


# location of the data shipped with the decomp package
data_dir = resource_filename('decomp', 'data/')
50 |
51 |
def _load_corpus(base, version, annotation_format):
    """Load a UDSCorpus, caching its serialized graphs under ``base``.

    Parameters
    ----------
    base
        Directory to use as the corpus cache (set as UDSCorpus.CACHE_DIR).
    version
        UDS release version, e.g. '1.0' or '2.0'.
    annotation_format
        Either 'raw' or 'normalized'.
    """
    UDSCorpus.CACHE_DIR = base

    # exist_ok=True replaces the old try/except FileExistsError dance
    os.makedirs(os.path.join(base,
                             version,
                             annotation_format,
                             'sentence/'),
                exist_ok=True)
    os.makedirs(os.path.join(base,
                             version,
                             annotation_format,
                             'document/'),
                exist_ok=True)

    return UDSCorpus(version=version,
                     annotation_format=annotation_format)
70 |
def _assert_correct_corpus_initialization(uds, raw):
    """Check that ``uds`` loaded all graphs/documents and the test document.

    Parameters
    ----------
    uds
        The loaded UDSCorpus.
    raw
        True when the corpus was loaded with the 'raw' annotation format.
    """
    # all graphs and documents initialized
    assert uds.ngraphs == total_graphs
    assert uds.ndocuments == total_documents

    # every sentence graph is reachable through exactly one document
    n_sentence_graphs = sum(len(uds.documents[doc_id].sentence_graphs)
                            for doc_id in uds.documentids)
    assert n_sentence_graphs == total_graphs

    # inspect a test document
    test_doc = uds.documents[test_document_name]
    assert test_doc.genre == test_document_genre
    assert test_doc.timestamp == test_document_timestamp
    assert test_doc.sentence_ids == test_document_sentence_ids
    assert test_doc.text == test_document_text
    assert test_doc.document_graph is not None

    if raw:
        assert uds.annotation_format == 'raw'
        # NOTE(review): the semantics-node comparison is intentionally
        # disabled; re-enable once the expected dict matches the loader output
        #assert test_doc.semantics_node(test_document_node) == test_document_semantics_node_raw
    else:
        assert uds.annotation_format == 'normalized'
        #assert test_doc.semantics_node(test_document_node) == test_document_semantics_node_normalized
99 |
def _assert_document_annotation(uds, raw):
    """Check that document-level node/edge annotations landed on the graph.

    Parameters
    ----------
    uds
        The loaded UDSCorpus.
    raw
        True to check the raw-format annotation fixtures, False for normalized.
    """
    if raw:
        node_ann, edge_ann = setup_raw_document_annotations()
    else:
        node_ann, edge_ann = setup_normalized_document_annotations()

    # the fixtures annotate a single document; next(iter(...)) fetches its
    # name without materializing the whole key list
    document = next(iter(node_ann.node_attributes))

    # assert node annotations
    node_ann_attrs = dict(next(iter(node_ann.node_attributes.values())))

    for doc_node, node_annotation in node_ann_attrs.items():
        for k, v in node_annotation.items():
            assert uds.documents[document].document_graph.nodes[doc_node][k] == v

    # assert edge annotations
    edge_ann_attrs = dict(next(iter(edge_ann.edge_attributes.values())))

    for doc_edge, edge_annotation in edge_ann_attrs.items():
        for k, v in edge_annotation.items():
            assert uds.documents[document].document_graph.edges[doc_edge][k] == v
121 |
class TestUDSCorpus:
    """End-to-end corpus loading tests (slow: they download/build graphs)."""

    # @pytest.mark.slow
    # def test_load_v1_normalized(self, tmp_path, caplog):
    #     caplog.set_level(logging.WARNING)

    #     uds = _load_corpus(tmp_path, '1.0', 'normalized')

    #     raw = False

    #     _assert_correct_corpus_initialization(uds, raw)
    #     #_assert_document_annotation(uds, raw)

    #     # reload the UDSCorpus, which will initialize it from
    #     # the now-serialized graphs
    #     uds_cached = _load_corpus(tmp_path, '1.0', 'normalized')

    #     _assert_correct_corpus_initialization(uds_cached, raw)
    #     #_assert_document_annotation(uds_cached, raw)


    # @pytest.mark.slow
    # def test_load_v2_normalized(self, tmp_path, caplog):
    #     caplog.set_level(logging.WARNING)

    #     uds = _load_corpus(tmp_path, '2.0', 'normalized')

    #     raw = False

    #     _assert_correct_corpus_initialization(uds, raw)
    #     #_assert_document_annotation(uds, raw)

    #     # reload the UDSCorpus, which will initialize it from
    #     # the now-serialized graphs
    #     uds_cached = _load_corpus(tmp_path, '2.0', 'normalized')

    #     _assert_correct_corpus_initialization(uds_cached, raw)
    #     #_assert_document_annotation(uds_cached, raw)

    # @pytest.mark.slow
    # def test_load_v1_raw(self, tmp_path, caplog):
    #     caplog.set_level(logging.WARNING)

    #     uds = _load_corpus(tmp_path, '1.0', 'raw')

    #     raw = True

    #     _assert_correct_corpus_initialization(uds, raw)
    #     #_assert_document_annotation(uds, raw)

    #     # reload the UDSCorpus, which will initialize it from
    #     # the now-serialized graphs
    #     uds_cached = _load_corpus(tmp_path, '1.0', 'raw')

    #     _assert_correct_corpus_initialization(uds_cached, raw)
    #     #_assert_document_annotation(uds_cached, raw)

    @pytest.mark.slow
    def test_load_v2_raw(self, tmp_path, caplog):
        """Load the v2.0 raw-format corpus, then verify the cached reload.

        The previous version left debug prints and an unconditional
        ``raise Exception`` in the body, which aborted the test before the
        cached-corpus assertions ever ran; both are removed here.
        """
        caplog.set_level(logging.WARNING)

        uds = _load_corpus(tmp_path, '2.0', 'raw')

        raw = True

        _assert_correct_corpus_initialization(uds, raw)
        #_assert_document_annotation(uds, raw)

        # reload the UDSCorpus, which will initialize it from
        # the now-serialized graphs
        uds_cached = _load_corpus(tmp_path, '2.0', 'raw')

        _assert_correct_corpus_initialization(uds_cached, raw)
        #_assert_document_annotation(uds_cached, raw)
206 |
207 | # def _test_uds_corpus_load(version, raw, data_dir):
208 | # # Remove cached graphs
209 | # if raw:
210 | # annotation_format = 'raw'
211 | # else:
212 | # annotation_format = 'normalized'
213 |
214 | # sentence_path = os.path.join(data_dir, version, annotation_format, 'sentence')
215 | # doc_path = os.path.join(data_dir, version, annotation_format, 'document')
216 |
217 | # if glob(os.path.join(sentence_path, '*.json')):
218 | # os.system('rm ' + sentence_path + '/*.json')
219 |
220 | # if glob(os.path.join(doc_path, '*.json')):
221 | # os.system('rm ' + doc_path + '/*.json')
222 |
223 |
224 | # annotations_dir = os.path.join(doc_path, 'annotations')
225 | # if not glob(annotations_dir):
226 | # os.system('mkdir ' + annotations_dir)
227 | # if raw:
228 | # # Dump the test annotations to JSON files
229 | # raw_node_ann = json.loads(raw_node_document_annotation)
230 | # raw_edge_ann = json.loads(raw_edge_document_annotation)
231 | # raw_node_ann_path = os.path.join(annotations_dir, 'raw_node.json')
232 | # raw_edge_ann_path = os.path.join(annotations_dir, 'raw_edge.json')
233 | # annotations = [raw_node_ann, raw_edge_ann]
234 | # paths = [raw_node_ann_path, raw_edge_ann_path]
235 | # else:
236 | # norm_node_ann = json.loads(normalized_node_document_annotation)
237 | # norm_edge_ann = json.loads(normalized_edge_document_annotation)
238 | # norm_node_ann_path = os.path.join(annotations_dir, 'norm_node.json')
239 | # norm_edge_ann_path = os.path.join(annotations_dir, 'norm_edge.json')
240 | # annotations = [norm_node_ann, norm_edge_ann]
241 | # paths = [norm_node_ann_path, norm_edge_ann_path]
242 |
243 |
244 | # for ann, path in zip(annotations, paths):
245 | # os.system('touch ' + path)
246 | # with open(path, 'w') as out:
247 | # json.dump(ann, out)
248 |
249 | # # Load the UDSCorpus without any options
250 | # uds = UDSCorpus(version=version, annotation_format=annotation_format)
251 | # assert_correct_corpus_initialization(uds, raw)
252 | # assert_document_annotation(uds, raw)
253 |
254 | # # Reload the UDSCorpus, which will initialize it from
255 | # # the now-serialized graphs
256 | # uds_cached = UDSCorpus(version=version, annotation_format=annotation_format)
257 | # assert_correct_corpus_initialization(uds_cached, raw)
258 | # assert_document_annotation(uds, raw)
259 |
260 | # # Remove the cached graphs and annotations
261 | # os.system('rm ' + sentence_path + '/*.json')
262 | # os.system('rm ' + doc_path + '/*.json')
263 | # for path in paths:
264 | # os.system('rm ' + path)
265 |
--------------------------------------------------------------------------------
/tests/test_uds_document.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
# identifying attributes of the EWT document used throughout these tests
test_document_name = 'answers-20111105112131AA6gIX6_ans'
test_document_genre = 'answers'
test_document_timestamp = '20111105112131'


# full (tokenized) text of the test document
test_document_text = 'My dad just does n\'t understand ? Ugh my dad is so stupid ... he just does n\'t understand anything ! I have 5 sisters and so including my mom ... he is the only guy in a house of six females . Now I \'m the youngest and I just got my period so now we all have ours and he thinks it \'s a good thing ? He \'s always like " ohh you must be so happy to finally have yours , I wish I had mine ! " and he is n\'t even joking . I think just living in a house with so many girls is making him go crazy ? Yep , the females are just getting to him ... dads .. Do n\'t blame him please , he feels lonely and wants to show his attention to all of you to look after you , please forgive and sympathy if he miss something . I am sorry for him , he is a good dad'

# mapping from EWT sentence graph IDs to their document-level sentence IDs
test_document_sentence_ids = {'ewt-train-7189': 'answers-20111105112131AA6gIX6_ans-0001',
                              'ewt-train-7190': 'answers-20111105112131AA6gIX6_ans-0002',
                              'ewt-train-7191': 'answers-20111105112131AA6gIX6_ans-0003',
                              'ewt-train-7192': 'answers-20111105112131AA6gIX6_ans-0004',
                              'ewt-train-7193': 'answers-20111105112131AA6gIX6_ans-0005',
                              'ewt-train-7194': 'answers-20111105112131AA6gIX6_ans-0006',
                              'ewt-train-7195': 'answers-20111105112131AA6gIX6_ans-0007',
                              'ewt-train-7196': 'answers-20111105112131AA6gIX6_ans-0008',
                              'ewt-train-7197': 'answers-20111105112131AA6gIX6_ans-0009'}

# document-graph node whose semantics counterpart is inspected below
test_document_node = 'ewt-train-7195-document-pred-7'

# expected semantics-node attributes under the *normalized* annotation
# format: one aggregated value/confidence pair per property
test_document_semantics_node_normalized = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics',
    'frompredpatt': True,
    'type': 'predicate',
    'factuality': {'factual': {'confidence': 1.0, 'value': 1.2225}},
    'time': {'dur-weeks': {'confidence': 0.3991, 'value': 0.7263},
             'dur-decades': {'confidence': 0.3991, 'value': -1.378},
             'dur-days': {'confidence': 0.3991, 'value': 0.7498},
             'dur-hours': {'confidence': 0.3991, 'value': -1.1733},
             'dur-seconds': {'confidence': 0.3991, 'value': -1.4243},
             'dur-forever': {'confidence': 0.3991, 'value': -1.2803},
             'dur-centuries': {'confidence': 0.3991, 'value': -1.1213},
             'dur-instant': {'confidence': 0.3991, 'value': -1.3219},
             'dur-years': {'confidence': 0.3991, 'value': -1.1953},
             'dur-minutes': {'confidence': 0.3991, 'value': 0.8558},
             'dur-months': {'confidence': 0.3991, 'value': 0.6852}},
    'genericity': {'pred-dynamic': {'confidence': 1.0, 'value': 1.1508},
                   'pred-hypothetical': {'confidence': 1.0, 'value': -1.1583},
                   'pred-particular': {'confidence': 1.0, 'value': 1.1508}}}}

# expected semantics-node attributes under the *raw* annotation format:
# per-annotator value/confidence maps instead of aggregated scores
test_document_semantics_node_raw = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics', 'frompredpatt': True, 'type': 'predicate', 'factuality': {'factual': {'value': {'factuality-annotator-26': 1, 'factuality-annotator-34': 1}, 'confidence': {'factuality-annotator-26': 4, 'factuality-annotator-34': 4}}}, 'time': {'duration': {'value': {'time-annotator-508': 4, 'time-annotator-619': 6, 'time-annotator-310': 5, 'time-annotator-172': 4, 'time-annotator-448': 5, 'time-annotator-548': 6}, 'confidence': {'time-annotator-508': 2, 'time-annotator-619': 4, 'time-annotator-310': 4, 'time-annotator-172': 4, 'time-annotator-448': 1, 'time-annotator-548': 2}}}, 'genericity': {'pred-dynamic': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-hypothetical': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-particular': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}}}}
42 |
@pytest.fixture
def normalized_node_document_annotation(test_data_dir):
    """Return the JSON text of a normalized node-level document annotation."""
    # local import: this module only imports pytest at the top level, so
    # os.path.join would raise NameError when the fixture runs
    import os

    fpath = os.path.join(test_data_dir,
                         'normalized_node_document_annotation.json')
    with open(fpath) as f:
        return f.read()
49 |
50 |
@pytest.fixture
def normalized_edge_document_annotation(test_data_dir):
    """Return the JSON text of a normalized edge-level document annotation."""
    # local import: this module only imports pytest at the top level, so
    # os.path.join would raise NameError when the fixture runs
    import os

    fpath = os.path.join(test_data_dir,
                         'normalized_edge_document_annotation.json')
    with open(fpath) as f:
        return f.read()
57 |
58 |
@pytest.fixture
def normalized_document_annotations(normalized_node_document_annotation,
                                    normalized_edge_document_annotation):
    # Parse the node- and edge-level JSON fixtures into annotation objects.
    # NOTE(review): NormalizedUDSAnnotation is not imported in this module
    # (only pytest is) — confirm it is injected elsewhere (e.g. a conftest)
    # or add `from decomp import NormalizedUDSAnnotation` at the top.
    norm_node_ann = NormalizedUDSAnnotation.from_json(normalized_node_document_annotation)
    norm_edge_ann = NormalizedUDSAnnotation.from_json(normalized_edge_document_annotation)

    return norm_node_ann, norm_edge_ann
66 |
67 |
@pytest.fixture
def raw_node_document_annotation():
    """Return a JSON string of raw (per-annotator) node document annotations."""
    # NOTE(review): the key "ewt-train-7192-document-pred-20" appears twice
    # in this object; json.loads silently keeps only the last occurrence,
    # dropping annotator3's entry — confirm that is intended
    return '{"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-25": {"subspace": {"property": {"confidence": {"annotator1": 0.12}, "value": {"annotator1": 0.0}}}}, "ewt-train-7192-document-pred-20": {"subspace": {"property": {"confidence": {"annotator2": 0.55, "annotator3": 0.07}, "value": {"annotator2": 0.0, "annotator3": 0.0}}}}, "ewt-train-7192-document-pred-20": {"subspace": {"property": {"confidence": {"annotator2": 0.55}, "value": {"annotator2": 0.0}}}}}}'
71 |
72 |
@pytest.fixture
def raw_edge_document_annotation():
    """Return a JSON string of raw (per-annotator) edge document annotations.

    Edge keys join the two endpoint node IDs with '%%'.
    """
    return '{"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-20%%ewt-train-7192-document-arg-2": {"subspace": {"property": {"confidence": {"annotator1": 0.12}, "value": {"annotator1": 0.0}}}}, "ewt-train-7192-document-pred-20%%ewt-train-7189-document-arg-2": {"subspace": {"property": {"confidence": {"annotator2": 0.55, "annotator3": 0.07}, "value": {"annotator2": 0.0, "annotator3": 0.0}}}}, "ewt-train-7192-document-pred-25%%ewt-train-7191-document-arg-18": {"subspace": {"property": {"confidence": {"annotator2": 0.55}, "value": {"annotator2": 0.0}}}}}}'
76 |
@pytest.fixture
def raw_document_annotations(raw_node_document_annotation,
                             raw_edge_document_annotation):
    # Parse the raw node- and edge-level JSON fixtures into annotation objects.
    # NOTE(review): RawUDSAnnotation is not imported in this module (only
    # pytest is) — confirm it is provided elsewhere or add the import.
    raw_node_ann = RawUDSAnnotation.from_json(raw_node_document_annotation)
    raw_edge_ann = RawUDSAnnotation.from_json(raw_edge_document_annotation)

    return raw_node_ann, raw_edge_ann
84 |
--------------------------------------------------------------------------------
/tests/test_uds_metadata.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from copy import deepcopy
4 | from typing import List
5 |
6 | from decomp.semantics.uds.metadata import _dtype
7 | from decomp.semantics.uds.metadata import UDSDataType
8 | from decomp.semantics.uds.metadata import UDSPropertyMetadata
9 | from decomp.semantics.uds.metadata import UDSAnnotationMetadata
10 |
def test_dtype():
    """_dtype resolves each type name to the corresponding builtin type."""
    expected = {'int': int, 'str': str, 'float': float, 'bool': bool}

    for name, builtin in expected.items():
        assert _dtype(name) is builtin
16 |
17 |
class TestUDSDataType:
    """Tests for UDSDataType construction and (de)serialization."""

    # categorical test data keyed by the *name* of the underlying datatype
    catdict = {'int': [1, 2, 3, 4, 5],
               'str': ['yes', 'maybe', 'no']}

    # (input dict, expected round-tripped dict) pairs for from_dict/to_dict
    cases = [({'datatype': 'int',
               'categories': [1, 2, 3, 4, 5],
               'ordered': True},
              {'datatype': 'int',
               'categories': [1, 2, 3, 4, 5],
               'ordered': True,
               'lower_bound': 1,
               'upper_bound': 5}),
             ({'datatype': 'int'},
              {'datatype': 'int'}),
             ({'datatype': 'float',
               'lower_bound': 0.0,
               'upper_bound': 1.0},
              {'datatype': 'float',
               'ordered': True,
               'lower_bound': 0.0,
               'upper_bound': 1.0})]

    def test_init_simple(self):
        """Bare datatypes construct without categories or bounds."""
        UDSDataType(datatype=str)
        UDSDataType(datatype=int)
        UDSDataType(datatype=bool)
        UDSDataType(datatype=float)

    def test_init_categorical(self):
        """Categorical datatypes construct for both ordered and unordered."""
        for tname, c in self.catdict.items():
            # resolve the datatype *name* to the actual class in a fresh
            # variable: the old code rebound the loop variable itself, so
            # after the first pass `t == 'int'` was always False and the
            # int cases silently constructed with datatype=str
            t = int if tname == 'int' else str

            for o in [True, False]:
                UDSDataType(datatype=t,
                            categories=c,
                            ordered=o)

    def test_from_dict_simple(self):
        """Bare datatypes load from a dict with only a 'datatype' key."""
        UDSDataType.from_dict({'datatype': 'str'})
        UDSDataType.from_dict({'datatype': 'int'})
        UDSDataType.from_dict({'datatype': 'bool'})
        UDSDataType.from_dict({'datatype': 'float'})

    def test_from_dict_categorical(self):
        """Categorical loading validates keys, datatypes, and ordering."""
        # the name for the categories key is "categories"
        with pytest.raises(KeyError):
            UDSDataType.from_dict({'datatype': 'int',
                                   'category': [1, 2, 3, 4, 5],
                                   'ordered': True})

        # floats cannot be categorical
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'float',
                                   'categories': [1, 2, 3, 4, 5],
                                   'ordered': True})

        # bounds can only be specified if ordered is not specified or
        # is True
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'str',
                                   'categories': ["no", "maybe", "yes"],
                                   'ordered': False,
                                   'lower_bound': "no",
                                   'upper_bound': "yes"})

        # these are good
        for t, c in self.catdict.items():
            for o in [True, False]:
                dt = UDSDataType.from_dict({'datatype': t,
                                            'categories': c,
                                            'ordered': o})

                assert dt.is_categorical
                assert dt.is_ordered_categorical == o

                # ordered categories round-trip as a list, unordered as a set
                if o:
                    assert dt.categories == c
                else:
                    assert dt.categories == set(c)

    def test_from_dict_bounded(self):
        """Bound loading validates datatypes and bound/category agreement."""
        # bounded datatypes should only be float or int
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'str',
                                   'categories': ['yes', 'maybe', 'no'],
                                   'ordered': True,
                                   'lower_bound': 'no',
                                   'upper_bound': 'yes'})

        # if the datatype is categorical, the lower bound should
        # match the category lower bound
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'int',
                                   'categories': [1, 2, 3, 4, 5],
                                   'ordered': True,
                                   'lower_bound': 2,
                                   'upper_bound': 5})

        # these are good
        for c, _ in self.cases:
            UDSDataType.from_dict(c)

    def test_to_dict(self):
        """to_dict emits the expected canonical dict for each case."""
        for c_in, c_out in self.cases:
            loaded = UDSDataType.from_dict(c_in)
            assert loaded.to_dict() == c_out

    def test_eq(self):
        """Datatypes loaded from equivalent dicts compare equal."""
        for c_in, c_out in self.cases:
            loaded1 = UDSDataType.from_dict(c_in)
            loaded2 = UDSDataType.from_dict(c_out)

            assert loaded1 == loaded2
131 |
# sentence-level annotation metadata as it appears on disk: bounds are
# implicit in the ordered categories
sentence_metadata_example = {
    'protoroles': {
        'awareness': {
            'annotators': ['protoroles-annotator-8',
                           'protoroles-annotator-9'],
            'confidence': {'categories': [0, 1],
                           'datatype': 'int',
                           'ordered': False},
            'value': {'categories': [1, 2, 3, 4, 5],
                      'datatype': 'int',
                      'ordered': True}},
        'change_of_location': {
            'annotators': ['protoroles-annotator-0',
                           'protoroles-annotator-1'],
            'confidence': {'categories': [0, 1],
                           'datatype': 'int',
                           'ordered': False},
            'value': {'categories': [1, 2, 3, 4, 5],
                      'datatype': 'int',
                      'ordered': True}}}}

# the same metadata with the bounds UDSDataType.to_dict makes explicit
sentence_metadata_example_full = {
    'protoroles': {
        'awareness': {
            'annotators': ['protoroles-annotator-8',
                           'protoroles-annotator-9'],
            'confidence': {'categories': [0, 1],
                           'datatype': 'int',
                           'ordered': False},
            'value': {'categories': [1, 2, 3, 4, 5],
                      'datatype': 'int',
                      'ordered': True,
                      'lower_bound': 1,
                      'upper_bound': 5}},
        'change_of_location': {
            'annotators': ['protoroles-annotator-0',
                           'protoroles-annotator-1'],
            'confidence': {'categories': [0, 1],
                           'datatype': 'int',
                           'ordered': False},
            'value': {'categories': [1, 2, 3, 4, 5],
                      'datatype': 'int',
                      'ordered': True,
                      'lower_bound': 1,
                      'upper_bound': 5}}}}


# variant of the example with every annotator list stripped out
sentence_metadata_example_noann = deepcopy(sentence_metadata_example)

# only the property-metadata dicts are needed, so iterate .values()
# rather than .items() with unused keys; underscore names avoid leaking
# loop variables into the module namespace
for _propdict in sentence_metadata_example_noann.values():
    for _md in _propdict.values():
        del _md['annotators']
176 |
177 |
class TestUDSPropertyMetadata:
    """Tests for UDSPropertyMetadata (de)serialization."""

    def test_init(self):
        pass

    def test_from_dict(self):
        """Loading from a dict yields typed value/confidence datatypes."""
        source = sentence_metadata_example['protoroles']['awareness']
        loaded = UDSPropertyMetadata.from_dict(source)

        assert isinstance(loaded.value, UDSDataType)
        assert isinstance(loaded.confidence, UDSDataType)

        assert loaded.value.datatype is int
        assert loaded.confidence.datatype is int

        # ordered categories come back as a list, unordered as a set
        assert loaded.value.categories == [1, 2, 3, 4, 5]
        assert loaded.confidence.categories == {0, 1}

        assert loaded.annotators == {'protoroles-annotator-8',
                                     'protoroles-annotator-9'}

    def test_to_dict(self):
        """Round-tripping through from_dict/to_dict is stable."""
        source = sentence_metadata_example['protoroles']['awareness']
        loaded = UDSPropertyMetadata.from_dict(source)

        roundtripped = UDSPropertyMetadata.from_dict(loaded.to_dict()).to_dict()
        expected = sentence_metadata_example_full['protoroles']['awareness']

        # annotators may be reordered on load, so compare them as sets
        assert set(expected['annotators']) == set(roundtripped['annotators'])

        assert expected['value'] == roundtripped['value']

        assert expected['confidence'] == roundtripped['confidence']
215 |
class TestUDSAnnotationMetadata:
    """Tests for UDSAnnotationMetadata indexing, merging, and queries."""

    metadata = UDSAnnotationMetadata.from_dict(sentence_metadata_example)
    metadata_noann = UDSAnnotationMetadata.from_dict(sentence_metadata_example_noann)

    def test_getitem(self):
        """Subspace and (subspace, property) indexing work; deeper keys raise."""
        self.metadata['protoroles']
        self.metadata['protoroles', 'awareness']
        self.metadata['protoroles']['awareness']
        self.metadata['protoroles', 'awareness'].value

        with pytest.raises(TypeError):
            self.metadata['protoroles', 'awareness', 'value']

    def test_add(self):
        """Addition is idempotent and merges properties across metadata."""
        assert self.metadata == self.metadata + self.metadata

        metadatadict1 = {'protoroles': {'awareness': sentence_metadata_example['protoroles']['awareness']}}
        metadatadict2 = {'protoroles': {'change_of_location': sentence_metadata_example['protoroles']['change_of_location']}}

        metadata1 = UDSAnnotationMetadata.from_dict(metadatadict1)
        metadata2 = UDSAnnotationMetadata.from_dict(metadatadict2)

        combined = metadata1 + metadata2

        # the original test computed the sum but asserted nothing about it;
        # check the merged metadata covers both source properties
        assert combined.subspaces == {'protoroles'}
        assert combined.properties('protoroles') == {'awareness',
                                                     'change_of_location'}

    def test_subspaces(self):
        assert self.metadata.subspaces == {'protoroles'}

    def test_properties(self):
        """Property lookup works both globally and per subspace."""
        assert self.metadata.properties() == {'awareness',
                                              'change_of_location'}

        assert self.metadata.properties('protoroles') == {'awareness',
                                                          'change_of_location'}

    def test_annotators(self):
        """Annotator lookup works globally, per subspace, and per property."""
        assert self.metadata.annotators() == {'protoroles-annotator-0',
                                              'protoroles-annotator-1',
                                              'protoroles-annotator-8',
                                              'protoroles-annotator-9'}

        assert self.metadata.annotators('protoroles') == {'protoroles-annotator-0',
                                                          'protoroles-annotator-1',
                                                          'protoroles-annotator-8',
                                                          'protoroles-annotator-9'}

        assert self.metadata.annotators('protoroles', 'awareness') == {'protoroles-annotator-8',
                                                                       'protoroles-annotator-9'}

        # a property without a subspace is ambiguous
        with pytest.raises(ValueError):
            self.metadata.annotators(prop='awareness')

        assert self.metadata_noann.annotators() is None

    def test_has_annotators(self):
        assert self.metadata.has_annotators()
        assert self.metadata.has_annotators('protoroles')
        assert self.metadata.has_annotators('protoroles', 'awareness')
        assert not self.metadata_noann.has_annotators()
276 |
277 |
class TestUDSCorpusMetadata:
    # NOTE(review): stub test class — it builds metadata but defines no test
    # methods yet. It also instantiates UDSAnnotationMetadata rather than a
    # corpus-specific metadata class; confirm that is intended.

    metadata = UDSAnnotationMetadata.from_dict(sentence_metadata_example)
281 |
--------------------------------------------------------------------------------
/tests/test_vis.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from predpatt import PredPatt, PredPattOpts, load_conllu
4 | from decomp.syntax.dependency import DependencyGraphBuilder
5 | from decomp.semantics.predpatt import PredPattGraphBuilder
6 | from decomp.semantics.uds import UDSSentenceGraph, UDSCorpus
7 | from decomp.vis.uds_vis import UDSVisualization
8 | from decomp import NormalizedUDSAnnotation
9 | import pdb
10 |
11 | from test_uds_graph import raw_sentence_graph, rawtree, listtree
12 | import pytest
13 | import dash
14 | from dash.testing.application_runners import import_app
15 |
16 |
@pytest.fixture
def basic_sentence_graph(test_data_dir):
    """Return a UDSSentenceGraph deserialized from the vis fixture data."""
    # context manager closes the file handle promptly; the old
    # json.load(open(...)) leaked it until garbage collection
    with open(os.path.join(test_data_dir, "vis_data.json")) as f:
        graph_data = json.load(f)

    return UDSSentenceGraph.from_dict(graph_data)
22 |
def test_vis_basic(basic_sentence_graph, dash_duo):
    """The visualization serves a dash app exposing a title element."""
    visualization = UDSVisualization(basic_sentence_graph, add_syntax_edges=True)
    app = visualization.serve(do_return=True)
    dash_duo.start_server(app)
    assert dash_duo.find_element("title") is not None
28 |
def test_vis_raw(raw_sentence_graph):
    """Visualizing a raw-annotation graph is unsupported and raises."""
    with pytest.raises(AttributeError):
        raw_vis = UDSVisualization(raw_sentence_graph, add_syntax_edges=True)
        raw_vis.serve()
33 |
--------------------------------------------------------------------------------
/uds-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/uds-graph.png
--------------------------------------------------------------------------------