├── .dockerignore ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── decomp ├── __init__.py ├── corpus │ ├── __init__.py │ └── corpus.py ├── data │ ├── 1.0 │ │ ├── normalized │ │ │ ├── document │ │ │ │ └── annotations │ │ │ │ │ └── .gitkeep │ │ │ └── sentence │ │ │ │ └── annotations │ │ │ │ ├── factuality.zip │ │ │ │ ├── genericity.zip │ │ │ │ ├── protoroles.zip │ │ │ │ ├── time.zip │ │ │ │ └── wordsense.zip │ │ └── raw │ │ │ └── sentence │ │ │ └── annotations │ │ │ ├── factuality.zip │ │ │ ├── genericity.zip │ │ │ ├── protoroles.zip │ │ │ ├── time.zip │ │ │ └── wordsense.zip │ ├── 2.0 │ │ ├── normalized │ │ │ ├── document │ │ │ │ ├── .gitkeep │ │ │ │ └── annotations │ │ │ │ │ ├── .gitkeep │ │ │ │ │ └── event_structure_mereology.zip │ │ │ └── sentence │ │ │ │ └── annotations │ │ │ │ ├── .gitkeep │ │ │ │ ├── event_structure_distributivity.zip │ │ │ │ ├── event_structure_natural_parts.zip │ │ │ │ ├── factuality.zip │ │ │ │ ├── genericity.zip │ │ │ │ ├── protoroles.zip │ │ │ │ ├── time.zip │ │ │ │ └── wordsense.zip │ │ └── raw │ │ │ ├── document │ │ │ └── annotations │ │ │ │ ├── event_structure_mereology.zip │ │ │ │ └── time.zip │ │ │ └── sentence │ │ │ └── annotations │ │ │ ├── event_structure_distributivity.zip │ │ │ ├── event_structure_natural_parts.zip │ │ │ ├── factuality.zip │ │ │ ├── genericity.zip │ │ │ ├── protoroles.zip │ │ │ ├── time.zip │ │ │ └── wordsense.zip │ ├── LICENSE │ └── ud_ids.json ├── graph │ ├── __init__.py │ ├── nx.py │ └── rdf.py ├── semantics │ ├── __init__.py │ ├── predpatt.py │ └── uds │ │ ├── __init__.py │ │ ├── annotation.py │ │ ├── corpus.py │ │ ├── document.py │ │ ├── graph.py │ │ └── metadata.py ├── syntax │ ├── __init__.py │ └── dependency.py └── vis │ ├── __init__.py │ └── uds_vis.py ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ ├── data │ ├── document-graphs.rst │ ├── index.rst │ ├── semantic-types.rst │ ├── sentence-graphs.rst │ └── syntactic-graphs.rst │ ├── index.rst │ 
├── install.rst │ ├── package │ ├── decomp.corpus.corpus.rst │ ├── decomp.corpus.rst │ ├── decomp.graph.nx.rst │ ├── decomp.graph.rdf.rst │ ├── decomp.graph.rst │ ├── decomp.semantics.predpatt.rst │ ├── decomp.semantics.rst │ ├── decomp.semantics.uds.annotation.rst │ ├── decomp.semantics.uds.corpus.rst │ ├── decomp.semantics.uds.document.rst │ ├── decomp.semantics.uds.graph.rst │ ├── decomp.semantics.uds.metadata.rst │ ├── decomp.semantics.uds.rst │ ├── decomp.syntax.dependency.rst │ ├── decomp.syntax.rst │ ├── decomp.vis.rst │ ├── decomp.vis.uds_vis.rst │ └── index.rst │ └── tutorial │ ├── assets │ ├── vis_genericity_no_syntax.png │ ├── vis_no_protoroles_no_syntax.png │ ├── vis_no_protoroles_syntax.png │ ├── vis_no_syntax.png │ ├── vis_node_props_no_syntax.png │ ├── vis_node_props_syntax.png │ ├── vis_protoroles_no_syntax.png │ ├── vis_protoroles_syntax.png │ └── vis_syntax.png │ ├── index.rst │ ├── querying.rst │ ├── quick-start.rst │ ├── reading.rst │ ├── serializing.rst │ └── visualization.rst ├── requirements.txt ├── setup.py ├── tests ├── README.md ├── conftest.py ├── data │ ├── normalized_edge_document_annotation.json │ ├── normalized_edge_sentence_annotation.json │ ├── normalized_node_document_annotation.json │ ├── normalized_node_sentence_annotation.json │ ├── raw_edge_sentence_annotation.json │ ├── raw_edge_sentence_annotators.json │ ├── raw_edge_sentence_annotators.txt │ ├── raw_node_sentence_annotation.json │ ├── raw_node_sentence_annotators.txt │ ├── rawtree.conllu │ └── vis_data.json ├── requirements.txt ├── test_dependency.py ├── test_predpatt.py ├── test_uds_annotation.py ├── test_uds_corpus.py ├── test_uds_document.py ├── test_uds_graph.py ├── test_uds_metadata.py └── test_vis.py └── uds-graph.png /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | WORKDIR /usr/src/decomp 4 | 5 | COPY . . 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt && \ 8 | pip install --no-cache-dir . && \ 9 | python -c "from decomp import UDSCorpus; UDSCorpus()" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Aaron Steven White 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include decomp/ * 2 | recursive-include docs/ * 3 | recursive-include tests/ * 4 | include requirements.txt 5 | include README.md 6 | include LICENSE 7 | include Dockerfile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | [Decomp](https://github.com/decompositional-semantics-initiative/decomp) 4 | is a toolkit for working with the [Universal Decompositional Semantics 5 | (UDS) dataset](http://decomp.io), which is a collection of directed 6 | acyclic semantic graphs with real-valued node and edge attributes 7 | pointing into [Universal 8 | Dependencies](https://universaldependencies.org/) syntactic dependency 9 | trees. 10 | 11 | ![UDS graph example](https://github.com/decompositional-semantics-initiative/decomp/raw/master/uds-graph.png) 12 | 13 | The toolkit is built on top of 14 | [NetworkX](https://github.com/networkx/networkx) and 15 | [RDFLib](https://github.com/RDFLib/rdflib) making it straightforward to: 16 | 17 | - read the UDS dataset from its native JSON format 18 | - query both the syntactic and semantic subgraphs of UDS (as well as 19 | pointers between them) using SPARQL 1.1 queries 20 | - serialize UDS graphs to many common formats, such as 21 | [Notation3](https://www.w3.org/TeamSubmission/n3/), 22 | [N-Triples](https://www.w3.org/TR/n-triples/), 23 | [turtle](https://www.w3.org/TeamSubmission/turtle/), and 24 | [JSON-LD](https://json-ld.org/), as well as any other format 25 | supported by NetworkX 26 | 27 | The toolkit was built by [Aaron Steven 28 | White](http://aaronstevenwhite.io/) and is maintained by the 29 | [Decompositional Semantics Initiative](http://decomp.io/). 
The UDS 30 | dataset was constructed from annotations collected by the 31 | [Decompositional Semantics Initiative](http://decomp.io/). 32 | 33 | # Documentation 34 | 35 | The [full documentation for the 36 | package](https://decomp.readthedocs.io/en/latest/index.html) is hosted 37 | at [Read the Docs](https://readthedocs.org/). 38 | 39 | # Citation 40 | 41 | If you make use of the dataset and/or toolkit in your research, we ask 42 | that you please cite the following paper in addition to the paper that 43 | introduces the underlying dataset(s) on which UDS is based. 44 | 45 | > White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. [The Universal Decompositional Semantics Dataset and Decomp Toolkit](https://www.aclweb.org/anthology/2020.lrec-1.699/). In Proceedings of The 12th Language Resources and Evaluation Conference, 5698–5707. Marseille, France: European Language Resources Association. 
git clone https://github.com/decompositional-semantics-initiative/decomp.git
103 | git clone https://github.com/decompositional-semantics-initiative/decomp.git
"ewt-train-12"
For example, to get a 187 | dictionary mapping identifiers for syntax nodes in the UDS graph to 188 | their attributes, you can use: 189 | 190 | ``` python 191 | uds["ewt-train-12"].syntax_nodes 192 | ``` 193 | 194 | To get a dictionary mapping identifiers for semantics nodes in the UDS 195 | graph to their attributes, you can use: 196 | 197 | ``` python 198 | uds["ewt-train-12"].semantics_nodes 199 | ``` 200 | 201 | To get a dictionary mapping identifiers for semantics edges (tuples of 202 | node identifiers) in the UDS graph to their attributes, you can use: 203 | 204 | ``` python 205 | uds["ewt-train-12"].semantics_edges() 206 | ``` 207 | 208 | To get a dictionary mapping identifiers for semantics edges (tuples of 209 | node identifiers) in the UDS graph involving the predicate headed by the 210 | 7th token to their attributes, you can use: 211 | 212 | ``` python 213 | uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7') 214 | ``` 215 | 216 | To get a dictionary mapping identifiers for syntax edges (tuples of node 217 | identifiers) in the UDS graph to their attributes, you can use: 218 | 219 | ``` python 220 | uds["ewt-train-12"].syntax_edges() 221 | ``` 222 | 223 | And to get a dictionary mapping identifiers for syntax edges (tuples of 224 | node identifiers) in the UDS graph involving the node for the 7th token 225 | to their attributes, you can use: 226 | 227 | ``` python 228 | uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7') 229 | ``` 230 | 231 | There are also methods for accessing relationships between semantics and 232 | syntax nodes. 
For example, to get a tuple pairing the ordinal position 233 | of the head syntax node in the UDS graph for the predicate 234 | headed by the 7th token in the corresponding sentence with a list of the 235 | form and lemma attributes for that token, you can use:
258 | -------------------------------------------------------------------------------- /decomp/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from pkg_resources import resource_filename 4 | from logging import basicConfig, DEBUG 5 | 6 | DATA_DIR = resource_filename('decomp', 'data/') 7 | basicConfig(filename=os.path.join(DATA_DIR, 'build.log'), 8 | filemode='w', 9 | level=DEBUG) 10 | 11 | from .semantics.uds import UDSCorpus 12 | from .semantics.uds import NormalizedUDSAnnotation 13 | from .semantics.uds import RawUDSAnnotation 14 | -------------------------------------------------------------------------------- /decomp/corpus/__init__.py: -------------------------------------------------------------------------------- 1 | """Module for defining abstract corpus readers""" 2 | 3 | from .corpus import * 4 | -------------------------------------------------------------------------------- /decomp/corpus/corpus.py: -------------------------------------------------------------------------------- 1 | """Module for defining abstract graph corpus readers""" 2 | 3 | from abc import ABCMeta, abstractmethod 4 | 5 | from random import sample 6 | from logging import warning 7 | from typing import Dict, List, Tuple, Iterable, Hashable, Any, TypeVar 8 | 9 | InGraph = TypeVar('InGraph') # the input graph type 10 | OutGraph = TypeVar('OutGraph') # the output graph type 11 | 12 | 13 | class Corpus(metaclass=ABCMeta): 14 | """Container for graphs 15 | 16 | Parameters 17 | ---------- 18 | graphs_raw 19 | a sequence of graphs in a format that the graphbuilder for a 20 | subclass of this abstract class can process 21 | """ 22 | 23 | def __init__(self, graphs_raw: Iterable[InGraph]): 24 | self._graphs_raw = graphs_raw 25 | self._build_graphs() 26 | 27 | def __iter__(self) -> Iterable[Hashable]: 28 | return iter(self._graphs) 29 | 30 | def items(self) -> Iterable[Tuple[Hashable, OutGraph]]: 31 | """Dictionary-like 
iterator for (graphid, graph) pairs""" 32 | return self._graphs.items() 33 | 34 | def __getitem__(self, k: Hashable) -> Any: 35 | return self._graphs[k] 36 | 37 | def __contains__(self, k: Hashable) -> bool: 38 | return k in self._graphs 39 | 40 | def __len__(self) -> int: 41 | return len(self._graphs) 42 | 43 | def _build_graphs(self) -> None: 44 | self._graphs = {} 45 | 46 | for graphid, rawgraph in self._graphs_raw.items(): 47 | try: 48 | self._graphs[graphid] = self._graphbuilder(graphid, rawgraph) 49 | except ValueError: 50 | warning(graphid+' has no or multiple root nodes') 51 | except RecursionError: 52 | warning(graphid+' has loops') 53 | 54 | @abstractmethod 55 | def _graphbuilder(self, 56 | graphid: Hashable, 57 | rawgraph: InGraph) -> OutGraph: 58 | raise NotImplementedError 59 | 60 | @property 61 | def graphs(self) -> Dict[Hashable, OutGraph]: 62 | """the graphs in corpus""" 63 | return self._graphs 64 | 65 | @property 66 | def graphids(self) -> List[Hashable]: 67 | """The graph ids in corpus""" 68 | 69 | return list(self._graphs) 70 | 71 | @property 72 | def ngraphs(self) -> int: 73 | """Number of graphs in corpus""" 74 | 75 | return len(self._graphs) 76 | 77 | def sample(self, k: int) -> Dict[Hashable, OutGraph]: 78 | """Sample k graphs without replacement 79 | 80 | Parameters 81 | ---------- 82 | k 83 | the number of graphs to sample 84 | """ 85 | 86 | return {tid: self._graphs[tid] 87 | for tid 88 | in sample(self._graphs.keys(), k=k)} 89 | -------------------------------------------------------------------------------- /decomp/data/1.0/normalized/document/annotations/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/document/annotations/.gitkeep -------------------------------------------------------------------------------- 
/decomp/data/1.0/normalized/sentence/annotations/factuality.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/factuality.zip -------------------------------------------------------------------------------- /decomp/data/1.0/normalized/sentence/annotations/genericity.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/genericity.zip -------------------------------------------------------------------------------- /decomp/data/1.0/normalized/sentence/annotations/protoroles.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/protoroles.zip -------------------------------------------------------------------------------- /decomp/data/1.0/normalized/sentence/annotations/time.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/time.zip -------------------------------------------------------------------------------- /decomp/data/1.0/normalized/sentence/annotations/wordsense.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/wordsense.zip 
-------------------------------------------------------------------------------- /decomp/data/1.0/raw/sentence/annotations/factuality.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/factuality.zip -------------------------------------------------------------------------------- /decomp/data/1.0/raw/sentence/annotations/genericity.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/genericity.zip -------------------------------------------------------------------------------- /decomp/data/1.0/raw/sentence/annotations/protoroles.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/protoroles.zip -------------------------------------------------------------------------------- /decomp/data/1.0/raw/sentence/annotations/time.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/time.zip -------------------------------------------------------------------------------- /decomp/data/1.0/raw/sentence/annotations/wordsense.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/wordsense.zip 
-------------------------------------------------------------------------------- /decomp/data/2.0/normalized/document/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/.gitkeep -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/document/annotations/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/annotations/.gitkeep -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/document/annotations/event_structure_mereology.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/annotations/event_structure_mereology.zip -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/sentence/annotations/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/.gitkeep -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/sentence/annotations/event_structure_distributivity.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/event_structure_distributivity.zip -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/sentence/annotations/event_structure_natural_parts.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/event_structure_natural_parts.zip -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/sentence/annotations/factuality.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/factuality.zip -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/sentence/annotations/genericity.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/genericity.zip -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/sentence/annotations/protoroles.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/protoroles.zip -------------------------------------------------------------------------------- 
/decomp/data/2.0/normalized/sentence/annotations/time.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/time.zip -------------------------------------------------------------------------------- /decomp/data/2.0/normalized/sentence/annotations/wordsense.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/wordsense.zip -------------------------------------------------------------------------------- /decomp/data/2.0/raw/document/annotations/event_structure_mereology.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/document/annotations/event_structure_mereology.zip -------------------------------------------------------------------------------- /decomp/data/2.0/raw/document/annotations/time.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/document/annotations/time.zip -------------------------------------------------------------------------------- /decomp/data/2.0/raw/sentence/annotations/event_structure_distributivity.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/event_structure_distributivity.zip 
-------------------------------------------------------------------------------- /decomp/data/2.0/raw/sentence/annotations/event_structure_natural_parts.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/event_structure_natural_parts.zip -------------------------------------------------------------------------------- /decomp/data/2.0/raw/sentence/annotations/factuality.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/factuality.zip -------------------------------------------------------------------------------- /decomp/data/2.0/raw/sentence/annotations/genericity.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/genericity.zip -------------------------------------------------------------------------------- /decomp/data/2.0/raw/sentence/annotations/protoroles.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/protoroles.zip -------------------------------------------------------------------------------- /decomp/data/2.0/raw/sentence/annotations/time.zip: -------------------------------------------------------------------------------- 
"""Module for converting RDFLib graphs to NetworkX digraphs."""

from networkx import DiGraph, to_dict_of_dicts
from rdflib import Graph, URIRef, Literal


class NXConverter:
    """A converter between RDFLib graphs and NetworkX digraphs

    Parameters
    ----------
    rdfgraph
        the RDFLib graph to convert
    """

    def __init__(self, rdfgraph: Graph):
        self.nxgraph = DiGraph()
        self.rdfgraph = rdfgraph

    @classmethod
    def rdf_to_networkx(cls, rdfgraph: Graph) -> DiGraph:
        """Convert an RDFLib graph to a NetworkX digraph

        Parameters
        ----------
        rdfgraph
            the RDFLib graph to convert

        Raises
        ------
        NotImplementedError
            always; RDF-to-NetworkX conversion is not yet implemented
        """
        # Raise immediately rather than first constructing a converter
        # instance that is never used. A sketch of the eventual
        # implementation (the inverse of RDFConverter.networkx_to_rdf)
        # lives in version control history.
        raise NotImplementedError('RDF-to-NetworkX conversion '
                                  'is not yet implemented')
"""Module for converting from networkx to RDF"""

from networkx import DiGraph, to_dict_of_dicts
from rdflib import Graph, URIRef, Literal


class RDFConverter:
    """A converter between NetworkX digraphs and RDFLib graphs

    Parameters
    ----------
    nxgraph
        the graph to convert
    """

    # Class-level caches shared across *all* conversions: subspace,
    # property, and categorical-value URIRefs are interned once so the
    # same identifier always maps to the same RDF term. Note that these
    # persist between calls to networkx_to_rdf.
    SUBSPACES = {}
    PROPERTIES = {'domain': URIRef('domain'),
                  'type': URIRef('type'),
                  'subspace': URIRef('subspace'),
                  'confidence': URIRef('confidence')}
    VALUES = {}

    def __init__(self, nxgraph: DiGraph):
        self.nxgraph = nxgraph
        self.rdfgraph = Graph()
        # per-instance cache mapping node ids (and reified edge ids)
        # to their URIRefs
        self.nodes = {}

    @classmethod
    def networkx_to_rdf(cls, nxgraph: DiGraph) -> Graph:
        """Convert a NetworkX digraph to an RDFLib graph

        Parameters
        ----------
        nxgraph
            the NetworkX graph to convert
        """

        converter = cls(nxgraph)

        nxdict = to_dict_of_dicts(nxgraph)

        # walk the adjacency structure, adding each node's attributes
        # before the attributes of its outgoing edges; target nodes are
        # added before the edge so both endpoints exist in the cache
        for nodeid1, edgedict in nxdict.items():
            converter._add_node_attributes(nodeid1)
            for nodeid2 in edgedict:
                converter._add_node_attributes(nodeid2)
                converter._add_edge_attributes(nodeid1, nodeid2)

        return converter.rdfgraph

    def _add_node_attributes(self, nodeid):
        # ensure the node has a URIRef, then emit one triple per attribute
        self._construct_node(nodeid)

        self._add_attributes(nodeid,
                             self.nxgraph.nodes[nodeid].items())

    def _add_edge_attributes(self, nodeid1, nodeid2):
        # reify the edge (see _construct_edge), then attach the edge's
        # attributes to the reified edge id
        edgeid = self._construct_edge(nodeid1, nodeid2)
        edgetup = (nodeid1, nodeid2)

        self._add_attributes(edgeid,
                             self.nxgraph.edges[edgetup].items())

    def _add_attributes(self, nid, attributes):
        triples = []

        for attrid1, attrs1 in attributes:
            if not isinstance(attrs1, dict):
                # list- and tuple-valued attributes have no RDF encoding
                # here, so they are rejected outright
                if isinstance(attrs1, list) or isinstance(attrs1, tuple):
                    errmsg = 'Cannot convert list- or tuple-valued' +\
                             ' attributes to RDF'
                    raise ValueError(errmsg)

                triples += self._construct_property(nid,
                                                   attrid1,
                                                   attrs1)

            else:
                # dict-valued attributes are treated as subspaces whose
                # items are (property name -> annotation) pairs
                for attrid2, attrs2 in attrs1.items():
                    triples += self._construct_property(nid,
                                                        attrid2,
                                                        attrs2,
                                                        attrid1)

        for t in triples:
            self.rdfgraph.add(t)

    def _construct_node(self, nodeid):
        # intern the node id as a URIRef (idempotent)
        if nodeid not in self.nodes:
            self.nodes[nodeid] = URIRef(nodeid)

    def _construct_edge(self, nodeid1, nodeid2):
        # an edge is reified as a URIRef named "<n1>%%<n2>" and used as
        # the predicate of a triple linking the two node URIRefs; this
        # lets edge attributes be attached to the edge id itself
        edgeid = nodeid1 + '%%' + nodeid2

        if edgeid not in self.nodes:
            node1 = self.nodes[nodeid1]
            node2 = self.nodes[nodeid2]

            self.nodes[edgeid] = URIRef(edgeid)
            triple = (node1, self.nodes[edgeid], node2)

            self.rdfgraph.add(triple)

            return edgeid

        else:
            return edgeid

    def _construct_property(self, nodeid, propid, val,
                            subspaceid=None):
        """Build the triples encoding one attribute of a node or edge.

        Returns a list of (subject, predicate, object) triples; callers
        add them to the graph.
        """

        c = self.__class__

        if isinstance(val, dict) and subspaceid is not None:
            # We currently do not support querying on raw UDS
            # annotations, all of which have dict-valued 'value'
            # and 'confidence' fields.
            if isinstance(val['value'], dict) or isinstance(val['confidence'], dict):
                raise TypeError('Attempted query of graph with raw properties. Querying '\
                                'graphs with raw properties is prohibited.')
            # normalized annotation: emit the subspace scaffolding plus
            # one triple each for the value and its confidence
            triples = c._construct_subspace(subspaceid, propid)
            triples += [(self.nodes[nodeid],
                         c.PROPERTIES[propid],
                         Literal(val['value'])),
                        (self.nodes[nodeid],
                         c.PROPERTIES[propid+'-confidence'],
                         Literal(val['confidence']))]

        elif propid in ['domain', 'type']:
            # categorical attributes: the value itself becomes a URIRef
            # so it can be matched in queries
            if val not in c.VALUES:
                c.VALUES[val] = URIRef(val)

            triples = [(self.nodes[nodeid],
                        c.PROPERTIES[propid],
                        c.VALUES[val])]

        else:
            # everything else is a plain literal-valued property
            if propid not in c.PROPERTIES:
                c.PROPERTIES[propid] = URIRef(propid)

            triples = [(self.nodes[nodeid],
                        c.PROPERTIES[propid],
                        Literal(val))]

        return triples

    @classmethod
    def _construct_subspace(cls, subspaceid, propid):
        # intern the subspace and property URIRefs, then link the
        # property (and its confidence counterpart) to the subspace and
        # the property to its confidence property
        if subspaceid not in cls.SUBSPACES:
            cls.SUBSPACES[subspaceid] = URIRef(subspaceid)

        if propid not in cls.PROPERTIES:
            cls.PROPERTIES[propid] = URIRef(propid)
            cls.PROPERTIES[propid+'-confidence'] = URIRef(propid+'-confidence')

        return [(cls.PROPERTIES[propid],
                 cls.PROPERTIES['subspace'],
                 cls.SUBSPACES[subspaceid]),
                (cls.PROPERTIES[propid+'-confidence'],
                 cls.PROPERTIES['subspace'],
                 cls.SUBSPACES[subspaceid]),
                (cls.PROPERTIES[propid],
                 cls.PROPERTIES['confidence'],
                 cls.PROPERTIES[propid+'-confidence'])]
# pylint: disable=W0221
# pylint: disable=R0903
# pylint: disable=R1704
"""Module for converting PredPatt objects to networkx digraphs"""

from os.path import basename, splitext
from typing import Tuple, Hashable, TextIO, Optional, Union
from networkx import DiGraph
from predpatt import load_conllu, PredPatt, PredPattOpts
from ..corpus import Corpus
from ..syntax.dependency import CoNLLDependencyTreeCorpus

# resolve relative clauses (borrowing arguments for them), leave
# conjunctions unresolved, and cut argument spans
DEFAULT_PREDPATT_OPTIONS = PredPattOpts(resolve_relcl=True,
                                        borrow_arg_for_relcl=True,
                                        resolve_conj=False,
                                        cut=True)


class PredPattCorpus(Corpus):
    """Container for predpatt graphs"""

    def _graphbuilder(self,
                      graphid: Hashable,
                      predpatt_depgraph: Tuple[PredPatt, DiGraph]) -> DiGraph:
        """Build a single graph from a predpatt/dependency-graph pair

        Parameters
        ----------
        graphid
            an identifier for the graph
        predpatt_depgraph
            a pairing of the predpatt for a dependency parse and the graph
            representing that dependency parse
        """

        predpatt, depgraph = predpatt_depgraph

        return PredPattGraphBuilder.from_predpatt(predpatt, depgraph, graphid)

    @classmethod
    def from_conll(cls,
                   corpus: Union[str, TextIO],
                   name: str = 'ewt',
                   options: Optional[PredPattOpts] = None) -> 'PredPattCorpus':
        """Load a CoNLL dependency corpus and apply predpatt

        Parameters
        ----------
        corpus
            (path to) a .conllu file
        name
            the name of the corpus; used in constructing treeids
        options
            options for predpatt extraction

        Raises
        ------
        ValueError
            if PredPatt cannot parse the provided CoNLL
        """

        options = DEFAULT_PREDPATT_OPTIONS if options is None else options

        corp_is_str = isinstance(corpus, str)

        # `corpus` may be a path to a .conllu file, a CoNLL-formatted
        # string, or an open file handle
        if corp_is_str and splitext(basename(corpus))[1] == '.conllu':
            with open(corpus) as infile:
                data = infile.read()

        elif corp_is_str:
            data = corpus

        else:
            data = corpus.read()

        # load the CoNLL dependency parses as graphs; sentence blocks
        # are separated by blank lines and comment lines start with '#'
        ud_corp = {name+'-'+str(i+1): [line.split()
                                       for line in block.split('\n')
                                       if len(line) > 0
                                       if line[0] != '#']
                   for i, block in enumerate(data.split('\n\n'))}
        ud_corp = CoNLLDependencyTreeCorpus(ud_corp)

        # extract the predpatt for those dependency parses
        try:
            predpatt = {name+'-'+sid.split('_')[1]: PredPatt(ud_parse,
                                                             opts=options)
                        for sid, ud_parse in load_conllu(data)}

        except ValueError as err:
            errmsg = 'PredPatt was unable to parse the CoNLL you provided.' +\
                     ' This is likely due to using a version of UD that is' +\
                     ' incompatible with PredPatt. Use of version 1.2 is' +\
                     ' suggested.'

            # chain the original exception so the underlying parse
            # failure remains visible in the traceback
            raise ValueError(errmsg) from err

        return cls({n: (pp, ud_corp[n])
                    for n, pp in predpatt.items()})


class PredPattGraphBuilder:
    """A predpatt graph builder"""

    @classmethod
    def from_predpatt(cls,
                      predpatt: PredPatt,
                      depgraph: DiGraph,
                      graphid: str = '') -> DiGraph:
        """Build a DiGraph from a PredPatt object and another DiGraph

        Parameters
        ----------
        predpatt
            the predpatt extraction for the dependency parse
        depgraph
            the dependency graph
        graphid
            the tree identifier; will be a prefix of all node
            identifiers
        """
        # handle null graphids
        graphid = graphid+'-' if graphid else ''

        # initialize the predpatt graph
        predpattgraph = DiGraph()
        predpattgraph.name = graphid.strip('-')

        # include all of the syntax edges in the original dependency graph
        predpattgraph.add_nodes_from([(n, attr)
                                      for n, attr in depgraph.nodes.items()])
        predpattgraph.add_edges_from([(n1, n2, attr)
                                      for (n1, n2), attr
                                      in depgraph.edges.items()])

        # add links between predicate nodes and syntax nodes
        predpattgraph.add_edges_from([edge
                                      for event in predpatt.events
                                      for edge
                                      in cls._instantiation_edges(graphid,
                                                                  event,
                                                                  'pred')])

        # add links between argument nodes and syntax nodes
        edges = [edge
                 for event in predpatt.events
                 for arg in event.arguments
                 for edge
                 in cls._instantiation_edges(graphid, arg, 'arg')]

        predpattgraph.add_edges_from(edges)

        # add links between predicate nodes and argument nodes; an
        # argument that is itself a predicate additionally gets a head
        # edge to its predicate node
        edges = [edge
                 for event in predpatt.events
                 for arg in event.arguments
                 for edge in cls._predarg_edges(graphid, event, arg,
                                                arg.position
                                                in [e.position
                                                    for e
                                                    in predpatt.events])]

        predpattgraph.add_edges_from(edges)

        # mark that all the semantic nodes just added were from predpatt
        # this is done to distinguish them from nodes added through annotations
        for node in predpattgraph.nodes:
            if 'semantics' in node:
                predpattgraph.nodes[node]['domain'] = 'semantics'
                predpattgraph.nodes[node]['frompredpatt'] = True

                if 'arg' in node:
                    predpattgraph.nodes[node]['type'] = 'argument'
                elif 'pred' in node:
                    predpattgraph.nodes[node]['type'] = 'predicate'

        return predpattgraph

    @staticmethod
    def _instantiation_edges(graphid, node, typ):
        # link a semantics node to its head syntax token and (as
        # nonhead) to every other token in its span
        parent_id = graphid+'semantics-'+typ+'-'+str(node.position+1)
        child_head_token_id = graphid+'syntax-'+str(node.position+1)
        child_span_token_ids = [graphid+'syntax-'+str(tok.position+1)
                                for tok in node.tokens
                                if child_head_token_id !=
                                graphid+'syntax-'+str(tok.position+1)]

        return [(parent_id, child_head_token_id,
                 {'domain': 'interface',
                  'type': 'head'})] +\
               [(parent_id, tokid, {'domain': 'interface',
                                    'type': 'nonhead'})
                for tokid in child_span_token_ids]

    @staticmethod
    def _predarg_edges(graphid, parent_node, child_node, pred_child):
        # dependency edge from predicate to argument; if the argument is
        # itself a predicate (pred_child), also add a head edge from the
        # argument node to that predicate node
        parent_id = graphid+'semantics-pred-'+str(parent_node.position+1)
        child_id = graphid+'semantics-arg-'+str(child_node.position+1)

        if pred_child:
            child_id_pred = graphid +\
                            'semantics-pred-' +\
                            str(child_node.position+1)
            return [(parent_id,
                     child_id,
                     {'domain': 'semantics',
                      'type': 'dependency',
                      'frompredpatt': True})] +\
                   [(child_id,
                     child_id_pred,
                     {'domain': 'semantics',
                      'type': 'head',
                      'frompredpatt': True})]

        return [(parent_id,
                 child_id,
                 {'domain': 'semantics',
                  'type': 'dependency',
                  'frompredpatt': True})]
"""Module for representing UDS documents."""

import re

from typing import Optional, Any
from typing import Dict

from memoized_property import memoized_property
from networkx import DiGraph
from .graph import UDSSentenceGraph, UDSDocumentGraph


class UDSDocument:
    """A Universal Decompositional Semantics document

    Parameters
    ----------
    sentence_graphs
        the UDSSentenceGraphs associated with each sentence in the document
    sentence_ids
        the UD sentence IDs for each graph
    name
        the name of the document (i.e. the UD document ID)
    genre
        the genre of the document (e.g. `weblog`)
    timestamp
        the timestamp of the UD document on which this UDSDocument is based
    doc_graph
        the NetworkX DiGraph for the document. If not provided, this will be
        initialized without edges from sentence_graphs
    """
    def __init__(self, sentence_graphs: Dict[str, UDSSentenceGraph],
                 sentence_ids: Dict[str, str], name: str, genre: str,
                 timestamp: Optional[str] = None, doc_graph: Optional[UDSDocumentGraph] = None):
        self.sentence_graphs = {}
        self.sentence_ids = {}
        self.name = name
        self.genre = genre
        self.timestamp = timestamp

        # Initialize the document-level graph
        if doc_graph:
            self.document_graph = doc_graph
        else:
            self.document_graph = UDSDocumentGraph(DiGraph(), name)

        # Initialize the sentence-level graphs
        self.add_sentence_graphs(sentence_graphs, sentence_ids)

    def to_dict(self) -> Dict:
        """Convert the graph to a dictionary"""
        return self.document_graph.to_dict()

    @classmethod
    def from_dict(cls, document: Dict[str, Dict], sentence_graphs: Dict[str, UDSSentenceGraph],
                  sentence_ids: Dict[str, str], name: str = 'UDS') -> 'UDSDocument':
        """Construct a UDSDocument from a dictionary

        Since only the document graphs are serialized, the sentence
        graphs must also be provided to this method call in order
        to properly associate them with their documents.

        Parameters
        ----------
        document
            a dictionary constructed by networkx.adjacency_data,
            containing the graph for the document
        sentence_graphs
            a dictionary containing (possibly a superset of) the
            sentence-level graphs for the sentences in the document
        sentence_ids
            a dictionary containing (possibly a superset of) the
            UD sentence IDs for each graph
        name
            identifier to append to the beginning of node ids
        """
        document_graph = UDSDocumentGraph.from_dict(document, name)

        # only keep the sentence graphs actually referenced by the
        # document's nodes
        sent_graph_names = set(map(lambda node: node['semantics']['graph'], document['nodes']))
        sent_graphs = {}
        sent_ids = {}
        for gname in sent_graph_names:
            sentence_graphs[gname].document_id = name
            sentence_graphs[gname].sentence_id = sentence_ids[gname]
            sent_graphs[gname] = sentence_graphs[gname]
            sent_ids[gname] = sentence_ids[gname]

        # document names are of the form <genre>-..., e.g. weblog-...
        genre = name.split('-')[0]
        timestamp = cls._get_timestamp_from_document_name(name)
        return cls(sent_graphs, sent_ids, name, genre, timestamp, document_graph)

    @staticmethod
    def _get_timestamp_from_document_name(document_name):
        # raw string: '\d' in a plain literal is an invalid escape
        # sequence (deprecated; a future Python error)
        timestamp = re.search(r'\d{8}_?\d{6}', document_name)
        return timestamp[0] if timestamp else None

    def add_sentence_graphs(self, sentence_graphs: Dict[str, UDSSentenceGraph],
                            sentence_ids: Dict[str, str]) -> None:
        """Add additional sentences to a document

        Parameters
        ----------
        sentence_graphs
            a dictionary containing the sentence-level graphs
            for the sentences in the document
        sentence_ids
            a dictionary containing the UD sentence IDs for each graph
        """
        for gname, graph in sentence_graphs.items():
            sentence_graphs[gname].sentence_id = sentence_ids[gname]
            sentence_graphs[gname].document_id = self.name
            self.sentence_graphs[gname] = graph
            self.sentence_ids[gname] = sentence_ids[gname]

            # mirror each semantics node as a document-domain node that
            # points back to its sentence-level graph and node
            for node_name, node in graph.semantics_nodes.items():
                semantics = {'graph': gname, 'node': node_name}
                document_node_name = node_name.replace('semantics', 'document')
                self.document_graph.graph.add_node(document_node_name,
                                                   domain='document', type=node['type'],
                                                   frompredpatt=False, semantics=semantics)

    def add_annotation(self, node_attrs: Dict[str, Dict[str, Any]],
                       edge_attrs: Dict[str, Dict[str, Any]]) -> None:
        """Add node or edge annotations to the document-level graph

        Parameters
        ----------
        node_attrs
            the node annotations to be added
        edge_attrs
            the edge annotations to be added
        """
        self.document_graph.add_annotation(node_attrs, edge_attrs, self.sentence_ids)

    def semantics_node(self, document_node: str) -> Dict[str, Dict]:
        """The semantics node for a given document node

        Parameters
        ----------
        document_node
            the document domain node whose semantics node is to be
            retrieved
        """
        semantics = self.document_graph.nodes[document_node]['semantics']
        semantics_node = self.sentence_graphs[semantics['graph']].semantics_nodes[semantics['node']]
        return {semantics['node']: semantics_node}

    @memoized_property
    def text(self) -> str:
        """The document text"""
        # sentence graph names sort in sentence order within a document
        return ' '.join([sent_graph.sentence for gname, sent_graph in sorted(self.sentence_graphs.items())])
# pylint: disable=R1717
# pylint: disable=R0903
"""Module for building/containing dependency trees from CoNLL"""

from typing import List
from numpy import array
from networkx import DiGraph
from ..corpus import Corpus

# column layouts for the CoNLL-U ('u') and CoNLL-X ('x') formats
CONLL_HEAD = {'u': ['id', 'form', 'lemma', 'upos', 'xpos',
                    'feats', 'head', 'deprel', 'deps', 'misc'],
              'x': ['id', 'form', 'lemma', 'cpostag', 'postag',
                    'feats', 'head', 'deprel', 'phead', 'pdeprel']}

# column indices for the fields stored as node attributes
CONLL_NODE_ATTRS = {'u': {k: CONLL_HEAD['u'].index(k)
                          for k in ['form', 'lemma', 'upos', 'xpos', 'feats']},
                    'x': {k: CONLL_HEAD['x'].index(k)
                          for k in ['form', 'lemma', 'cpostag',
                                    'postag', 'feats']}}

# column indices for the fields stored as edge attributes
CONLL_EDGE_ATTRS = {'u': {k: CONLL_HEAD['u'].index(k)
                          for k in ['deprel']},
                    'x': {k: CONLL_HEAD['x'].index(k)
                          for k in ['deprel']}}


class CoNLLDependencyTreeCorpus(Corpus):
    """Class for building/containing dependency trees from CoNLL-U

    Attributes
    ----------
    graphs
        trees constructed from annotated sentences
    graphids
        ids for trees constructed from annotated sentences
    ngraphs
        number of graphs in corpus
    """

    def _graphbuilder(self, graphid: str, rawgraph: str):
        return DependencyGraphBuilder.from_conll(rawgraph, graphid)


class DependencyGraphBuilder:
    """A dependency graph builder"""

    @classmethod
    def from_conll(cls,
                   conll: List[List[str]],
                   treeid: str='',
                   spec: str='u') -> DiGraph:
        """Build DiGraph from a CoNLL representation

        Parameters
        ----------
        conll
            conll representation
        treeid
            a unique identifier for the tree
        spec
            the specification to assume of the conll representation
            ("u" or "x")
        """

        # node ids are prefixed with "<treeid>-"; empty treeids add
        # no prefix
        prefix = treeid+'-' if treeid else ''

        # the raw CoNLL rows are kept on the graph itself
        graph = DiGraph(conll=array(conll))
        graph.name = prefix.strip('-')

        # one token node per CoNLL row
        graph.add_nodes_from(cls._conll_node_attrs(prefix, row, spec)
                             for row in conll)

        # a distinguished root node at position 0
        graph.add_node(prefix+'root-0',
                       position=0,
                       domain='root',
                       type='root')

        # head -> dependent edges, one per CoNLL row
        graph.add_edges_from(cls._conll_edge_attrs(prefix, row, spec)
                             for row in conll)

        return graph

    @staticmethod
    def _conll_node_attrs(treeid, row, spec):
        """Build the (node id, attributes) pair for one CoNLL row"""
        node_id = row[0]

        attrs = {'domain': 'syntax',
                 'type': 'token',
                 'position': int(node_id)}

        for attr, idx in CONLL_NODE_ATTRS[spec].items():
            if attr != 'feats':
                attrs[attr] = row[idx]
            elif row[idx] != '_':
                # morphological features arrive as k=v pairs joined by
                # '|'; unpack them into individual node attributes
                attrs.update(kv.split('=') for kv in row[idx].split('|'))

        return (treeid+'syntax-'+node_id, attrs)

    @staticmethod
    def _conll_edge_attrs(treeid, row, spec):
        """Build the (parent, child, attributes) triple for one CoNLL row"""
        child_id = treeid+'syntax-'+row[0]

        # a head index of '0' points at the distinguished root node
        head = row[CONLL_HEAD[spec].index('head')]
        parent_id = treeid+'root-0' if head == '0' else treeid+'syntax-'+head

        edge_attrs = {attr: row[idx]
                      for attr, idx in CONLL_EDGE_ATTRS[spec].items()}
        edge_attrs.update(domain='syntax', type='dependency')

        return (parent_id, child_id, edge_attrs)
/decomp/vis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/vis/__init__.py -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Decomp documentation 2 | 3 | To build the documentation, you will need Sphinx and three Sphinx extensions: 4 | 5 | ```bash 6 | pip install --user sphinx==3.1.2 sphinxcontrib-napoleon sphinx-autodoc-typehints sphinx_rtd_theme 7 | ``` 8 | 9 | Then, while in this directory, use: 10 | 11 | ```bash 12 | make clean 13 | make html 14 | ``` 15 | 16 | To view the built documentation, start a python http server with: 17 | 18 | 19 | ```bash 20 | python3 -m http.server 21 | ``` 22 | 23 | Then, navigate to [http://localhost:8000/build/html/](http://localhost:8000/build/html/) in your browser. 
24 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=3.0.0 2 | sphinxcontrib-napoleon 3 | sphinx-autodoc-typehints 4 | sphinx_rtd_theme 5 | http://github.com/decompositional-semantics-initiative/decomp/tarball/master#egg=decomp 6 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
# For a full list of configuration options see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../../decomp/'))


# -- Project information -----------------------------------------------------

project = 'Decomp'
copyright = '2020, Aaron Steven White'
author = 'Aaron Steven White'

# The full version, including alpha/beta/rc tags
release = '0.2.2'

# Changes root document from contents.rst to index.rst
master_doc = 'index'

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinxcontrib.napoleon',  # MUST be loaded before typehints
    'sphinx_autodoc_typehints'
]

# Napoleon settings (NumPy-style docstrings are used throughout decomp)
napoleon_google_docstring = True
napoleon_numpy_docstring = True
napoleon_include_init_with_doc = False
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = False
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True
napoleon_use_keyword = True
napoleon_custom_sections = None

# Add any paths that contain templates here, relative to this directory.
57 | templates_path = ['_templates'] 58 | 59 | # List of patterns, relative to source directory, that match files and 60 | # directories to ignore when looking for source files. 61 | # This pattern also affects html_static_path and html_extra_path. 62 | exclude_patterns = [] 63 | 64 | 65 | # -- Options for HTML output ------------------------------------------------- 66 | 67 | # The theme to use for HTML and HTML Help pages. See the documentation for 68 | # a list of builtin themes. 69 | # 70 | html_theme = 'sphinx_rtd_theme' 71 | 72 | # Add any paths that contain custom static files (such as style sheets) here, 73 | # relative to this directory. They are copied after the builtin static files, 74 | # so a file named "default.css" will overwrite the builtin "default.css". 75 | html_static_path = ['_static'] 76 | -------------------------------------------------------------------------------- /docs/source/data/document-graphs.rst: -------------------------------------------------------------------------------- 1 | Universal Decompositional Document Graphs 2 | ========================================= 3 | 4 | The semantic graphs that form the third layer of annotation represent 5 | document-level relations. These graphs contain a node for each node in 6 | the document's constituent sentence-level graphs along with a pointer 7 | from the document-level node to the sentence-level node. Unlike the 8 | sentence-level graphs, they are not produced by PredPatt, so whether 9 | any two nodes in a document-level graph are joined by an edge is 10 | determined by whether the relation between the two nodes is annotated 11 | in some UDS dataset. 12 | 13 | At minimum, each of these nodes has the following attributes: 14 | 15 | .. 
_UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph 16 | 17 | - ``domain`` (``str``): the subgraph this node is part of (always ``document``) 18 | - ``type`` (``str``): the type of object corresponding to this node in the ``semantics`` domain (either ``predicate`` or ``argument``) 19 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``False``, although the corresponding ``semantics`` node will have this set as ``True``) 20 | - ``semantics`` (``dict``): a two-item dictionary containing information about the corresponding ``semantics`` node. The first item, ``graph``, indicates the sentence-level graph that the semantics node comes from. The second item, ``node``, contains the name of the node. 21 | 22 | Document graphs are initialized without edges, which are created dynamically 23 | when edge attribute annotations are added. These edges may span nodes 24 | associated with different sentences within a document and may connect not 25 | only predicates to arguments, but predicates to predicates and arguments to 26 | arguments. Any annotations that are provided that cross document boundaries 27 | will be automatically filtered out. Finally, beyond the attributes provided 28 | by annotations, each edge will also contain all but the last of the core 29 | set of node attributes listed above. 30 | 31 | The `UDSDocumentGraph`_ object is wrapped by a `UDSDocument`_, which 32 | holds additional metadata associated with the document, data relating 33 | to its constituent sentences (and their graphs), and methods for 34 | interacting with it. Finally, it should be noted that querying on 35 | document graphs is not currently supported. 36 | 37 | .. 
_UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument 38 | -------------------------------------------------------------------------------- /docs/source/data/index.rst: -------------------------------------------------------------------------------- 1 | Dataset Reference 2 | ================= 3 | 4 | The Universal Decompositional Semantics (UDS) dataset consists of four 5 | layers of annotations built on top of the `English Web Treebank`_ 6 | (EWT). 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | :caption: Contents: 11 | 12 | syntactic-graphs 13 | sentence-graphs 14 | document-graphs 15 | semantic-types 16 | 17 | .. _English Web Treebank: https://catalog.ldc.upenn.edu/LDC2012T13 18 | 19 | Each layer contains pointers directly to the previous layer. 20 | -------------------------------------------------------------------------------- /docs/source/data/semantic-types.rst: -------------------------------------------------------------------------------- 1 | `Universal Decompositional Semantic`_ Types 2 | =========================================== 3 | 4 | .. _Universal Decompositional Semantic: http://decomp.io/ 5 | 6 | PredPatt makes very coarse-grained typing distinctions—between 7 | predicate and argument nodes, on the one hand, and between dependency 8 | and head edges, on the other. UDS provides ultra fine-grained typing 9 | distinctions, represented as collections of real-valued 10 | attributes. The union of all node and edge attributes defined in UDS 11 | determines the *UDS type space*; any proper subset determines a *UDS 12 | type subspace*. 13 | 14 | UDS attributes are derived from crowd-sourced annotations of the heads 15 | or spans corresponding to predicates and/or arguments and are 16 | represented in the dataset as node and/or edge attributes. It is 17 | important to note that, though all nodes and edges in the semantics 18 | domain have a ``type`` attribute, UDS does not afford any special 19 | status to these types. 
That is, the only thing that UDS "sees" is the 20 | nodes and edges in the semantics domain. The set of nodes and edges 21 | visible to UDS is a superset of those associated with PredPatt 22 | predicates and their arguments. 23 | 24 | There are currently five node type subspaces annotated on 25 | nodes in sentence-level graphs. 26 | 27 | - `Factuality`_ (``factuality``) 28 | - `Genericity`_ (``genericity``) 29 | - `Time`_ (``time``) 30 | - `Entity type`_ (``wordsense``) 31 | - `Event structure`_ (``event_structure``) 32 | 33 | There are currently two edge type subspaces annotated on 34 | edges in sentence-level graphs. 35 | 36 | - `Semantic Proto-Roles`_ (``protoroles``) 37 | - `Event structure`_ (``event_structure``) 38 | 39 | There are currently (starting in UDS2.0) two edge type subspaces 40 | annotated on edges in document-level graphs. 41 | 42 | - `Time`_ (``time``) 43 | - `Event structure`_ (``event_structure``) 44 | 45 | Each subspace key lies at the same level as the ``type`` attribute and 46 | maps to a dictionary value. This dictionary maps from attribute keys 47 | (see *Attributes* in each section below) to dictionaries that always 48 | have two keys ``value`` and ``confidence``. See the below paper for 49 | information on how these are derived from the underlying dataset. 50 | 51 | Two versions of these annotations are currently available: one 52 | containing the raw annotator data (``"raw"``) and the other containing 53 | normalized data (``"normalized"``). In the former case, both the 54 | ``value`` and ``confidence`` fields described above map to 55 | dictionaries keyed on (anonymized) annotator IDs, where the 56 | corresponding value contains that annotator's response (for the 57 | ``value`` dictionary) or confidence (for the ``confidence`` 58 | dictionary). In the latter case, the ``value`` and ``confidence`` 59 | fields map to single, normalized value and confidence scores, 60 | respectively. 
61 | 62 | For more information on the normalization used to produce the 63 | normalized annotations, see: 64 | 65 | White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. `The Universal Decompositional Semantics Dataset and Decomp Toolkit`_. *Proceedings of The 12th Language Resources and Evaluation Conference*, 5698–5707. Marseille, France: European Language Resources Association. 66 | 67 | 68 | .. _The Universal Decompositional Semantics Dataset and Decomp Toolkit: https://www.aclweb.org/anthology/2020.lrec-1.699/ 69 | 70 | .. code-block:: latex 71 | 72 | @inproceedings{white-etal-2020-universal, 73 | title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit", 74 | author = "White, Aaron Steven and 75 | Stengel-Eskin, Elias and 76 | Vashishtha, Siddharth and 77 | Govindarajan, Venkata Subrahmanyan and 78 | Reisinger, Dee Ann and 79 | Vieira, Tim and 80 | Sakaguchi, Keisuke and 81 | Zhang, Sheng and 82 | Ferraro, Francis and 83 | Rudinger, Rachel and 84 | Rawlins, Kyle and 85 | Van Durme, Benjamin", 86 | booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference", 87 | month = may, 88 | year = "2020", 89 | address = "Marseille, France", 90 | publisher = "European Language Resources Association", 91 | url = "https://www.aclweb.org/anthology/2020.lrec-1.699", 92 | pages = "5698--5707", 93 | ISBN = "979-10-95546-34-4", 94 | } 95 | 96 | 97 | Information about each subspace can be found below. Unless otherwise 98 | specified the properties in a particular subspace remain constant 99 | across the raw and normalized formats. 100 | 101 | Factuality 102 | ---------- 103 | 104 | **Project page** 105 | 106 | ``_ 107 | 108 | **Sentence-level attributes** 109 | 110 | ``factual`` 111 | 112 | **First UDS version** 113 | 114 | 1.0 115 | 116 | **References** 117 | 118 | White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. 
Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016. 119 | 120 | 121 | Rudinger, R., White, A.S., & B. Van Durme. 2018. `Neural models of factuality`_. *Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)*, pages 731–744. New Orleans, Louisiana, June 1-6, 2018. 122 | 123 | .. _Neural models of factuality: https://www.aclweb.org/anthology/N18-1067 124 | .. _Universal Decompositional Semantics on Universal Dependencies: https://www.aclweb.org/anthology/D16-1177 125 | 126 | .. code-block:: latex 127 | 128 | @inproceedings{white-etal-2016-universal, 129 | title = "Universal Decompositional Semantics on {U}niversal {D}ependencies", 130 | author = "White, Aaron Steven and 131 | Reisinger, Dee Ann and 132 | Sakaguchi, Keisuke and 133 | Vieira, Tim and 134 | Zhang, Sheng and 135 | Rudinger, Rachel and 136 | Rawlins, Kyle and 137 | Van Durme, Benjamin", 138 | booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing", 139 | month = nov, 140 | year = "2016", 141 | address = "Austin, Texas", 142 | publisher = "Association for Computational Linguistics", 143 | url = "https://www.aclweb.org/anthology/D16-1177", 144 | doi = "10.18653/v1/D16-1177", 145 | pages = "1713--1723", 146 | } 147 | 148 | @inproceedings{rudinger-etal-2018-neural-models, 149 | title = "Neural Models of Factuality", 150 | author = "Rudinger, Rachel and 151 | White, Aaron Steven and 152 | Van Durme, Benjamin", 153 | booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)", 154 | month = jun, 155 | year = "2018", 156 | address = "New 
Orleans, Louisiana", 157 | publisher = "Association for Computational Linguistics", 158 | url = "https://www.aclweb.org/anthology/N18-1067", 159 | doi = "10.18653/v1/N18-1067", 160 | pages = "731--744", 161 | } 162 | 163 | 164 | Genericity 165 | ---------- 166 | 167 | **Project page** 168 | 169 | ``_ 170 | 171 | **Sentence-level attributes** 172 | 173 | ``arg-particular``, ``arg-kind``, ``arg-abstract``, ``pred-particular``, ``pred-dynamic``, ``pred-hypothetical`` 174 | 175 | **First UDS version** 176 | 177 | 1.0 178 | 179 | **References** 180 | 181 | Govindarajan, V.S., B. Van Durme, & A.S. White. 2019. `Decomposing Generalization: Models of Generic, Habitual, and Episodic Statements`_. Transactions of the Association for Computational Linguistics. 182 | 183 | .. _Decomposing Generalization\: Models of Generic, Habitual, and Episodic Statements: https://www.aclweb.org/anthology/Q19-1035 184 | 185 | .. code-block:: latex 186 | 187 | @article{govindarajan-etal-2019-decomposing, 188 | title = "Decomposing Generalization: Models of Generic, Habitual, and Episodic Statements", 189 | author = "Govindarajan, Venkata and 190 | Van Durme, Benjamin and 191 | White, Aaron Steven", 192 | journal = "Transactions of the Association for Computational Linguistics", 193 | volume = "7", 194 | month = mar, 195 | year = "2019", 196 | url = "https://www.aclweb.org/anthology/Q19-1035", 197 | doi = "10.1162/tacl_a_00285", 198 | pages = "501--517" 199 | } 200 | 201 | 202 | Time 203 | ---- 204 | 205 | **Project page** 206 | 207 | ``_ 208 | 209 | **Sentence-level attributes** 210 | 211 | *normalized* 212 | 213 | ``dur-hours``, ``dur-instant``, ``dur-forever``, ``dur-weeks``, ``dur-days``, ``dur-months``, ``dur-years``, ``dur-centuries``, ``dur-seconds``, ``dur-minutes``, ``dur-decades`` 214 | 215 | *raw* 216 | 217 | ``duration`` 218 | 219 | 220 | **Document-level attributes** 221 | 222 | *raw* 223 | 224 | ``rel-start1``, ``rel-start2``, ``rel-end1``, ``rel-end2`` 225 | 226 | **First UDS 
version** 227 | 228 | 1.0 (sentence-level), 2.0 (document-level) 229 | 230 | **References** 231 | 232 | Vashishtha, S., B. Van Durme, & A.S. White. 2019. `Fine-Grained Temporal Relation Extraction`_. *Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019)*, 2906—2919. Florence, Italy, July 29-31, 2019. 233 | 234 | 235 | .. _Fine-Grained Temporal Relation Extraction: https://www.aclweb.org/anthology/P19-1280 236 | 237 | .. code-block:: latex 238 | 239 | @inproceedings{vashishtha-etal-2019-fine, 240 | title = "Fine-Grained Temporal Relation Extraction", 241 | author = "Vashishtha, Siddharth and 242 | Van Durme, Benjamin and 243 | White, Aaron Steven", 244 | booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics", 245 | month = jul, 246 | year = "2019", 247 | address = "Florence, Italy", 248 | publisher = "Association for Computational Linguistics", 249 | url = "https://www.aclweb.org/anthology/P19-1280", 250 | doi = "10.18653/v1/P19-1280", 251 | pages = "2906--2919" 252 | } 253 | 254 | 255 | **Notes** 256 | 257 | 1. The Time dataset has different formats for raw and normalized annotations. The duration attributes from the normalized version are each assigned an ordinal value in the raw version (in ascending order of duration), which is assigned to the single attribute ``duration``. 258 | 2. The document-level relation annotations are *only* available in the raw format and only starting in UDS2.0. 
259 | 260 | Entity type 261 | ----------- 262 | 263 | **Project page** 264 | 265 | ``_ 266 | 267 | **Sentence-level attributes** 268 | 269 | ``supersense-noun.shape``, ``supersense-noun.process``, ``supersense-noun.relation``, ``supersense-noun.communication``, ``supersense-noun.time``, ``supersense-noun.plant``, ``supersense-noun.phenomenon``, ``supersense-noun.animal``, ``supersense-noun.state``, ``supersense-noun.substance``, ``supersense-noun.person``, ``supersense-noun.possession``, ``supersense-noun.Tops``, ``supersense-noun.object``, ``supersense-noun.event``, ``supersense-noun.artifact``, ``supersense-noun.act``, ``supersense-noun.body``, ``supersense-noun.attribute``, ``supersense-noun.quantity``, ``supersense-noun.motive``, ``supersense-noun.location``, ``supersense-noun.cognition``, ``supersense-noun.group``, ``supersense-noun.food``, ``supersense-noun.feeling`` 270 | 271 | **First UDS version** 272 | 273 | 1.0 274 | 275 | **Notes** 276 | 277 | 1. The key is called ``wordsense`` because the normalized annotations come from UDS-Word Sense (v1.0). 278 | 279 | **References** 280 | 281 | White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016. 282 | 283 | .. 
code-block:: latex 284 | 285 | @inproceedings{white-etal-2016-universal, 286 | title = "Universal Decompositional Semantics on {U}niversal {D}ependencies", 287 | author = "White, Aaron Steven and 288 | Reisinger, Dee Ann and 289 | Sakaguchi, Keisuke and 290 | Vieira, Tim and 291 | Zhang, Sheng and 292 | Rudinger, Rachel and 293 | Rawlins, Kyle and 294 | Van Durme, Benjamin", 295 | booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing", 296 | month = nov, 297 | year = "2016", 298 | address = "Austin, Texas", 299 | publisher = "Association for Computational Linguistics", 300 | url = "https://www.aclweb.org/anthology/D16-1177", 301 | doi = "10.18653/v1/D16-1177", 302 | pages = "1713--1723", 303 | } 304 | 305 | 306 | Semantic Proto-Roles 307 | -------------------- 308 | 309 | **Project page** 310 | 311 | ``_ 312 | 313 | **Sentence-level attributes** 314 | 315 | ``was_used``, ``purpose``, ``partitive``, ``location``, ``instigation``, ``existed_after``, ``time``, ``awareness``, ``change_of_location``, ``manner``, ``sentient``, ``was_for_benefit``, ``change_of_state_continuous``, ``existed_during``, ``change_of_possession``, ``existed_before``, ``volition``, ``change_of_state`` 316 | 317 | **References** 318 | 319 | Reisinger, D., R. Rudinger, F. Ferraro, C. Harman, K. Rawlins, & B. Van Durme. (2015). `Semantic Proto-Roles`_. *Transactions of the Association for Computational Linguistics 3*:475–488. 320 | 321 | White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016. 322 | 323 | .. _Semantic Proto-Roles: https://www.aclweb.org/anthology/Q15-1034 324 | 325 | .. 
code-block:: latex 326 | 327 | @article{reisinger-etal-2015-semantic, 328 | title = "Semantic Proto-Roles", 329 | author = "Reisinger, Dee Ann and 330 | Rudinger, Rachel and 331 | Ferraro, Francis and 332 | Harman, Craig and 333 | Rawlins, Kyle and 334 | Van Durme, Benjamin", 335 | journal = "Transactions of the Association for Computational Linguistics", 336 | volume = "3", 337 | year = "2015", 338 | url = "https://www.aclweb.org/anthology/Q15-1034", 339 | doi = "10.1162/tacl_a_00152", 340 | pages = "475--488", 341 | } 342 | 343 | @inproceedings{white-etal-2016-universal, 344 | title = "Universal Decompositional Semantics on {U}niversal {D}ependencies", 345 | author = "White, Aaron Steven and 346 | Reisinger, Dee Ann and 347 | Sakaguchi, Keisuke and 348 | Vieira, Tim and 349 | Zhang, Sheng and 350 | Rudinger, Rachel and 351 | Rawlins, Kyle and 352 | Van Durme, Benjamin", 353 | booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing", 354 | month = nov, 355 | year = "2016", 356 | address = "Austin, Texas", 357 | publisher = "Association for Computational Linguistics", 358 | url = "https://www.aclweb.org/anthology/D16-1177", 359 | doi = "10.18653/v1/D16-1177", 360 | pages = "1713--1723", 361 | } 362 | 363 | 364 | Event structure 365 | --------------- 366 | 367 | **Project page** 368 | 369 | ``_ 370 | 371 | **Sentence-level attributes** 372 | 373 | *normalized* 374 | 375 | 376 | ``distributive``, ``dynamic``, ``natural_parts``, ``part_similarity``, ``telic``, ``avg_part_duration_lbound-centuries``, ``avg_part_duration_ubound-centuries``, ``situation_duration_lbound-centuries``, ``situation_duration_ubound-centuries``, ``avg_part_duration_lbound-days``, ``avg_part_duration_ubound-days``, ``situation_duration_lbound-days``, ``situation_duration_ubound-days``, ``avg_part_duration_lbound-decades``, ``avg_part_duration_ubound-decades``, ``situation_duration_lbound-decades``, ``situation_duration_ubound-decades``, 
``avg_part_duration_lbound-forever``, ``avg_part_duration_ubound-forever``, ``situation_duration_lbound-forever``, ``situation_duration_ubound-forever``, ``avg_part_duration_lbound-fractions_of_a_second``, ``avg_part_duration_ubound-fractions_of_a_second``, ``situation_duration_lbound-fractions_of_a_second``, ``situation_duration_ubound-fractions_of_a_second``, ``avg_part_duration_lbound-hours``, ``avg_part_duration_ubound-hours``, ``situation_duration_lbound-hours``, ``situation_duration_ubound-hours``, ``avg_part_duration_lbound-instant``, ``avg_part_duration_ubound-instant``, ``situation_duration_lbound-instant``, ``situation_duration_ubound-instant``, ``avg_part_duration_lbound-minutes``, ``avg_part_duration_ubound-minutes``, ``situation_duration_lbound-minutes``, ``situation_duration_ubound-minutes``, ``avg_part_duration_lbound-months``, ``avg_part_duration_ubound-months``, ``situation_duration_lbound-months``, ``situation_duration_ubound-months``, ``avg_part_duration_lbound-seconds``, ``avg_part_duration_ubound-seconds``, ``situation_duration_lbound-seconds``, ``situation_duration_ubound-seconds``, ``avg_part_duration_lbound-weeks``, ``avg_part_duration_ubound-weeks``, ``situation_duration_lbound-weeks``, ``situation_duration_ubound-weeks``, ``avg_part_duration_lbound-years``, ``avg_part_duration_ubound-years``, ``situation_duration_lbound-years``, ``situation_duration_ubound-years`` 377 | 378 | *raw* 379 | 380 | ``dynamic``, ``natural_parts``, ``part_similarity``, ``telic``, ``avg_part_duration_lbound``, ``avg_part_duration_ubound``, ``situation_duration_lbound``, ``situation_duration_ubound`` 381 | 382 | 383 | **Document-level attributes** 384 | 385 | ``pred1_contains_pred2``, ``pred2_contains_pred1`` 386 | 387 | **First UDS version** 388 | 389 | 2.0 390 | 391 | **Notes** 392 | 393 | 1. 
Whether ``dynamic``, ``situation_duration_lbound``, and ``situation_duration_ubound`` are answered or ``part_similarity``, ``avg_part_duration_lbound``, and ``avg_part_duration_ubound`` are answered is dependent on the answer an annotator gives to ``natural_parts``. Thus, not all node attributes will necessarily be present on all nodes. 394 | 395 | **References** 396 | 397 | Gantt, W., L. Glass, & A.S. White. 2021. `Decomposing and Recomposing Event Structure`_. arXiv:2103.10387 [cs.CL]. 398 | 399 | 400 | .. _Decomposing and Recomposing Event Structure: https://arxiv.org/abs/2103.10387 401 | 402 | .. code-block:: latex 403 | 404 | @misc{gantt2021decomposing, 405 | title={Decomposing and Recomposing Event Structure}, 406 | author={William Gantt and Lelia Glass and Aaron Steven White}, 407 | year={2021}, 408 | eprint={2103.10387}, 409 | archivePrefix={arXiv}, 410 | primaryClass={cs.CL} 411 | } 412 | 413 | 414 | 415 | -------------------------------------------------------------------------------- /docs/source/data/sentence-graphs.rst: -------------------------------------------------------------------------------- 1 | `PredPatt`_ Sentence Graphs 2 | =========================== 3 | 4 | .. _PredPatt: https://github.com/hltcoe/PredPatt 5 | 6 | The semantic graphs that form the second layer of annotation in the 7 | dataset are produced by the PredPatt_ system. PredPatt takes as input 8 | a UD parse for a single sentence and produces a set of predicates and 9 | set of arguments of each predicate in that sentence. Both predicates 10 | and arguments are associated with a single head token in the sentence 11 | as well as a set of tokens that make up the predicate or argument (its 12 | span). Predicate or argument spans may be trivial in only containing 13 | the head token. 14 | 15 | For example, given the dependency parse for the sentence *Chris gave 16 | the book to Pat .*, PredPatt produces the following. 
17 | 18 | :: 19 | 20 | ?a gave ?b to ?c 21 | ?a: Chris 22 | ?b: the book 23 | ?c: Pat 24 | 25 | Assuming UD's 1-indexation, the single predicate in this sentence 26 | (*gave...to*) has a head at position 2 and a span over positions {2, 27 | 5}. This predicate has three arguments, one headed by *Chris* at 28 | position 1, with span over position {1}; one headed by *book* at 29 | position 4, with span over positions {3, 4}; and one headed by *Pat* 30 | at position 6, with span over position {6}. 31 | 32 | See the `PredPatt documentation tests`_ for examples. 33 | 34 | .. _PredPatt documentation tests: https://github.com/hltcoe/PredPatt/blob/master/doc/DOCTEST.md 35 | 36 | Each predicate and argument produced by PredPatt is associated with a 37 | node in a digraph with identifier 38 | ``ewt-SPLIT-SENTNUM-semantics-TYPE-HEADTOKNUM``, where ``TYPE`` is 39 | always either ``pred`` or ``arg`` and ``HEADTOKNUM`` is the ordinal 40 | position of the head token within the sentence (1-indexed, following 41 | the convention in UD-EWT). At minimum, each such node has the 42 | following attributes. 43 | 44 | - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``) 45 | - ``type`` (``str``): the type of the object in the particular domain (either ``predicate`` or ``argument``) 46 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``) 47 | 48 | Predicate and argument nodes produced by PredPatt furthermore always 49 | have at least one outgoing *instance* edge that points to nodes in the 50 | syntax domain that correspond to the associated span of the predicate 51 | or argument. At minimum, each such edge has the following attributes. 
52 | 53 | - ``domain`` (``str``): the subgraph this node is part of (always ``interface``) 54 | - ``type`` (``str``): the type of the object in the particular domain (either ``head`` or ``nonhead``) 55 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``) 56 | 57 | Because PredPatt produces a unique head for each predicate and 58 | argument, there is always exactly one instance edge of type ``head`` 59 | from any particular node in the semantics domain. There may or may not 60 | be instance edges of type ``nonhead``. 61 | 62 | In addition to instance edges, predicate nodes always have exactly one 63 | outgoing edge connecting them to each of the nodes corresponding to 64 | their arguments. At minimum, each such edge has the following 65 | attributes. 66 | 67 | - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``) 68 | - ``type`` (``str``): the type of the object in the particular domain (always ``dependency``) 69 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``) 70 | 71 | There is one special case where an argument node has an outgoing edge 72 | that points to a predicate node: clausal subordination. 73 | 74 | For example, given the dependency parse for the sentence *Gene thought 75 | that Chris gave the book to Pat .*, PredPatt produces the following. 76 | 77 | :: 78 | 79 | ?a thinks ?b 80 | ?a: Gene 81 | ?b: SOMETHING := that Chris gave the book to Pat 82 | 83 | ?a gave ?b to ?c 84 | ?a: Chris 85 | ?b: the book 86 | ?c: Pat 87 | 88 | In this case, the second argument of the predicate headed by *thinks* 89 | is the argument *that Chris gave the book to Pat*, which is headed by 90 | *gave*. This argument is associated with a node of type ``argument`` 91 | with span over positions {3, 4, 5, 6, 7, 8, 9} and identifier 92 | ``ewt-SPLIT-SENTNUM-semantics-arg-5``. 
In addition, there is a 93 | predicate headed by *gave*. This predicate is associated with a node 94 | with span over positions {5, 8} and identifier 95 | ``ewt-SPLIT-SENTNUM-semantics-pred-5``. Node 96 | ``ewt-SPLIT-SENTNUM-semantics-arg-5`` then has an outgoing edge 97 | pointing to ``ewt-SPLIT-SENTNUM-semantics-pred-5``. At minimum, each 98 | such edge has the following attributes. 99 | 100 | - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``) 101 | - ``type`` (``str``): the type of the object in the particular domain (always ``head``) 102 | - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``) 103 | 104 | The ``type`` attribute in this case has the same value as instance 105 | edges, but crucially the ``domain`` attribute is distinct. In the case 106 | of instance edges, it is ``interface`` and in the case of clausal 107 | subordination, it is ``semantics``. This matters when making queries 108 | against the graph. 109 | 110 | If the ``frompredpatt`` attribute has value ``True``, it is guaranteed 111 | that the only semantics edges of type ``head`` are ones that involve 112 | clausal subordination like the above. This is not guaranteed for nodes 113 | for which the ``frompredpatt`` attribute has value ``False``. 114 | 115 | Every semantic graph contains at least four additional *performative* 116 | nodes that are not produced by PredPatt (and thus, for which the 117 | ``frompredpatt`` attribute has value ``False``). 
118 | 119 | - ``ewt-SPLIT-SENTNUM-semantics-arg-0``: an argument node representing the entire sentence in the same way complement clauses are represented 120 | - ``ewt-SPLIT-SENTNUM-semantics-pred-root``: a predicate node representing the author's production of the entire sentence directed at the addressee 121 | - ``ewt-SPLIT-SENTNUM-semantics-arg-speaker``: an argument node representing the author 122 | - ``ewt-SPLIT-SENTNUM-semantics-arg-addressee``: an argument node representing the addressee 123 | 124 | All of these nodes have a ``domain`` attribute with value ``semantics``. Unlike nodes associated with PredPatt predicates and arguments, ``ewt-SPLIT-SENTNUM-semantics-pred-root``, ``ewt-SPLIT-SENTNUM-semantics-arg-speaker``, and ``ewt-SPLIT-SENTNUM-semantics-arg-addressee`` have no instance edges connecting them to syntactic nodes. In contrast, ``ewt-SPLIT-SENTNUM-semantics-arg-0`` has an instance head edge to ``ewt-SPLIT-SENTNUM-root-0``. 125 | 126 | The ``ewt-SPLIT-SENTNUM-semantics-arg-0`` node has semantics head edges to each of the predicate nodes in the graph that are not dominated by any other semantics node. This node, in addition to ``ewt-SPLIT-SENTNUM-semantics-arg-speaker`` and ``ewt-SPLIT-SENTNUM-semantics-arg-addressee``, has a dependency edge to ``ewt-SPLIT-SENTNUM-semantics-pred-root``. 127 | 128 | These nodes are included for purposes of forward compatibility. None of them currently have attributes, but future releases of decomp will include annotations on either them or their edges. 129 | -------------------------------------------------------------------------------- /docs/source/data/syntactic-graphs.rst: -------------------------------------------------------------------------------- 1 | `Universal Dependencies`_ Syntactic Graphs 2 | ========================================== 3 | 4 | .. 
_Universal Dependencies: https://universaldependencies.org/ 5 | 6 | The syntactic graphs that form the first layer of annotation in the dataset come from gold UD dependency parses provided in the UD-EWT_ treebank, which contains sentences from the Linguistic Data Consortium's constituency parsed EWT_. UD-EWT has predefined training (``train``), development (``dev``), and test (``test``) data in corresponding files in `CoNLL-U format`_: ``en_ewt-ud-train.conllu``, ``en_ewt-ud-dev.conllu``, and ``en_ewt-ud-test.conllu``. Henceforth, ``SPLIT`` ranges over ``train``, ``dev``, and ``test``. 7 | 8 | .. _UD-EWT: https://github.com/UniversalDependencies/UD_English-EWT 9 | .. _EWT: https://catalog.ldc.upenn.edu/LDC2012T13 10 | .. _CoNLL-U format: https://universaldependencies.org/format.html 11 | 12 | In UDS, each dependency parsed sentence in UD-EWT is represented as a rooted_ `directed graph`_ (digraph). Each graph's identifier takes the form ``ewt-SPLIT-SENTNUM``, where ``SENTNUM`` is the ordinal position (1-indexed) of the sentence within ``en_ewt-ud-SPLIT.conllu``. 13 | 14 | .. _rooted: https://en.wikipedia.org/wiki/Rooted_graph 15 | .. _directed graph: https://en.wikipedia.org/wiki/Directed_graph 16 | 17 | Each token in a sentence is associated with a node with identifier ``ewt-SPLIT-SENTNUM-syntax-TOKNUM``, where ``TOKNUM`` is the token's ordinal position within the sentence (1-indexed, following the convention in UD-EWT). At minimum, each node has the following attributes. 
18 | 19 | - ``position`` (``int``): the ordinal position (``TOKNUM``) of that node as an integer (again, 1-indexed) 20 | - ``domain`` (``str``): the subgraph this node is part of (always ``syntax``) 21 | - ``type`` (``str``): the type of the object in the particular domain (always ``token``) 22 | - ``form`` (``str``): the actual token 23 | - ``lemma`` (``str``): the lemma corresponding to the actual token 24 | - ``upos`` (``str``): the UD part-of-speech tag 25 | - ``xpos`` (``str``): the Penn TreeBank part-of-speech tag 26 | - any attribute found in the features column of the CoNLL-U 27 | 28 | For information about the values ``upos``, ``xpos``, and the attributes contained in the features column can take on, see the `UD Guidelines`_. 29 | 30 | .. _UD Guidelines: https://universaldependencies.org/guidelines.html 31 | 32 | Each graph also has a special root node with identifier ``ewt-SPLIT-SENTNUM-root-0``. This node always has a ``position`` attribute set to ``0`` and ``domain`` and ``type`` attributes set to ``root``. 33 | 34 | Edges within the graph represent the grammatical relations (dependencies) annotated in UD-EWT. These dependencies are always represented as directed edges pointing from the head to the dependent. At minimum, each edge has the following attributes. 35 | 36 | - ``domain`` (``str``): the subgraph this node is part of (always ``syntax``) 37 | - ``type`` (``str``): the type of the object in the particular domain (always ``dependency``) 38 | - ``deprel`` (``str``): the UD dependency relation tag 39 | 40 | For information about the values ``deprel`` can take on, see the `UD Guidelines`_. 
41 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Decomp: A toolkit for decompositional semantics 2 | =============================================== 3 | 4 | Decomp_ is a toolkit for working with the `Universal Decompositional 5 | Semantics (UDS) dataset`_, which is a collection of directed acyclic 6 | semantic graphs with real-valued node and edge attributes pointing 7 | into `Universal Dependencies`_ syntactic dependency trees. 8 | 9 | The toolkit is built on top of NetworkX_ and RDFLib_ making it 10 | straightforward to: 11 | 12 | - read the UDS dataset from its native JSON format 13 | - query both the syntactic and semantic subgraphs of UDS (as well as 14 | pointers between them) using SPARQL 1.1 queries 15 | - serialize UDS graphs to many common formats, such as Notation3_, 16 | N-Triples_, turtle_, and JSON-LD_, as well as any other format 17 | supported by NetworkX 18 | 19 | The toolkit was built by `Aaron Steven White`_ and is maintained by 20 | the `Decompositional Semantics Initiative`_. The UDS dataset was 21 | constructed from annotations collected by the `Decompositional 22 | Semantics Initiative`_. 23 | 24 | If you use either UDS or Decomp in your research, we ask that you cite the following paper: 25 | 26 | White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. `The Universal Decompositional Semantics Dataset and Decomp Toolkit`_. *Proceedings of The 12th Language Resources and Evaluation Conference*, 5698–5707. Marseille, France: European Language Resources Association. 27 | 28 | .. 
code-block:: latex 29 | 30 | @inproceedings{white-etal-2020-universal, 31 | title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit", 32 | author = "White, Aaron Steven and 33 | Stengel-Eskin, Elias and 34 | Vashishtha, Siddharth and 35 | Govindarajan, Venkata Subrahmanyan and 36 | Reisinger, Dee Ann and 37 | Vieira, Tim and 38 | Sakaguchi, Keisuke and 39 | Zhang, Sheng and 40 | Ferraro, Francis and 41 | Rudinger, Rachel and 42 | Rawlins, Kyle and 43 | Van Durme, Benjamin", 44 | booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference", 45 | month = may, 46 | year = "2020", 47 | address = "Marseille, France", 48 | publisher = "European Language Resources Association", 49 | url = "https://www.aclweb.org/anthology/2020.lrec-1.699", 50 | pages = "5698--5707", 51 | ISBN = "979-10-95546-34-4", 52 | } 53 | 54 | 55 | .. _Decomp: https://github.com/decompositional-semantics-initiative/decomp 56 | .. _Universal Decompositional Semantics (UDS) dataset: http://decomp.io 57 | .. _Universal Dependencies: https://universaldependencies.org/ 58 | .. _NetworkX: https://github.com/networkx/networkx 59 | .. _RDFLib: https://github.com/RDFLib/rdflib 60 | .. _matplotlib: https://matplotlib.org/ 61 | .. _D3: https://d3js.org/ 62 | .. _Notation3: https://www.w3.org/TeamSubmission/n3/ 63 | .. _N-Triples: https://www.w3.org/TR/n-triples/ 64 | .. _turtle: https://www.w3.org/TeamSubmission/turtle/ 65 | .. _JSON-LD: https://json-ld.org/ 66 | .. _Aaron Steven White: http://aaronstevenwhite.io/ 67 | .. _Decompositional Semantics Initiative: http://decomp.io/ 68 | .. _The Universal Decompositional Semantics Dataset and Decomp Toolkit: https://www.aclweb.org/anthology/2020.lrec-1.699/ 69 | 70 | .. 
toctree:: 71 | :maxdepth: 2 72 | :caption: Contents: 73 | 74 | install 75 | tutorial/index 76 | data/index 77 | package/index 78 | 79 | 80 | Indices and tables 81 | ================== 82 | 83 | * :ref:`genindex` 84 | * :ref:`modindex` 85 | * :ref:`search` 86 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | The most painless way to get started quickly is to use the included 8 | barebones Python 3.6-based Dockerfile. To build the image and start a 9 | python interactive prompt, use: 10 | 11 | .. code-block:: bash 12 | 13 | git clone git://gitlab.hltcoe.jhu.edu/aswhite/decomp.git 14 | cd decomp 15 | docker build -t decomp . 16 | docker run -it decomp python 17 | 18 | A jupyter notebook can then be opened in the standard way. 19 | 20 | Decomp can also be installed to a local environment using ``pip``. 21 | 22 | .. code-block:: bash 23 | 24 | pip install git+git://github.com/decompositional-semantics-initiative/decomp.git 25 | 26 | 27 | As an alternative to ``pip`` you can clone the decomp repository and use the included ``setup.py`` with the ``install`` flag. 28 | 29 | .. code-block:: bash 30 | 31 | git clone https://github.com/decompositional-semantics-initiative/decomp.git 32 | cd decomp 33 | pip install --user --no-cache-dir -r ./requirements.txt 34 | python setup.py install 35 | 36 | 37 | If you would like to install the package for the purposes of development, you can use the included ``setup.py`` with the ``develop`` flag. 38 | 39 | .. 
code-block:: bash 40 | 41 | git clone https://github.com/decompositional-semantics-initiative/decomp.git 42 | cd decomp 43 | pip install --user --no-cache-dir -r ./requirements.txt 44 | python setup.py develop 45 | 46 | 47 | If you have trouble installing via setup.py or pip on OS X Mojave, adding the following environment variables may help. 48 | 49 | .. code-block:: bash 50 | 51 | CXXFLAGS=-stdlib=libc++ CFLAGS=-stdlib=libc++ python setup.py install 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /docs/source/package/decomp.corpus.corpus.rst: -------------------------------------------------------------------------------- 1 | decomp.corpus.corpus 2 | ==================== 3 | 4 | .. automodule:: decomp.corpus.corpus 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.corpus.rst: -------------------------------------------------------------------------------- 1 | decomp.corpus 2 | ============= 3 | 4 | .. automodule:: decomp.corpus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. toctree:: 10 | decomp.corpus.corpus 11 | -------------------------------------------------------------------------------- /docs/source/package/decomp.graph.nx.rst: -------------------------------------------------------------------------------- 1 | decomp.graph.nx 2 | =============== 3 | 4 | .. automodule:: decomp.graph.nx 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.graph.rdf.rst: -------------------------------------------------------------------------------- 1 | decomp.graph.rdf 2 | ================ 3 | 4 | .. 
automodule:: decomp.graph.rdf 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.graph.rst: -------------------------------------------------------------------------------- 1 | decomp.graph 2 | ============= 3 | 4 | .. automodule:: decomp.graph 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. toctree:: 10 | decomp.graph.rdf 11 | decomp.graph.nx 12 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.predpatt.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics.predpatt 2 | ========================= 3 | 4 | .. automodule:: decomp.semantics.predpatt 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics 2 | ================ 3 | 4 | .. automodule:: decomp.semantics 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. toctree:: 10 | decomp.semantics.predpatt 11 | decomp.semantics.uds 12 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.uds.annotation.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics.uds.annotation 2 | =============================== 3 | 4 | .. automodule:: decomp.semantics.uds.annotation 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.uds.corpus.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics.uds.corpus 2 | =========================== 3 | 4 | .. 
automodule:: decomp.semantics.uds.corpus 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.uds.document.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics.uds.document 2 | ============================= 3 | 4 | .. automodule:: decomp.semantics.uds.document 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.uds.graph.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics.uds.graph 2 | ========================== 3 | 4 | .. automodule:: decomp.semantics.uds.graph 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.uds.metadata.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics.uds.metadata 2 | ============================= 3 | 4 | .. automodule:: decomp.semantics.uds.metadata 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.semantics.uds.rst: -------------------------------------------------------------------------------- 1 | decomp.semantics.uds 2 | ==================== 3 | 4 | .. automodule:: decomp.semantics.uds 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. toctree:: 10 | decomp.semantics.uds.corpus 11 | decomp.semantics.uds.document 12 | decomp.semantics.uds.graph 13 | decomp.semantics.uds.annotation 14 | decomp.semantics.uds.metadata 15 | -------------------------------------------------------------------------------- /docs/source/package/decomp.syntax.dependency.rst: -------------------------------------------------------------------------------- 1 | decomp.syntax.dependency 2 | ======================== 3 | 4 | .. 
automodule:: decomp.syntax.dependency 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/decomp.syntax.rst: -------------------------------------------------------------------------------- 1 | decomp.syntax 2 | ============= 3 | 4 | .. automodule:: decomp.syntax 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. toctree:: 10 | decomp.syntax.dependency 11 | -------------------------------------------------------------------------------- /docs/source/package/decomp.vis.rst: -------------------------------------------------------------------------------- 1 | decomp.vis 2 | ============= 3 | 4 | .. automodule:: decomp.vis 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. toctree:: 10 | decomp.vis.uds_vis 11 | -------------------------------------------------------------------------------- /docs/source/package/decomp.vis.uds_vis.rst: -------------------------------------------------------------------------------- 1 | decomp.vis.uds_vis 2 | ================== 3 | 4 | .. automodule:: decomp.vis.uds_vis 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/package/index.rst: -------------------------------------------------------------------------------- 1 | Package Reference 2 | ================= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | 7 | decomp.syntax 8 | decomp.semantics 9 | decomp.corpus 10 | decomp.graph 11 | decomp.vis 12 | -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_genericity_no_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_genericity_no_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_no_protoroles_no_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_protoroles_no_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_no_protoroles_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_protoroles_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_no_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_node_props_no_syntax.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_node_props_no_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_node_props_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_node_props_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_protoroles_no_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_protoroles_no_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_protoroles_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_protoroles_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/assets/vis_syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_syntax.png -------------------------------------------------------------------------------- /docs/source/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | If you have not already :doc:`installed ` 
the decomp 5 | package, follow those instructions before continuing the tutorial. 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | :caption: Contents: 10 | 11 | quick-start 12 | reading 13 | querying 14 | serializing 15 | visualization 16 | -------------------------------------------------------------------------------- /docs/source/tutorial/querying.rst: -------------------------------------------------------------------------------- 1 | Querying UDS Graphs 2 | =================== 3 | 4 | Decomp provides a rich array of methods for querying UDS graphs: both 5 | pre-compiled and user-specified. Arbitrary user-specified graph 6 | queries can be performed using the `UDSSentenceGraph.query`_ instance 7 | method. This method accepts arbitrary SPARQL 1.1 queries, either as 8 | strings or as precompiled `Query`_ objects built using RDFlib's 9 | `prepareQuery`_. 10 | 11 | .. _UDSSentenceGraph.query: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.query 12 | .. _Query: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.html#rdflib.plugins.sparql.sparql.Query 13 | .. _prepareQuery: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.html?highlight=preparequery#rdflib.plugins.sparql.processor.prepareQuery 14 | 15 | 16 | **NOTE:** Querying is not currently supported for document-level graphs 17 | (`UDSDocumentGraph`_ objects) or for sentence-level graphs that contain 18 | raw annotations (`RawUDSDataset`_). 19 | 20 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph 21 | .. _RawUDSDataset: ../package/decomp.semantics.uds.html#decomp.semantics.uds.RawUDSDataset 22 | 23 | Pre-compiled queries 24 | -------------------- 25 | 26 | For many use cases, the various instance attributes and methods for 27 | accessing nodes, edges, and their attributes in the UDS graphs will 28 | likely be sufficient; there is no need to use ``query``. 
For 29 | example, to get a dictionary mapping identifiers for syntax nodes in 30 | the UDS graph to their attributes, you can use: 31 | 32 | .. code-block:: python 33 | 34 | uds["ewt-train-12"].syntax_nodes 35 | 36 | To get a dictionary mapping identifiers for semantics nodes in the UDS 37 | graph to their attributes, you can use: 38 | 39 | .. code-block:: python 40 | 41 | uds["ewt-train-12"].semantics_nodes 42 | 43 | To get a dictionary mapping identifiers for semantics edges (tuples of 44 | node identifiers) in the UDS graph to their attributes, you can use: 45 | 46 | .. code-block:: python 47 | 48 | uds["ewt-train-12"].semantics_edges() 49 | 50 | To get a dictionary mapping identifiers for semantics edges (tuples of 51 | node identifiers) in the UDS graph involving the predicate headed by 52 | the 7th token to their attributes, you can use: 53 | 54 | .. code-block:: python 55 | 56 | uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7') 57 | 58 | To get a dictionary mapping identifiers for syntax edges (tuples of 59 | node identifiers) in the UDS graph to their attributes, you can use: 60 | 61 | .. code-block:: python 62 | 63 | uds["ewt-train-12"].syntax_edges() 64 | 65 | And to get a dictionary mapping identifiers for syntax edges (tuples 66 | of node identifiers) in the UDS graph involving the node for the 7th 67 | token to their attributes, you can use: 68 | 69 | .. code-block:: python 70 | 71 | uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7') 72 | 73 | 74 | There are also methods for accessing relationships between semantics 75 | and syntax nodes. For example, you can get a tuple of the ordinal 76 | position for the head syntax node in the UDS graph that maps of the 77 | predicate headed by the 7th token in the corresponding sentence to a 78 | list of the form and lemma attributes for that token, you can use: 79 | 80 | .. 
code-block:: python 81 | 82 | uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma']) 83 | 84 | And if you want the same information for every token in the span, you 85 | can use: 86 | 87 | .. code-block:: python 88 | 89 | uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma']) 90 | 91 | This will return a dictionary mapping ordinal position for syntax 92 | nodes in the UDS graph that make of the predicate headed by the 7th 93 | token in the corresponding sentence to a list of the form and lemma 94 | attributes for the corresponding tokens. 95 | 96 | Custom queries 97 | -------------- 98 | 99 | Where the above methods generally turn out to be insufficient is in 100 | selecting nodes and edges on the basis of (combinations of their 101 | attributes). This is where having the full power of SPARQL comes in 102 | handy. This power comes with substantial slow downs in the speed of 103 | queries, however, so if you can do a query without using SPARQL you 104 | should try to. 105 | 106 | For example, if you were interested in extracting only predicates 107 | referring to events that likely happened and likely lasted for 108 | minutes, you could use: 109 | 110 | .. code-block:: python 111 | 112 | querystr = """ 113 | SELECT ?pred 114 | WHERE { ?pred ; 115 | ; 116 | ?factual ; 117 | ?duration 118 | FILTER ( ?factual > 0 && ?duration > 0 ) 119 | } 120 | """ 121 | 122 | results = {gid: graph.query(querystr, query_type='node', cache_rdf=False) 123 | for gid, graph in uds.items()} 124 | 125 | Or more tersely (but equivalently): 126 | 127 | .. code-block:: python 128 | 129 | results = uds.query(querystr, query_type='node', cache_rdf=False) 130 | 131 | Note that the ``query_type`` parameter is set to ``'node'``. This 132 | setting means that a dictionary mapping node identifiers to node 133 | attribute values will be returned. 
If no such query type is passed, an 134 | RDFLib `Result`_ object will be returned, which you will need to 135 | postprocess yourself. This is necessary if, for instance, you are 136 | making a ``CONSTRUCT``, ``ASK``, or ``DESCRIBE`` query. 137 | 138 | Also, note that the ``cache_rdf`` parameter is set to ``False``. This is a 139 | memory-saving measure, as ``UDSSentenceGraph.query`` implicitly builds an RDF 140 | graph on the backend, and these graphs can be quite large. Leaving 141 | ``cache_rdf`` at its defaults of ``True`` will substantially speed up 142 | later queries at the expense of sometimes substantial memory costs. 143 | 144 | .. _Result: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.query.Result 145 | 146 | Constraints can also make reference to node and edge attributes of 147 | other nodes. For instance, if you were interested in extracting all 148 | predicates referring to events that are likely spatiotemporally 149 | delimited and have at least one spatiotemporally delimited participant 150 | that was volitional in the event, you could use: 151 | 152 | .. code-block:: python 153 | 154 | querystr = """ 155 | SELECT DISTINCT ?node 156 | WHERE { ?node ?edge ?arg ; 157 | ; 158 | ; 159 | ?predparticular 160 | FILTER ( ?predparticular > 0 ) . 161 | ?arg ; 162 | ; 163 | ?argparticular 164 | FILTER ( ?argparticular > 0 ) . 165 | ?edge ?volition 166 | FILTER ( ?volition > 0 ) . 167 | } 168 | """ 169 | 170 | results = uds.query(querystr, query_type='node', cache_rdf=False) 171 | 172 | Disjunctive constraints are also possible. For instance, for the last 173 | query, if you were interested in either volitional or sentient 174 | arguments, you could use: 175 | 176 | .. code-block:: python 177 | 178 | querystr = """ 179 | SELECT DISTINCT ?node 180 | WHERE { ?node ?edge ?arg ; 181 | ; 182 | ; 183 | ?predparticular 184 | FILTER ( ?predparticular > 0 ) . 185 | ?arg ; 186 | ; 187 | ?argparticular 188 | FILTER ( ?argparticular > 0 ) . 
189 | { ?edge ?volition 190 | FILTER ( ?volition > 0 ) 191 | } UNION 192 | { ?edge ?sentient 193 | FILTER ( ?sentient > 0 ) 194 | } 195 | } 196 | """ 197 | 198 | results = uds.query(querystr, query_type='node', cache_rdf=False) 199 | 200 | Beyond returning node attributes based on complex constraints, you can 201 | also return edge attributes. For instance, for the last query, if you 202 | were interested in all the attributes of edges connecting predicates 203 | and arguments satisfying the constraints of the last query, you could 204 | simply change which variable is bound by ``SELECT`` and set 205 | ``query_type`` to ``'edge'``. 206 | 207 | .. code-block:: python 208 | 209 | querystr = """ 210 | SELECT ?edge 211 | WHERE { ?node ?edge ?arg ; 212 | ; 213 | ; 214 | ?predparticular 215 | FILTER ( ?predparticular > 0 ) . 216 | ?arg ; 217 | ; 218 | ?argparticular 219 | FILTER ( ?argparticular > 0 ) . 220 | { ?edge ?volition 221 | FILTER ( ?volition > 0 ) 222 | } UNION 223 | { ?edge ?sentient 224 | FILTER ( ?sentient > 0 ) 225 | } 226 | } 227 | """ 228 | 229 | results = uds.query(querystr, query_type='edge', cache_rdf=False) 230 | -------------------------------------------------------------------------------- /docs/source/tutorial/quick-start.rst: -------------------------------------------------------------------------------- 1 | Quick Start 2 | =========== 3 | 4 | To read the Universal Decompositional Semantics (UDS) dataset, use: 5 | 6 | .. code-block:: python 7 | 8 | from decomp import UDSCorpus 9 | 10 | uds = UDSCorpus() 11 | 12 | This imports a `UDSCorpus`_ object ``uds``, which contains all 13 | graphs across all splits in the data. If you would like a corpus, 14 | e.g., containing only a particular split, see other loading options in 15 | :doc:`reading`. 16 | 17 | .. 
_UDSCorpus: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus 18 | 19 | The first time you read UDS, it will take several minutes to 20 | complete while the dataset is built from the `Universal Dependencies 21 | English Web Treebank`_, which is not shipped with the package (but is 22 | downloaded automatically on import in the background), and the `UDS 23 | annotations`_, which are shipped with the package. Subsequent uses 24 | will be faster, since the dataset is cached on build. 25 | 26 | .. _Universal Dependencies English Web Treebank: https://github.com/UniversalDependencies/UD_English-EWT 27 | .. _UDS annotations: http://decomp.io/data/ 28 | 29 | `UDSSentenceGraph`_ objects in the corpus can be accessed using standard 30 | dictionary getters or iteration. For instance, to get the UDS graph 31 | corresponding to the 12th sentence in ``en-ud-train.conllu``, you can 32 | use: 33 | 34 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph 35 | 36 | .. code-block:: python 37 | 38 | uds["ewt-train-12"] 39 | 40 | To access documents (`UDSDocument`_ objects, each of which has an associated 41 | `UDSDocumentGraph`_), you can use: 42 | 43 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument 44 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph 45 | 46 | .. code-block:: python 47 | 48 | uds.documents["reviews-112579"] 49 | 50 | 51 | To get the associated document graph, use: 52 | 53 | .. code-block:: python 54 | 55 | uds.documents["reviews-112579"].document_graph 56 | 57 | 58 | More generally, ``UDSCorpus`` objects behave like dictionaries. For 59 | example, to print all the sentence-level graph identifiers in the corpus 60 | (e.g. ``"ewt-train-12"``), you can use: 61 | 62 | .. 
code-block:: python 63 | 64 | for graphid in uds: 65 | print(graphid) 66 | 67 | 68 | To print all the document identifiers in the corpus, which correspond 69 | directly to English Web Treebank file IDs (e.g. ``"reviews-112579"``), you 70 | can use: 71 | 72 | .. code-block:: python 73 | 74 | for documentid in uds.documents: 75 | print(documentid) 76 | 77 | 78 | Similarly, to print all the sentence-level graph identifiers in the corpus 79 | (e.g. ``"ewt-train-12"``) along with the corresponding sentence, you can use: 80 | 81 | .. code-block:: python 82 | 83 | for graphid, graph in uds.items(): 84 | print(graphid) 85 | print(graph.sentence) 86 | 87 | 88 | Likewise, the following will print all document identifiers, along with each 89 | document's entire text: 90 | 91 | .. code-block:: python 92 | 93 | for documentid, document in uds.documents.items(): 94 | print(documentid) 95 | print(document.text) 96 | 97 | 98 | A list of sentence-level graph identifiers can also be accessed via the 99 | ``graphids`` attribute of the UDSCorpus. A mapping from these identifiers 100 | and the corresponding graph can be accessed via the ``graphs`` attribute. 101 | 102 | .. code-block:: python 103 | 104 | # a list of the sentence-level graph identifiers in the corpus 105 | uds.graphids 106 | 107 | # a dictionary mapping the sentence-level 108 | # graph identifiers to the corresponding graph 109 | uds.graphs 110 | 111 | 112 | A list of document identifiers can also be accessed via the ``document_ids`` 113 | attribute of the UDSCorpus: 114 | 115 | .. code-block:: python 116 | 117 | uds.document_ids 118 | 119 | 120 | For sentence-level graphs, there are various instance attributes and 121 | methods for accessing nodes, edges, and their attributes in the UDS 122 | sentence-level graphs. For example, to get a dictionary mapping identifiers for syntax nodes in a sentence-level graph to their attributes, you can use: 123 | 124 | .. 
code-block:: python 125 | 126 | uds["ewt-train-12"].syntax_nodes 127 | 128 | To get a dictionary mapping identifiers for semantics nodes in the UDS 129 | graph to their attributes, you can use: 130 | 131 | .. code-block:: python 132 | 133 | uds["ewt-train-12"].semantics_nodes 134 | 135 | To get a dictionary mapping identifiers for semantics edges (tuples of 136 | node identifiers) in the UDS graph to their attributes, you can use: 137 | 138 | .. code-block:: python 139 | 140 | uds["ewt-train-12"].semantics_edges() 141 | 142 | To get a dictionary mapping identifiers for semantics edges (tuples of 143 | node identifiers) in the UDS graph involving the predicate headed by 144 | the 7th token to their attributes, you can use: 145 | 146 | .. code-block:: python 147 | 148 | uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7') 149 | 150 | To get a dictionary mapping identifiers for syntax edges (tuples of 151 | node identifiers) in the UDS graph to their attributes, you can use: 152 | 153 | .. code-block:: python 154 | 155 | uds["ewt-train-12"].syntax_edges() 156 | 157 | And to get a dictionary mapping identifiers for syntax edges (tuples 158 | of node identifiers) in the UDS graph involving the node for the 7th 159 | token to their attributes, you can use: 160 | 161 | .. code-block:: python 162 | 163 | uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7') 164 | 165 | 166 | There are also methods for accessing relationships between semantics 167 | and syntax nodes. For example, you can get a tuple of the ordinal 168 | position for the head syntax node in the UDS graph that maps of the 169 | predicate headed by the 7th token in the corresponding sentence to a 170 | list of the form and lemma attributes for that token, you can use: 171 | 172 | .. 
code-block:: python 173 | 174 | uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma']) 175 | 176 | And if you want the same information for every token in the span, you 177 | can use: 178 | 179 | .. code-block:: python 180 | 181 | uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma']) 182 | 183 | This will return a dictionary mapping ordinal position for syntax 184 | nodes in the UDS graph that make of the predicate headed by the 7th 185 | token in the corresponding sentence to a list of the form and lemma 186 | attributes for the corresponding tokens. 187 | 188 | More complicated queries of a sentence-level UDS graph can be performed 189 | using the ``query`` method, which accepts arbitrary SPARQL 1.1 queries. See 190 | :doc:`querying` for details. 191 | 192 | Queries on document-level graphs are not currently supported. However, each 193 | `UDSDocument`_ does contain a number of useful attributes, including its ``genre`` 194 | (corresponding to the English Web Treebank subcorpus); its ``text`` (as 195 | demonstrated above); its ``timestamp``; the ``sentence_ids`` of its 196 | constituent sentences; and the sentence-level graphs (``sentence_graphs``) 197 | associated with those sentences. Additionally, one can also look up the 198 | semantics node associated with a particular node in the document graph via 199 | the `semantics_node`_ instance method. 200 | 201 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument 202 | .. _semantics_node: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument.semantics_node 203 | 204 | 205 | Lastly, iterables for the nodes and edges of a document-level graph may be 206 | accessed as follows: 207 | 208 | 209 | .. 
code-block:: python 210 | 211 | uds.documents["reviews-112579"].document_graph.nodes 212 | uds.documents["reviews-112579"].document_graph.edges 213 | 214 | 215 | Unlike the nodes and edges in a sentence-level graph, the ones in a document- 216 | level graph all share a common (``document``) domain. By default, document 217 | graphs are initialized without edges and with one node for each semantics node 218 | in the sentence-level graphs associated with the constituent sentences. Edges 219 | may be added by supplying annotations (see :doc:`reading`). 220 | -------------------------------------------------------------------------------- /docs/source/tutorial/reading.rst: -------------------------------------------------------------------------------- 1 | Reading the UDS dataset 2 | ======================= 3 | 4 | The most straightforward way to read the Universal Decompositional 5 | Semantics (UDS) dataset is to import it. 6 | 7 | .. code-block:: python 8 | 9 | from decomp import UDSCorpus 10 | 11 | uds = UDSCorpus() 12 | 13 | This loads a `UDSCorpus`_ object ``uds``, which contains all 14 | graphs across all splits in the data. 15 | 16 | .. _UDSCorpus: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus 17 | 18 | As noted in :doc:`quick-start`, the first time you do read UDS, it 19 | will take several minutes to complete while the dataset is built from 20 | the `Universal Dependencies English Web Treebank`_ (UD-EWT), which is not 21 | shipped with the package (but is downloaded automatically on import in 22 | the background), and the `UDS annotations`_, which are shipped with 23 | the package as package data. Normalized annotations are loaded by default. 24 | To load raw annotations, specify ``"raw"`` as the argument to the UDSCorpus 25 | ``annotation_format`` keyword arugment as follows: 26 | 27 | .. 
code-block:: python 28 | 29 | from decomp import UDSCorpus 30 | 31 | uds = UDSCorpus(annotation_format="raw") 32 | 33 | (See `Adding annotations`_ below for more detail on annotation types.) 34 | Subsequent uses of the corpus will be faster after the initial build, 35 | since the built dataset is cached. 36 | 37 | .. _Universal Dependencies English Web Treebank: https://github.com/UniversalDependencies/UD_English-EWT 38 | .. _UDS annotations: http://decomp.io/data/ 39 | 40 | Standard splits 41 | --------------- 42 | 43 | If you would rather read only the graphs in the training, development, 44 | or test split, you can do that by specifying the ``split`` parameter 45 | of ``UDSCorpus``. 46 | 47 | .. code-block:: python 48 | 49 | from decomp import UDSCorpus 50 | 51 | # read the train split of the UDS corpus 52 | uds_train = UDSCorpus(split='train') 53 | 54 | Adding annotations 55 | ------------------ 56 | 57 | Additional annotations beyond the standard UDS annotations can be 58 | added using this method by passing a list of `UDSAnnotation`_ 59 | objects. These annotations can be added at two levels: the sentence level 60 | and the document level. Sentence-level annotations contain attributes of 61 | `UDSSentenceGraph`_ nodes or edges. Document-level annotations contain 62 | attributes for `UDSDocumentGraph`_ nodes or edges. Document-level 63 | edge annotations may relate nodes associated with different sentences 64 | in a document, although they are added as annotations only to the 65 | the appropriate `UDSDocumentGraph`_. 66 | 67 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph 68 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph 69 | .. 
_UDSAnnotation: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSAnnotation 70 | 71 | Sentence-level and document-level annotations share the same two in-memory 72 | representations: ``RawUDSDataset`` and ``NormalizedUDSDataset``. The former 73 | may have multiple annotations for the same node or edge attribute, while the 74 | latter must have only a single annotation. Both are loaded from 75 | JSON-formatted files, but differ in the expected format (see the 76 | `from_json`_ methods of each class for formatting guidelines). For example, 77 | if you have some additional *normalized* sentence-level annotations in a file 78 | ``new_annotations.json``, those can be added to the existing UDS annotations 79 | using: 80 | 81 | .. _NormalizedUDSDataset: ../package/decomp.semantics.uds.html#decomp.semantics.uds.NormalizedUDSDataset 82 | .. _from_json: ../package/decomp.semantics.uds.html#decomp.semantics.uds.NormalizedUDSDataset.from_json 83 | 84 | .. code-block:: python 85 | 86 | from decomp import NormalizedUDSDataset 87 | 88 | # read annotations 89 | new_annotations = [NormalizedUDSDataset.from_json("new_annotations.json")] 90 | 91 | # read the train split of the UDS corpus and append new annotations 92 | uds_train_plus = UDSCorpus(split='train', sentence_annotations=new_annotations) 93 | 94 | If instead you wished to add *raw* annotations (and supposing those 95 | annotations were still in "new_annotations.json"), you would do the following: 96 | 97 | .. 
code-block:: python 98 | 99 | from decomp import RawUDSDataset 100 | 101 | # read annotations 102 | new_annotations = [RawUDSDataset.from_json("new_annotations.json")] 103 | 104 | # read the train split of the UDS corpus and append new annotations 105 | uds_train_plus = UDSCorpus(split='train', sentence_annotations=new_annotations, 106 | annotation_format="raw") 107 | 108 | If ``new_annotations.json`` contained document-level annotations 109 | you would pass ``new_annotations.json`` to the constructor keyword 110 | argument ``document_annotations`` instead of to ``sentence_annotations``. 111 | Importantly, these annotations are added *in addition* to the existing 112 | UDS annotations that ship with the toolkit. You do not need to add these 113 | manually. 114 | 115 | Finally, it should be noted that querying is currently **not** supported 116 | for document-level graphs or for sentence-level graphs containing raw 117 | annotations. 118 | 119 | Reading from an alternative location 120 | ------------------------------------ 121 | 122 | If you would like to read the dataset from an alternative 123 | location—e.g. if you have serialized the dataset to JSON, using the 124 | `to_json`_ instance method—this can be accomplished using 125 | ``UDSCorpus`` class methods (see :doc:`serializing` for more 126 | information on serialization). For example, if you serialize 127 | ``uds_train`` to the files ``uds-ewt-sentences-train.json`` (for 128 | sentences) and ``uds-ewt-documents-train.json`` (for the documents), 129 | you can read it back into memory using: 130 | 131 | .. _to_json: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus.to_json 132 | 133 | .. 
code-block:: python 134 | 135 | # serialize uds_train to JSON 136 | uds_train.to_json("uds-ewt-sentences-train.json", "uds-ewt-documents-train.json") 137 | 138 | # read JSON serialized uds_train 139 | uds_train = UDSCorpus.from_json("uds-ewt-sentences-train.json", "uds-ewt-documents-train.json") 140 | 141 | Rebuilding the corpus 142 | --------------------- 143 | 144 | If you would like to rebuild the corpus from the UD-EWT CoNLL files 145 | and some set of JSON-formatted annotation files, you can use the 146 | analogous `from_conll`_ class method. Importantly, unlike the 147 | standard instance initialization described above, the UDS annotations 148 | are *not* automatically added. For example, if ``en-ud-train.conllu`` 149 | is in the current working directory and you have already loaded 150 | ``new_annotations`` as above, a corpus containing only those 151 | annotations (without the UDS annotations) can be loaded using: 152 | 153 | .. _from_conll: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus.from_conll 154 | 155 | .. code-block:: python 156 | 157 | # read the train split of the UD corpus and append new annotations 158 | uds_train_annotated = UDSCorpus.from_conll("en-ud-train.conllu", sentence_annotations=new_annotations) 159 | 160 | This also means that if you only want the semantic graphs as implied 161 | by PredPatt (without annotations), you can use the ``from_conll`` 162 | class method to load them. 163 | 164 | .. code-block:: python 165 | 166 | # read the train split of the UD corpus 167 | ud_train = UDSCorpus.from_conll("en-ud-train.conllu") 168 | 169 | Note that, because PredPatt is used for predicate-argument extraction, 170 | only versions of UD-EWT that are compatible with PredPatt can be used 171 | here. Version 1.2 is suggested. 172 | 173 | Though other serialization formats are available (see 174 | :doc:`serializing`), these formats are not yet supported for reading. 
175 | -------------------------------------------------------------------------------- /docs/source/tutorial/serializing.rst: -------------------------------------------------------------------------------- 1 | Serializing the UDS dataset 2 | =========================== 3 | 4 | The canonical serialization format for the Universal Decompositional 5 | Semantics (UDS) dataset is JSON. Sentence- and document-level graphs 6 | are serialized separately. For example, if you wanted to serialize 7 | the entire UDS dataset to the files ``uds-sentence.json`` (for 8 | sentences) and ``uds-document.json`` (for documents), you would use: 9 | 10 | .. code-block:: python 11 | 12 | from decomp import uds 13 | 14 | uds.to_json("uds-sentence.json", "uds-document.json") 15 | 16 | The particular format is based directly on the `adjacency_data`_ 17 | method implemented in `NetworkX`_ 18 | 19 | .. _adjacency_data: https://networkx.github.io/documentation/stable/reference/readwrite/generated/networkx.readwrite.json_graph.adjacency_data.html#networkx.readwrite.json_graph.adjacency_data 20 | .. _NetworkX: https://github.com/networkx/networkx 21 | 22 | For the sentence-level graphs only, in addition to this JSON format, 23 | any serialization format supported by `RDFLib`_ can also be used by 24 | accessing the `rdf`_ attribute of each `UDSSentenceGraph`_ object. 25 | This attribute exposes an `rdflib.graph.Graph`_ object, which implements 26 | a `serialize`_ method. By default, this method outputs rdf/xml. The 27 | ``format`` parameter can also be set to ``'n3'``, ``'turtle'``, 28 | ``'nt'``, ``'pretty-xml'``, ``'trix'``, ``'trig'``, or ``'nquads'``; 29 | and additional formats, such as JSON-LD, can be supported by installing 30 | plugins for RDFLib. 31 | 32 | .. _serialize: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.graph.Graph.serialize 33 | .. _rdf: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.rdf 34 | .. 
_UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph 35 | .. _rdflib.graph.Graph: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#graph-module 36 | 37 | Before considering serialization to such a format, be aware that only 38 | the JSON format mentioned above can be read by the 39 | toolkit. Additionally, note that if your aim is to query the graphs in 40 | the corpus, this can be done using the `query`_ instance method in 41 | ``UDSSentenceGraph``. See :doc:`querying` for details. 42 | 43 | .. _RDFLib: https://github.com/RDFLib/rdflib 44 | .. _query: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.query 45 | -------------------------------------------------------------------------------- /docs/source/tutorial/visualization.rst: -------------------------------------------------------------------------------- 1 | Visualizing UDS Graphs 2 | ====================== 3 | 4 | Decomp comes with a built-in interactive visualization tool using the `UDSVisualization`_ object. This object visualizes a `UDSSentenceGraph`_. 5 | 6 | .. _UDSVisualization: ../package/decomp.vis.uds_vis.html#decomp.vis.uds_vis.UDSVisualization 7 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph 8 | 9 | A visualization (which is based on `Dash`_) is served to your local browser via port 8050 (e.g. `http://localhost:8050`). 10 | The following snippet visualizes the first graph in the dev split: 11 | 12 | .. _Dash: https://dash.plotly.com 13 | 14 | 15 | .. code-block:: python 16 | 17 | graph = uds["ewt-dev-1"] 18 | vis = UDSVisualization(graph) 19 | vis.serve() 20 | 21 | The browser window will look like this: 22 | 23 | .. image:: assets/vis_no_syntax.png 24 | 25 | Black edges indicate edges in the semantic graph, while gray arrows are instance edges between semantics and syntax nodes. 26 | Thick gray arrows indicate the syntactic head of a semantic argument or predicate. 
27 | Semantics nodes have a thick outline when they are annotated with decomp properties. 28 | Hovering over such a node will reveal the annotations in a pop-out window. 29 | 30 | .. image:: assets/vis_node_props_no_syntax.png 31 | 32 | Similarly, yellow boxes on edges indicate protorole annotations, and can be hovered over to reveal their values. 33 | 34 | .. image:: assets/vis_protoroles_no_syntax.png 35 | 36 | Using the checkboxes at the top left, annotation subspaces can be selected and de-selected. 37 | If all the annotations for a node or edge are de-selected, it will become non-bolded or disappear 38 | 39 | .. image:: assets/vis_no_protoroles_no_syntax.png 40 | 41 | 42 | Several options can be supplied to a visualization via arguments. For example, we can visualize the syntactic parse along with the semantic parse by setting 43 | 44 | .. code-block:: python 45 | 46 | vis = UDSVisualization(graph, add_syntax_edges = True) 47 | 48 | which results in the following visualization. 49 | 50 | 51 | .. 
image:: assets/vis_syntax.png 52 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.22.0 2 | networkx>=2.5.1 3 | memoized_property==1.0.3 4 | typing==3.6.2 5 | rdflib==4.2.2 6 | setuptools>=52.0.0 7 | numpy>=1.16.4 8 | pyparsing==2.2.0 9 | overrides==3.1.0 10 | http://github.com/hltcoe/PredPatt/tarball/master#egg=predpatt 11 | dash[testing]==1.9.1 12 | selenium==3.141.0 13 | jsonpickle==1.4.1 14 | pytest==6.2.2 15 | matplotlib==3.2.1 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup(name='decomp', 4 | version='0.2.2', 5 | description='Toolkit for working with Universal\ 6 | Decompositional Semantics graphs', 7 | url='https://decomp.io/', 8 | author='Aaron Steven White', 9 | author_email='aaron.white@rochester.edu', 10 | license='MIT', 11 | packages=find_packages(), 12 | package_dir={'decomp': 'decomp'}, 13 | package_data={'decomp': ['data/*']}, 14 | install_requires=['requests==2.22.0', 15 | 'networkx>=2.5.1', 16 | 'memoized_property==1.0.3', 17 | 'overrides==3.1.0', 18 | 'typing==3.6.2', 19 | 'rdflib==4.2.2', 20 | 'setuptools>=52.0.0', 21 | 'numpy>=1.16.4', 22 | 'pyparsing==2.2.0', 23 | 'predpatt @ http://github.com/hltcoe/PredPatt/tarball/master#egg=predpatt'], 24 | test_suite='nose.collector', 25 | tests_require=['nose'], 26 | include_package_data=True, 27 | zip_safe=False) 28 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | This directory contains the tests for the [Decomp 2 | toolkit](https://github.com/decompositional-semantics-initiative/decomp). 
These 3 | tests use
normalized_edge_sentence_annotation(test_data_dir): 47 | fpath = os.path.join(test_data_dir, 'normalized_edge_sentence_annotation.json') 48 | 49 | with open(fpath) as f: 50 | return f.read() 51 | 52 | @pytest.fixture 53 | def normalized_sentence_annotations(normalized_node_sentence_annotation, 54 | normalized_edge_sentence_annotation): 55 | norm_node_ann = NormalizedUDSAnnotation.from_json(normalized_node_sentence_annotation) 56 | norm_edge_ann = NormalizedUDSAnnotation.from_json(normalized_edge_sentence_annotation) 57 | 58 | return norm_node_ann, norm_edge_ann 59 | 60 | @pytest.fixture 61 | def raw_node_sentence_annotation(test_data_dir): 62 | fpath = os.path.join(test_data_dir, 'raw_node_sentence_annotation.json') 63 | 64 | with open(fpath) as f: 65 | return f.read() 66 | 67 | @pytest.fixture 68 | def raw_edge_sentence_annotation(test_data_dir): 69 | fpath = os.path.join(test_data_dir, 'raw_edge_sentence_annotation.json') 70 | 71 | with open(fpath) as f: 72 | return f.read() 73 | 74 | @pytest.fixture 75 | def raw_sentence_annotations(raw_node_sentence_annotation, 76 | raw_edge_sentence_annotation): 77 | raw_node_ann = RawUDSAnnotation.from_json(raw_node_sentence_annotation) 78 | raw_edge_ann = RawUDSAnnotation.from_json(raw_edge_sentence_annotation) 79 | 80 | return raw_node_ann, raw_edge_ann 81 | -------------------------------------------------------------------------------- /tests/data/normalized_edge_document_annotation.json: -------------------------------------------------------------------------------- 1 | {"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-20%%ewt-train-7192-document-arg-2": {"protoroles": {"instigation": {"confidence": 1.0, "value": -0.0}, "change_of_possession": {"confidence": 1.0, "value": -0.0}, "existed_before": {"confidence": 0.6796, "value": 0.0111}, "was_for_benefit": {"confidence": 1.0, "value": -0.0}, "change_of_state_continuous": {"confidence": 0.1675, "value": 0.0032}, "change_of_state": {"confidence": 0.1675, 
"value": 0.0032}, "volition": {"confidence": 1.0, "value": -0.0}, "change_of_location": {"confidence": 1.0, "value": -0.0}, "partitive": {"confidence": 0.564, "value": -0.0941}, "existed_during": {"confidence": 1.0, "value": 1.3421}, "existed_after": {"confidence": 0.6796, "value": 0.0111}, "awareness": {"confidence": 1.0, "value": -0.0}, "sentient": {"confidence": 1.0, "value": -0.9348}, "was_used": {"confidence": 0.564, "value": -0.0}}}, "ewt-train-7192-document-pred-25%%ewt-train-7191-document-arg-18": {"protoroles": {"instigation": {"confidence": 1.0, "value": 1.3557}, "change_of_possession": {"confidence": 0.7724, "value": -0.0}, "existed_before": {"confidence": 1.0, "value": 1.3527}, "was_for_benefit": {"confidence": 0.1976, "value": -0.0504}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.2067, "value": -0.0548}, "volition": {"confidence": 1.0, "value": 1.3545}, "change_of_location": {"confidence": 0.272, "value": -0.0922}, "partitive": {"confidence": 0.1148, "value": -0.0018}, "existed_during": {"confidence": 1.0, "value": 1.3557}, "existed_after": {"confidence": 1.0, "value": 1.3527}, "awareness": {"confidence": 1.0, "value": 1.3526}, "sentient": {"confidence": 1.0, "value": 1.354}, "was_used": {"confidence": 0.4373, "value": -0.0207}}}, "ewt-train-7192-document-pred-20%%ewt-train-7190-document-arg-3": {"protoroles": {"instigation": {"confidence": 1.0, "value": -1.5074}, "change_of_possession": {"confidence": 1.0, "value": -0.3909}, "existed_before": {"confidence": 1.0, "value": 1.3954}, "was_for_benefit": {"confidence": 0.3418, "value": 0.0008}, "change_of_state_continuous": {"confidence": 0.0791, "value": -0.0351}, "change_of_state": {"confidence": 0.3333, "value": -0.0085}, "volition": {"confidence": 1.0, "value": -0.3909}, "change_of_location": {"confidence": 0.1395, "value": -0.0549}, "partitive": {"confidence": 0.0791, "value": -0.1354}, "existed_during": {"confidence": 1.0, "value": 1.3959}, 
"existed_after": {"confidence": 0.6567, "value": 0.124}, "awareness": {"confidence": 0.1395, "value": -0.0549}, "sentient": {"confidence": 1.0, "value": -1.508}, "was_used": {"confidence": 0.3333, "value": -0.0085}}}}} 2 | -------------------------------------------------------------------------------- /tests/data/normalized_edge_sentence_annotation.json: -------------------------------------------------------------------------------- 1 | {"metadata": {"protoroles": {"awareness": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_location": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_possession": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_state": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_state_continuous": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_after": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_before": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_during": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "instigation": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "location": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "manner": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "partitive": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "purpose": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "sentient": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "time": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "volition": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "was_for_benefit": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "was_used": {"value": {"datatype": "float"}, 
"confidence": {"datatype": "float"}}}}, "data": {"tree1": {"tree1-semantics-pred-11%%tree1-semantics-arg-13": {"protoroles": {"instigation": {"confidence": 1.0, "value": -0.0}, "change_of_possession": {"confidence": 1.0, "value": -0.0}, "existed_before": {"confidence": 0.6796, "value": 0.0111}, "was_for_benefit": {"confidence": 1.0, "value": -0.0}, "change_of_state_continuous": {"confidence": 0.1675, "value": 0.0032}, "change_of_state": {"confidence": 0.1675, "value": 0.0032}, "volition": {"confidence": 1.0, "value": -0.0}, "change_of_location": {"confidence": 1.0, "value": -0.0}, "partitive": {"confidence": 0.564, "value": -0.0941}, "existed_during": {"confidence": 1.0, "value": 1.3421}, "existed_after": {"confidence": 0.6796, "value": 0.0111}, "awareness": {"confidence": 1.0, "value": -0.0}, "sentient": {"confidence": 1.0, "value": -0.9348}, "was_used": {"confidence": 0.564, "value": -0.0}}}, "tree1-semantics-pred-7%%tree1-semantics-arg-3": {"protoroles": {"instigation": {"confidence": 1.0, "value": 1.3557}, "change_of_possession": {"confidence": 0.7724, "value": -0.0}, "existed_before": {"confidence": 1.0, "value": 1.3527}, "was_for_benefit": {"confidence": 0.1976, "value": -0.0504}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.2067, "value": -0.0548}, "volition": {"confidence": 1.0, "value": 1.3545}, "change_of_location": {"confidence": 0.272, "value": -0.0922}, "partitive": {"confidence": 0.1148, "value": -0.0018}, "existed_during": {"confidence": 1.0, "value": 1.3557}, "existed_after": {"confidence": 1.0, "value": 1.3527}, "awareness": {"confidence": 1.0, "value": 1.3526}, "sentient": {"confidence": 1.0, "value": 1.354}, "was_used": {"confidence": 0.4373, "value": -0.0207}}}, "tree1-semantics-pred-11%%tree1-semantics-arg-9": {"protoroles": {"instigation": {"confidence": 1.0, "value": -1.5074}, "change_of_possession": {"confidence": 1.0, "value": -0.3909}, "existed_before": {"confidence": 1.0, "value": 
1.3954}, "was_for_benefit": {"confidence": 0.3418, "value": 0.0008}, "change_of_state_continuous": {"confidence": 0.0791, "value": -0.0351}, "change_of_state": {"confidence": 0.3333, "value": -0.0085}, "volition": {"confidence": 1.0, "value": -0.3909}, "change_of_location": {"confidence": 0.1395, "value": -0.0549}, "partitive": {"confidence": 0.0791, "value": -0.1354}, "existed_during": {"confidence": 1.0, "value": 1.3959}, "existed_after": {"confidence": 0.6567, "value": 0.124}, "awareness": {"confidence": 0.1395, "value": -0.0549}, "sentient": {"confidence": 1.0, "value": -1.508}, "was_used": {"confidence": 0.3333, "value": -0.0085}}}}}} 2 | -------------------------------------------------------------------------------- /tests/data/normalized_node_document_annotation.json: -------------------------------------------------------------------------------- 1 | {"answers-20111105112131AA6gIX6_ans": {"ewt-train-7189-document-arg-2": {"genericity": {"arg-kind": {"confidence": 1.0, "value": 1.1619}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-pred-25": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.54}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "ewt-train-7191-document-arg-18": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.5399}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "ewt-train-7192-document-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": -1.5399}, "pred-hypothetical": {"confidence": 1.0, "value": 0.7748}, "pred-particular": {"confidence": 1.0, "value": 
-1.54}}}, "ewt-train-7194-document-arg-13": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7194-document-arg-1": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-arg-2": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}}} 2 | -------------------------------------------------------------------------------- /tests/data/normalized_node_sentence_annotation.json: -------------------------------------------------------------------------------- 1 | {"metadata": {"genericity": {"pred-dynamic": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "pred-hypothetical": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "pred-particular": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-abstract": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-kind": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-particular": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}}}, "data": {"tree1": {"tree1-semantics-arg-15": {"genericity": {"arg-kind": {"confidence": 1.0, "value": 1.1619}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.54}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "tree1-semantics-arg-3": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": 
-1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.5399}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": -1.5399}, "pred-hypothetical": {"confidence": 1.0, "value": 0.7748}, "pred-particular": {"confidence": 1.0, "value": -1.54}}}, "tree1-semantics-arg-23": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-arg-9": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-arg-13": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}}}} 2 | -------------------------------------------------------------------------------- /tests/data/raw_edge_sentence_annotators.txt: -------------------------------------------------------------------------------- 1 | protoroles-annotator-0 2 | protoroles-annotator-1 3 | protoroles-annotator-10 4 | protoroles-annotator-11 5 | protoroles-annotator-12 6 | protoroles-annotator-13 7 | protoroles-annotator-14 8 | protoroles-annotator-15 9 | protoroles-annotator-16 10 | protoroles-annotator-17 11 | protoroles-annotator-18 12 | protoroles-annotator-19 13 | protoroles-annotator-2 14 | protoroles-annotator-20 15 | protoroles-annotator-21 16 | protoroles-annotator-22 17 | protoroles-annotator-23 18 | protoroles-annotator-24 19 | protoroles-annotator-25 20 | protoroles-annotator-26 21 | protoroles-annotator-27 22 | protoroles-annotator-28 23 | protoroles-annotator-29 24 | 
protoroles-annotator-3 25 | protoroles-annotator-30 26 | protoroles-annotator-31 27 | protoroles-annotator-32 28 | protoroles-annotator-33 29 | protoroles-annotator-34 30 | protoroles-annotator-35 31 | protoroles-annotator-36 32 | protoroles-annotator-37 33 | protoroles-annotator-38 34 | protoroles-annotator-39 35 | protoroles-annotator-4 36 | protoroles-annotator-40 37 | protoroles-annotator-41 38 | protoroles-annotator-42 39 | protoroles-annotator-43 40 | protoroles-annotator-44 41 | protoroles-annotator-45 42 | protoroles-annotator-5 43 | protoroles-annotator-6 44 | protoroles-annotator-7 45 | protoroles-annotator-8 46 | protoroles-annotator-9 47 | -------------------------------------------------------------------------------- /tests/data/rawtree.conllu: -------------------------------------------------------------------------------- 1 | 1 The the DET DT Definite=Def|PronType=Art 3 det _ _ 2 | 2 police police NOUN NN Number=Sing 3 compound _ _ 3 | 3 commander commander NOUN NN Number=Sing 7 nsubj _ _ 4 | 4 of of ADP IN _ 6 case _ _ 5 | 5 Ninevah Ninevah PROPN NNP Number=Sing 6 compound _ _ 6 | 6 Province Province PROPN NNP Number=Sing 3 nmod _ _ 7 | 7 announced announce VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root _ _ 8 | 8 that that SCONJ IN _ 11 mark _ _ 9 | 9 bombings bombing NOUN NNS Number=Plur 11 nsubj _ _ 10 | 10 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 11 aux _ _ 11 | 11 declined decline VERB VBN Tense=Past|VerbForm=Part 7 ccomp _ _ 12 | 12 80 80 NUM CD NumType=Card 13 nummod _ _ 13 | 13 percent percent NOUN NN Number=Sing 11 dobj _ _ 14 | 14 in in ADP IN _ 15 case _ _ 15 | 15 Mosul Mosul PROPN NNP Number=Sing 11 nmod _ SpaceAfter=No 16 | 16 , , PUNCT , _ 11 punct _ _ 17 | 17 whereas whereas SCONJ IN _ 20 mark _ _ 18 | 18 there there PRON EX _ 20 expl _ _ 19 | 19 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 20 aux _ _ 20 | 20 been be VERB VBN Tense=Past|VerbForm=Part 11 advcl _ _ 21 | 21 a a DET DT Definite=Ind|PronType=Art 23 det _ _ 
22 | 22 big big ADJ JJ Degree=Pos 23 amod _ _ 23 | 23 jump jump NOUN NN Number=Sing 20 nsubj _ _ 24 | 24 in in ADP IN _ 26 case _ _ 25 | 25 the the DET DT Definite=Def|PronType=Art 26 det _ _ 26 | 26 number number NOUN NN Number=Sing 23 nmod _ _ 27 | 27 of of ADP IN _ 28 case _ _ 28 | 28 kidnappings kidnapping NOUN NNS Number=Plur 26 nmod _ SpaceAfter=No 29 | 29 . . PUNCT . _ 7 punct _ _ -------------------------------------------------------------------------------- /tests/data/vis_data.json: -------------------------------------------------------------------------------- 1 | {"directed": true, "multigraph": false, "graph": [["name", "ewt-dev-1"]], "nodes": [{"domain": "syntax", "type": "token", "position": 1, "form": "From", "lemma": "from", "upos": "ADP", "xpos": "IN", "id": "ewt-dev-1-syntax-1"}, {"domain": "syntax", "type": "token", "position": 2, "form": "the", "lemma": "the", "upos": "DET", "xpos": "DT", "Definite": "Def", "PronType": "Art", "id": "ewt-dev-1-syntax-2"}, {"domain": "syntax", "type": "token", "position": 3, "form": "AP", "lemma": "AP", "upos": "PROPN", "xpos": "NNP", "Number": "Sing", "id": "ewt-dev-1-syntax-3"}, {"domain": "syntax", "type": "token", "position": 4, "form": "comes", "lemma": "come", "upos": "VERB", "xpos": "VBZ", "Mood": "Ind", "Number": "Sing", "Person": "3", "Tense": "Pres", "VerbForm": "Fin", "id": "ewt-dev-1-syntax-4"}, {"domain": "syntax", "type": "token", "position": 5, "form": "this", "lemma": "this", "upos": "DET", "xpos": "DT", "Number": "Sing", "PronType": "Dem", "id": "ewt-dev-1-syntax-5"}, {"domain": "syntax", "type": "token", "position": 6, "form": "story", "lemma": "story", "upos": "NOUN", "xpos": "NN", "Number": "Sing", "id": "ewt-dev-1-syntax-6"}, {"domain": "syntax", "type": "token", "position": 7, "form": ":", "lemma": ":", "upos": "PUNCT", "xpos": ":", "id": "ewt-dev-1-syntax-7"}, {"position": 0, "domain": "root", "type": "root", "id": "ewt-dev-1-root-0"}, {"domain": "semantics", "frompredpatt": true, "type": 
"predicate", "factuality": {"factual": {"confidence": 1.0, "value": 0.967}}, "time": {"dur-weeks": {"confidence": 0.2564, "value": -1.3247}, "dur-decades": {"confidence": 0.2564, "value": -1.1146}, "dur-days": {"confidence": 0.2564, "value": 0.8558}, "dur-hours": {"confidence": 0.2564, "value": 0.9952}, "dur-seconds": {"confidence": 0.2564, "value": 0.8931}, "dur-forever": {"confidence": 0.2564, "value": -1.4626}, "dur-centuries": {"confidence": 0.2564, "value": -1.1688}, "dur-instant": {"confidence": 0.2564, "value": -1.4106}, "dur-years": {"confidence": 0.2564, "value": 0.9252}, "dur-minutes": {"confidence": 0.2564, "value": -0.9337}, "dur-months": {"confidence": 0.2564, "value": -1.2142}}, "genericity": {"pred-dynamic": {"confidence": 0.627, "value": -0.0469}, "pred-hypothetical": {"confidence": 0.5067, "value": -0.0416}, "pred-particular": {"confidence": 1.0, "value": 1.1753}}, "id": "ewt-dev-1-semantics-pred-4"}, {"domain": "semantics", "frompredpatt": true, "type": "argument", "genericity": {"arg-kind": {"confidence": 1.0, "value": -1.1642}, "arg-abstract": {"confidence": 1.0, "value": -1.1642}, "arg-particular": {"confidence": 1.0, "value": 1.2257}}, "id": "ewt-dev-1-semantics-arg-3"}, {"domain": "semantics", "frompredpatt": true, "type": "argument", "wordsense": {"supersense-noun.object": {"confidence": 1.0, "value": -3.0}, "supersense-noun.Tops": {"confidence": 1.0, "value": -3.0}, "supersense-noun.quantity": {"confidence": 1.0, "value": -3.0}, "supersense-noun.feeling": {"confidence": 1.0, "value": -3.0}, "supersense-noun.food": {"confidence": 1.0, "value": -3.0}, "supersense-noun.shape": {"confidence": 1.0, "value": -3.0}, "supersense-noun.event": {"confidence": 1.0, "value": -3.0}, "supersense-noun.motive": {"confidence": 1.0, "value": -3.0}, "supersense-noun.substance": {"confidence": 1.0, "value": -3.0}, "supersense-noun.time": {"confidence": 1.0, "value": -3.0}, "supersense-noun.person": {"confidence": 1.0, "value": -3.0}, "supersense-noun.process": 
{"confidence": 1.0, "value": -3.0}, "supersense-noun.attribute": {"confidence": 1.0, "value": -3.0}, "supersense-noun.artifact": {"confidence": 1.0, "value": -1.3996}, "supersense-noun.group": {"confidence": 1.0, "value": -3.0}, "supersense-noun.animal": {"confidence": 1.0, "value": -3.0}, "supersense-noun.location": {"confidence": 1.0, "value": -3.0}, "supersense-noun.plant": {"confidence": 1.0, "value": -3.0}, "supersense-noun.possession": {"confidence": 1.0, "value": -3.0}, "supersense-noun.relation": {"confidence": 1.0, "value": -3.0}, "supersense-noun.phenomenon": {"confidence": 1.0, "value": -3.0}, "supersense-noun.cognition": {"confidence": 1.0, "value": -3.0}, "supersense-noun.act": {"confidence": 1.0, "value": -3.0}, "supersense-noun.state": {"confidence": 1.0, "value": -3.0}, "supersense-noun.communication": {"confidence": 1.0, "value": 0.2016}, "supersense-noun.body": {"confidence": 1.0, "value": -3.0}}, "genericity": {"arg-kind": {"confidence": 0.7138, "value": -0.035}, "arg-abstract": {"confidence": 1.0, "value": -1.1685}, "arg-particular": {"confidence": 1.0, "value": 1.2257}}, "id": "ewt-dev-1-semantics-arg-6"}, {"domain": "semantics", "type": "predicate", "frompredpatt": false, "id": "ewt-dev-1-semantics-pred-root"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-0"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-author"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-addressee"}], "adjacency": [[], [], [{"deprel": "case", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-1"}, {"deprel": "det", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-2"}], [{"deprel": "nmod", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-3"}, {"deprel": "nsubj", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-6"}, {"deprel": "punct", "domain": 
"syntax", "type": "dependency", "id": "ewt-dev-1-syntax-7"}], [], [{"deprel": "det", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-5"}], [], [{"deprel": "root", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-4"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-4"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-1"}, {"domain": "semantics", "type": "dependency", "frompredpatt": true, "protoroles": {"manner": {"confidence": 1.0, "value": -1.3932}, "location": {"confidence": 1.0, "value": 1.4353}, "time": {"confidence": 1.0, "value": -1.3913}, "purpose": {"confidence": 1.0, "value": -1.3941}}, "id": "ewt-dev-1-semantics-arg-3"}, {"domain": "semantics", "type": "dependency", "frompredpatt": true, "protoroles": {"instigation": {"confidence": 0.1128, "value": 0.0458}, "change_of_possession": {"confidence": 0.7669, "value": -0.0561}, "existed_before": {"confidence": 0.1128, "value": 0.1096}, "was_for_benefit": {"confidence": 0.7669, "value": -0.1343}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.7669, "value": -0.1343}, "volition": {"confidence": 0.3073, "value": -0.0}, "change_of_location": {"confidence": 0.7669, "value": -0.0561}, "partitive": {"confidence": 0.5736, "value": -0.2656}, "existed_during": {"confidence": 0.4211, "value": 0.236}, "existed_after": {"confidence": 0.4211, "value": 0.236}, "awareness": {"confidence": 0.7669, "value": -0.0}, "sentient": {"confidence": 0.4612, "value": -0.3556}, "was_used": {"confidence": 0.013, "value": -0.0204}}, "id": "ewt-dev-1-semantics-arg-6"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-3"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-2"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-6"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-5"}], [{"domain": "semantics", "type": "dependency", "frompredpatt": 
false, "id": "ewt-dev-1-semantics-arg-0"}, {"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-author"}, {"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-addressee"}], [{"domain": "semantics", "type": "head", "frompredpatt": false, "id": "ewt-dev-1-semantics-pred-4"}, {"domain": "interface", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-root-0"}], [], []]} -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==6.0.* 2 | -------------------------------------------------------------------------------- /tests/test_dependency.py: -------------------------------------------------------------------------------- 1 | from numpy import array 2 | from networkx import DiGraph 3 | from decomp.syntax.dependency import DependencyGraphBuilder, CoNLLDependencyTreeCorpus 4 | 5 | rawtree = '''1 I I PRON PRP Case=Nom|Number=Sing|Person=1|PronType=Prs 4 nsubj _ _ 6 | 2 ca can AUX MD VerbForm=Fin 4 aux _ SpaceAfter=No 7 | 3 n't not PART RB _ 4 advmod _ _ 8 | 4 imagine imagine VERB VB VerbForm=Inf 0 root _ _ 9 | 5 they they PRON PRP Case=Nom|Number=Plur|Person=3|PronType=Prs 6 nsubj _ _ 10 | 6 wanted want VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 4 ccomp _ _ 11 | 7 to to PART TO _ 8 mark _ _ 12 | 8 do do VERB VB VerbForm=Inf 6 xcomp _ _ 13 | 9 this this PRON DT Number=Sing|PronType=Dem 8 obj _ SpaceAfter=No 14 | 10 . . PUNCT . _ 4 punct _ _''' 15 | 16 | sentence = "I ca n't imagine they wanted to do this ." 
17 | 18 | listtree = [l.split() for l in rawtree.split('\n')] 19 | 20 | 21 | def setup_tree(): 22 | # build and extract tree 23 | graph = DependencyGraphBuilder().from_conll(listtree, 'tree1') 24 | 25 | return graph 26 | 27 | 28 | def setup_corpus(): 29 | listtrees = {'tree1': listtree, 30 | 'tree2': listtree} 31 | 32 | corpus = CoNLLDependencyTreeCorpus(listtrees) 33 | 34 | return corpus 35 | 36 | 37 | # could use @nose.with_setup 38 | def test_dependency_tree_builder(): 39 | tree = setup_tree() 40 | 41 | assert tree.name == 'tree1' 42 | assert (tree.graph['conll'] == array(listtree)).all() 43 | 44 | print(tree.nodes['tree1-root-0']) 45 | # test the root 46 | # test syntax nodes 47 | assert tree.nodes['tree1-root-0'] == {'position': 0, 48 | 'domain': 'root', 49 | 'type': 'root'} 50 | 51 | for idx, node in tree.nodes.items(): 52 | for row in listtree: 53 | if int(row[0]) == idx: 54 | assert node['form'] == row[1] 55 | assert node['lemma'] == row[2] 56 | assert node['upos'] == row[3] 57 | assert node['xpos'] == row[4] 58 | 59 | for (idx1, idx2), edge in tree.edges.items(): 60 | for row in listtree: 61 | if int(row[0]) == idx2: 62 | assert int(row[6]) == idx1 63 | assert row[7] == edge['deprel'] 64 | 65 | 66 | def test_dependency_tree_corpus(): 67 | corpus = setup_corpus() 68 | 69 | assert all([isinstance(t, DiGraph) for gid, t in corpus.graphs.items()]) 70 | assert all([isinstance(t, DiGraph) for gid, t in corpus.items()]) 71 | assert all([isinstance(gid, str) for gid in corpus]) 72 | -------------------------------------------------------------------------------- /tests/test_predpatt.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from networkx import DiGraph 3 | from predpatt import load_conllu, PredPatt, PredPattOpts 4 | from decomp.syntax.dependency import DependencyGraphBuilder 5 | from decomp.semantics.predpatt import PredPattCorpus, PredPattGraphBuilder 6 | 7 | rawtree = '''1 The the DET DT 
Definite=Def|PronType=Art 3 det _ _ 8 | 2 police police NOUN NN Number=Sing 3 compound _ _ 9 | 3 commander commander NOUN NN Number=Sing 7 nsubj _ _ 10 | 4 of of ADP IN _ 6 case _ _ 11 | 5 Ninevah Ninevah PROPN NNP Number=Sing 6 compound _ _ 12 | 6 Province Province PROPN NNP Number=Sing 3 nmod _ _ 13 | 7 announced announce VERB VBD Mood=Ind|Tense=Past|VerbForm=Fin 0 root _ _ 14 | 8 that that SCONJ IN _ 11 mark _ _ 15 | 9 bombings bombing NOUN NNS Number=Plur 11 nsubj _ _ 16 | 10 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 11 aux _ _ 17 | 11 declined decline VERB VBN Tense=Past|VerbForm=Part 7 ccomp _ _ 18 | 12 80 80 NUM CD NumType=Card 13 nummod _ _ 19 | 13 percent percent NOUN NN Number=Sing 11 dobj _ _ 20 | 14 in in ADP IN _ 15 case _ _ 21 | 15 Mosul Mosul PROPN NNP Number=Sing 11 nmod _ SpaceAfter=No 22 | 16 , , PUNCT , _ 11 punct _ _ 23 | 17 whereas whereas SCONJ IN _ 20 mark _ _ 24 | 18 there there PRON EX _ 20 expl _ _ 25 | 19 had have AUX VBD Mood=Ind|Tense=Past|VerbForm=Fin 20 aux _ _ 26 | 20 been be VERB VBN Tense=Past|VerbForm=Part 11 advcl _ _ 27 | 21 a a DET DT Definite=Ind|PronType=Art 23 det _ _ 28 | 22 big big ADJ JJ Degree=Pos 23 amod _ _ 29 | 23 jump jump NOUN NN Number=Sing 20 nsubj _ _ 30 | 24 in in ADP IN _ 26 case _ _ 31 | 25 the the DET DT Definite=Def|PronType=Art 26 det _ _ 32 | 26 number number NOUN NN Number=Sing 23 nmod _ _ 33 | 27 of of ADP IN _ 28 case _ _ 34 | 28 kidnappings kidnapping NOUN NNS Number=Plur 26 nmod _ SpaceAfter=No 35 | 29 . . PUNCT . _ 7 punct _ _''' 36 | 37 | sentence = 'The police commander of Ninevah Province announced that bombings had declined 80 percent in Mosul , whereas there had been a big jump in the number of kidnappings .' 
38 | 39 | listtree = [l.split() for l in rawtree.split('\n')] 40 | 41 | def setup_graph(): 42 | ud = DependencyGraphBuilder.from_conll(listtree, 'tree1') 43 | 44 | pp = PredPatt(next(load_conllu(rawtree))[1], 45 | opts=PredPattOpts(resolve_relcl=True, 46 | borrow_arg_for_relcl=True, 47 | resolve_conj=False, 48 | cut=True)) 49 | 50 | graph = PredPattGraphBuilder.from_predpatt(pp, ud, 'tree1') 51 | 52 | return pp, graph 53 | 54 | def setup_corpus_from_str(): 55 | return PredPattCorpus.from_conll(rawtree) 56 | 57 | def setup_corpus_from_io(): 58 | rawfile = StringIO(rawtree) 59 | return PredPattCorpus.from_conll(rawfile) 60 | 61 | ## could use @nose.with_setup 62 | def test_predpatt_graph_builder(): 63 | pp, pp_graph = setup_graph() 64 | 65 | assert pp_graph.name == 'tree1' 66 | assert all(['tree1' in nodeid 67 | for nodeid in pp_graph.nodes]) 68 | 69 | # test syntax nodes 70 | print(pp_graph.nodes['tree1-root-0']) 71 | assert pp_graph.nodes['tree1-root-0'] == {'position': 0, 72 | 'domain': 'root', 73 | 'type': 'root'} 74 | 75 | for idx, node in pp_graph.nodes.items(): 76 | if 'syntax' in idx: 77 | idx = idx.split('-')[-1] 78 | for row in listtree: 79 | if int(row[0]) == idx: 80 | assert node['form'] == row[1] 81 | assert node['lemma'] == row[2] 82 | assert node['upos'] == row[3] 83 | assert node['xpos'] == row[4] 84 | 85 | for (idx1, idx2), edge in pp_graph.edges.items(): 86 | if 'syntax' in idx1 and 'syntax' in idx2: 87 | idx1, idx2 = idx1.split('-')[-1], idx2.split('-')[-1] 88 | for row in listtree: 89 | if int(row[0]) == idx2: 90 | assert int(row[6]) == idx1 91 | assert row[7] == edge['deprel'] 92 | 93 | # test semantics nodes 94 | assert 'tree1-semantics-pred-0' not in pp_graph.nodes 95 | assert 'tree1-semantics-arg-0' not in pp_graph.nodes 96 | 97 | assert all(['arg' in nodeid or 'pred' in nodeid 98 | for nodeid in pp_graph.nodes 99 | if 'semantics' in nodeid]) 100 | 101 | assert all(['domain' in pp_graph.nodes[nodeid] 102 | for nodeid in pp_graph.nodes 103 | if 
'semantics' in nodeid]) 104 | 105 | assert all([pp_graph.nodes[nodeid]['domain'] == 'semantics' 106 | for nodeid in pp_graph.nodes 107 | if 'semantics' in nodeid]) 108 | 109 | assert all(['type' in pp_graph.nodes[nodeid] 110 | for nodeid in pp_graph.nodes 111 | if 'semantics' in nodeid]) 112 | 113 | assert all([pp_graph.nodes[nodeid]['type'] in ['argument', 'predicate'] 114 | for nodeid in pp_graph.nodes 115 | if 'semantics' in nodeid]) 116 | 117 | assert all([('arg' in nodeid) == 118 | (pp_graph.nodes[nodeid]['type'] == 'argument') 119 | for nodeid in pp_graph.nodes 120 | if 'semantics' in nodeid]) 121 | 122 | assert all([('pred' in nodeid) == 123 | (pp_graph.nodes[nodeid]['type'] == 'predicate') 124 | for nodeid in pp_graph.nodes 125 | if 'semantics' in nodeid]) 126 | 127 | assert all(['arg' not in nodeid and 'pred' not in nodeid 128 | for nodeid in pp_graph.nodes 129 | if 'syntax' in nodeid]) 130 | 131 | # test argument edges 132 | assert all([pp_graph.edges[(nodeid2, nodeid1)]['domain'] == 'semantics' and 133 | pp_graph.edges[(nodeid2, nodeid1)]['type'] == 'dependency' 134 | for nodeid1, node1 in pp_graph.nodes.items() 135 | for nodeid2 in pp_graph.nodes 136 | if 'semantics-arg' in nodeid1 137 | if 'semantics-pred' in nodeid2 138 | if (nodeid2, nodeid1) in pp_graph.edges]) 139 | 140 | # tests subpredicate edges 141 | subprededge = ('tree1-semantics-arg-11', 'tree1-semantics-pred-11') 142 | assert pp_graph.edges[subprededge]['domain'] == 'semantics' 143 | assert pp_graph.edges[subprededge]['type'] == 'head' 144 | 145 | assert all([(nodeid2, nodeid1) in pp_graph.edges and 146 | pp_graph.edges[(nodeid2, nodeid1)]['domain'] == 'semantics' and 147 | pp_graph.edges[(nodeid2, nodeid1)]['type'] == 'head' 148 | for nodeid1, node1 in pp_graph.nodes.items() 149 | for nodeid2 in pp_graph.nodes 150 | if 'semantics-pred' in nodeid1 151 | if 'semantics-arg' in nodeid2 152 | if nodeid1.split('-')[-1] == nodeid2.split('-')[-1]]) 153 | 154 | def test_predpatt_corpus(): 155 | 
corpus = setup_corpus_from_str() 156 | 157 | assert all([isinstance(t, DiGraph) for gid, t in corpus.graphs.items()]) 158 | assert all([isinstance(t, DiGraph) for gid, t in corpus.items()]) 159 | assert all([isinstance(gid, str) for gid in corpus]) 160 | 161 | corpus = setup_corpus_from_io() 162 | 163 | assert all([isinstance(t, DiGraph) for gid, t in corpus.graphs.items()]) 164 | assert all([isinstance(t, DiGraph) for gid, t in corpus.items()]) 165 | assert all([isinstance(gid, str) for gid in corpus]) 166 | -------------------------------------------------------------------------------- /tests/test_uds_annotation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import os, json 4 | 5 | from pprint import pprint 6 | 7 | from decomp.semantics.uds.metadata import UDSAnnotationMetadata 8 | from decomp.semantics.uds.annotation import UDSAnnotation 9 | from decomp.semantics.uds.annotation import NormalizedUDSAnnotation 10 | from decomp.semantics.uds.annotation import RawUDSAnnotation 11 | 12 | class TestUDSAnnotation: 13 | 14 | def test_direct_instantiation_of_uds_annotation_fails(self): 15 | with pytest.raises(TypeError): 16 | UDSAnnotation(None) 17 | 18 | class TestNormalizedUDSAnnotation: 19 | 20 | def test_from_json(self, 21 | normalized_node_sentence_annotation, 22 | normalized_edge_sentence_annotation, 23 | normalized_sentence_annotations): 24 | norm_node_ann, norm_edge_ann = normalized_sentence_annotations 25 | norm_node_ann_direct = json.loads(normalized_node_sentence_annotation) 26 | norm_edge_ann_direct = json.loads(normalized_edge_sentence_annotation) 27 | 28 | assert norm_node_ann.metadata == UDSAnnotationMetadata.from_dict(norm_node_ann_direct['metadata']) 29 | assert norm_edge_ann.metadata == UDSAnnotationMetadata.from_dict(norm_edge_ann_direct['metadata']) 30 | 31 | assert all([not edge_attrs 32 | for n, (node_attrs, edge_attrs) in norm_node_ann.items()]) 33 | 34 | assert 
all([norm_node_ann_direct['data']['tree1'][k] == v
                    for n, (node_attrs, edge_attrs) in norm_node_ann.items()
                    for k, v in node_attrs.items()])

        # the edge annotation should carry no node attributes
        assert all([not node_attrs
                    for n, (node_attrs, edge_attrs) in norm_edge_ann.items()])

        # edge keys are serialized in JSON with the node-id pair joined by '%%'
        assert all([norm_edge_ann_direct['data']['tree1']['%%'.join(k)] == v
                    for n, (node_attrs, edge_attrs) in norm_edge_ann.items()
                    for k, v in edge_attrs.items()])

class TestRawUDSAnnotation:

    def test_from_json(self,
                       raw_node_sentence_annotation,
                       raw_edge_sentence_annotation,
                       raw_sentence_annotations):
        # NOTE(review): the *_annotation(s) parameters are presumably
        # pytest fixtures defined in conftest.py -- confirm
        raw_node_ann, raw_edge_ann = raw_sentence_annotations
        raw_node_ann_direct = json.loads(raw_node_sentence_annotation)
        raw_edge_ann_direct = json.loads(raw_edge_sentence_annotation)

        # metadata parsed straight from the JSON must match the metadata
        # carried by the loaded annotation objects
        assert raw_node_ann.metadata == UDSAnnotationMetadata.from_dict(raw_node_ann_direct['metadata'])
        assert raw_edge_ann.metadata == UDSAnnotationMetadata.from_dict(raw_edge_ann_direct['metadata'])

        # the node annotation should carry no edge attributes
        assert all([not edge_attrs
                    for n, (node_attrs, edge_attrs) in raw_node_ann.items()])

        assert all([raw_node_ann_direct['data']['tree1'][k] == v
                    for n, (node_attrs, edge_attrs) in raw_node_ann.items()
                    for k, v in node_attrs.items()])

        # the edge annotation should carry no node attributes
        assert all([not node_attrs
                    for n, (node_attrs, edge_attrs) in raw_edge_ann.items()])

        # edge keys are serialized in JSON with the node-id pair joined by '%%'
        assert all([raw_edge_ann_direct['data']['tree1']['%%'.join(k)] == v
                    for n, (node_attrs, edge_attrs) in raw_edge_ann.items()
                    for k, v in edge_attrs.items()])


    def test_annotators(self, raw_sentence_annotations, test_data_dir):
        raw_node_ann, raw_edge_ann = raw_sentence_annotations

        # annotator ids must match the checked-in reference lists
        with open(os.path.join(test_data_dir, 'raw_node_sentence_annotators.txt')) as f:
            assert raw_node_ann.annotators() == {line.strip() for line in f}

        with open(os.path.join(test_data_dir, 'raw_edge_sentence_annotators.txt')) as f:
            assert raw_edge_ann.annotators() == {line.strip() for line in f}

    def test_items(self, raw_sentence_annotations):
        raw_node_ann, raw_edge_ann = raw_sentence_annotations

        # verify that items by annotator generator works
        for gid, (node_attrs, edge_attrs) in raw_node_ann.items(annotator_id='genericity-pred-annotator-88'):
            assert gid == 'tree1'
            assert json.dumps(node_attrs) == '{"tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 0, "value": 1}, "pred-hypothetical": {"confidence": 0, "value": 1}, "pred-particular": {"confidence": 0, "value": 1}}}}'
            assert json.dumps(edge_attrs) == '{}'

        # verify that node attribute-only generator works
        for gid, node_attrs in raw_node_ann.items(annotation_type="node",
                                                  annotator_id='genericity-pred-annotator-88'):
            assert gid == 'tree1'
            assert json.dumps(node_attrs) == '{"tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 0, "value": 1}, "pred-hypothetical": {"confidence": 0, "value": 1}, "pred-particular": {"confidence": 0, "value": 1}}}}'

        # generator for edge attributes for the node attribute-only annotation
        # should yield empty results for the graph
        with 
pytest.raises(ValueError): 100 | for gid, edge_attrs in raw_node_ann.items(annotation_type="edge", 101 | annotator_id='genericity-pred-annotator-88'): 102 | pass 103 | 104 | # verify that edge attribute-only generator works 105 | for gid, (node_attrs, edge_attrs) in raw_edge_ann.items(annotator_id='protoroles-annotator-14'): 106 | assert gid == 'tree1' 107 | assert json.dumps({'%%'.join(e): attrs for e, attrs in edge_attrs.items()}) == '{"tree1-semantics-pred-11%%tree1-semantics-arg-9": {"protoroles": {"awareness": {"confidence": 1, "value": 4}, "change_of_location": {"confidence": 1, "value": 4}, "change_of_possession": {"confidence": 1, "value": 4}, "change_of_state": {"confidence": 1, "value": 4}, "change_of_state_continuous": {"confidence": 1, "value": 4}, "existed_after": {"confidence": 1, "value": 4}, "existed_before": {"confidence": 1, "value": 4}, "existed_during": {"confidence": 1, "value": 4}, "instigation": {"confidence": 1, "value": 4}, "partitive": {"confidence": 1, "value": 4}, "sentient": {"confidence": 1, "value": 4}, "volition": {"confidence": 1, "value": 4}, "was_for_benefit": {"confidence": 1, "value": 4}, "was_used": {"confidence": 1, "value": 4}}}}' 108 | 109 | # generator for node attributes for the edge attribute-only annotation 110 | # should yield empty results for the graph 111 | with pytest.raises(ValueError): 112 | for gid, node_attrs in raw_edge_ann.items(annotation_type="node", 113 | annotator_id='protoroles-annotator-14'): 114 | pass 115 | -------------------------------------------------------------------------------- /tests/test_uds_corpus.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | import pytest 5 | 6 | from glob import glob 7 | from pkg_resources import resource_filename 8 | from decomp.semantics.uds import UDSCorpus 9 | 10 | test_document_name = 'answers-20111105112131AA6gIX6_ans' 11 | test_document_genre = 'answers' 12 | 
test_document_timestamp = '20111105112131' 13 | test_document_text = 'My dad just does n\'t understand ? Ugh my dad is so stupid ... he just does n\'t understand anything ! I have 5 sisters and so including my mom ... he is the only guy in a house of six females . Now I \'m the youngest and I just got my period so now we all have ours and he thinks it \'s a good thing ? He \'s always like " ohh you must be so happy to finally have yours , I wish I had mine ! " and he is n\'t even joking . I think just living in a house with so many girls is making him go crazy ? Yep , the females are just getting to him ... dads .. Do n\'t blame him please , he feels lonely and wants to show his attention to all of you to look after you , please forgive and sympathy if he miss something . I am sorry for him , he is a good dad' 14 | test_document_sentence_ids = {'ewt-train-7189': 'answers-20111105112131AA6gIX6_ans-0001', 15 | 'ewt-train-7190': 'answers-20111105112131AA6gIX6_ans-0002', 16 | 'ewt-train-7191': 'answers-20111105112131AA6gIX6_ans-0003', 17 | 'ewt-train-7192': 'answers-20111105112131AA6gIX6_ans-0004', 18 | 'ewt-train-7193': 'answers-20111105112131AA6gIX6_ans-0005', 19 | 'ewt-train-7194': 'answers-20111105112131AA6gIX6_ans-0006', 20 | 'ewt-train-7195': 'answers-20111105112131AA6gIX6_ans-0007', 21 | 'ewt-train-7196': 'answers-20111105112131AA6gIX6_ans-0008', 22 | 'ewt-train-7197': 'answers-20111105112131AA6gIX6_ans-0009'} 23 | test_document_node = 'ewt-train-7195-document-pred-7' 24 | test_document_semantics_node_normalized = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics', 25 | 'frompredpatt': True, 26 | 'type': 'predicate', 27 | 'factuality': {'factual': {'confidence': 1.0, 'value': 1.2225}}, 28 | 'time': {'dur-weeks': {'confidence': 0.3991, 'value': 0.7263}, 29 | 'dur-decades': {'confidence': 0.3991, 'value': -1.378}, 30 | 'dur-days': {'confidence': 0.3991, 'value': 0.7498}, 31 | 'dur-hours': {'confidence': 0.3991, 'value': -1.1733}, 32 | 'dur-seconds': 
{'confidence': 0.3991, 'value': -1.4243}, 33 | 'dur-forever': {'confidence': 0.3991, 'value': -1.2803}, 34 | 'dur-centuries': {'confidence': 0.3991, 'value': -1.1213}, 35 | 'dur-instant': {'confidence': 0.3991, 'value': -1.3219}, 36 | 'dur-years': {'confidence': 0.3991, 'value': -1.1953}, 37 | 'dur-minutes': {'confidence': 0.3991, 'value': 0.8558}, 38 | 'dur-months': {'confidence': 0.3991, 'value': 0.6852}}, 39 | 'genericity': {'pred-dynamic': {'confidence': 1.0, 'value': 1.1508}, 40 | 'pred-hypothetical': {'confidence': 1.0, 'value': -1.1583}, 41 | 'pred-particular': {'confidence': 1.0, 'value': 1.1508}}}} 42 | test_document_semantics_node_raw = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics', 'frompredpatt': True, 'type': 'predicate', 'factuality': {'factual': {'value': {'factuality-annotator-26': 1, 'factuality-annotator-34': 1}, 'confidence': {'factuality-annotator-26': 4, 'factuality-annotator-34': 4}}}, 'time': {'duration': {'value': {'time-annotator-508': 4, 'time-annotator-619': 6, 'time-annotator-310': 5, 'time-annotator-172': 4, 'time-annotator-448': 5, 'time-annotator-548': 6}, 'confidence': {'time-annotator-508': 2, 'time-annotator-619': 4, 'time-annotator-310': 4, 'time-annotator-172': 4, 'time-annotator-448': 1, 'time-annotator-548': 2}}}, 'genericity': {'pred-dynamic': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-hypothetical': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-particular': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}}}} 43 | 44 | 45 | total_graphs = 16622 46 | total_documents = 1174 47 | 48 | 49 | data_dir = resource_filename('decomp', 'data/') 50 | 51 | 52 | def _load_corpus(base, version, annotation_format): 53 | UDSCorpus.CACHE_DIR = base 54 | 55 | try: 56 | os.makedirs(os.path.join(base, 57 | version, 58 | annotation_format, 59 | 'sentence/')) 60 | 
os.makedirs(os.path.join(base, 61 | version, 62 | annotation_format, 63 | 'document/')) 64 | 65 | except FileExistsError: 66 | pass 67 | 68 | return UDSCorpus(version=version, 69 | annotation_format=annotation_format) 70 | 71 | def _assert_correct_corpus_initialization(uds, raw): 72 | # Assert all graphs and documents initialized 73 | assert uds.ngraphs == total_graphs 74 | assert uds.ndocuments == total_documents 75 | 76 | n_sentence_graphs = 0 77 | 78 | for doc_id in uds.documentids: 79 | n_sentence_graphs += len(uds.documents[doc_id].sentence_graphs) 80 | 81 | assert n_sentence_graphs == total_graphs 82 | 83 | # Inspect a test document 84 | test_doc = uds.documents[test_document_name] 85 | assert test_doc.genre == test_document_genre 86 | assert test_doc.timestamp == test_document_timestamp 87 | assert test_doc.sentence_ids == test_document_sentence_ids 88 | assert test_doc.text == test_document_text 89 | assert test_doc.document_graph is not None 90 | 91 | print(test_doc.semantics_node(test_document_node)) 92 | 93 | if raw: 94 | assert uds.annotation_format == 'raw' 95 | #assert test_doc.semantics_node(test_document_node) == test_document_semantics_node_raw 96 | else: 97 | assert uds.annotation_format == 'normalized' 98 | #assert test_doc.semantics_node(test_document_node) == test_document_semantics_node_normalized 99 | 100 | def _assert_document_annotation(uds, raw): 101 | if raw: 102 | node_ann, edge_ann = setup_raw_document_annotations() 103 | else: 104 | node_ann, edge_ann = setup_normalized_document_annotations() 105 | 106 | document = list(node_ann.node_attributes.keys())[0] 107 | 108 | # assert node annotations 109 | node_ann_attrs = dict(list(node_ann.node_attributes.values())[0]) 110 | 111 | for doc_node, node_annotation in node_ann_attrs.items(): 112 | for k, v in node_annotation.items(): 113 | assert uds.documents[document].document_graph.nodes[doc_node][k] == v 114 | 115 | # assert edge annotations 116 | edge_ann_attrs = 
dict(list(edge_ann.edge_attributes.values())[0]) 117 | 118 | for doc_edge, edge_annotation in edge_ann_attrs.items(): 119 | for k, v in edge_annotation.items(): 120 | assert uds.documents[document].document_graph.edges[doc_edge][k] == v 121 | 122 | class TestUDSCorpus: 123 | 124 | # @pytest.mark.slow 125 | # def test_load_v1_normalized(self, tmp_path, caplog): 126 | # caplog.set_level(logging.WARNING) 127 | 128 | # uds = _load_corpus(tmp_path, '1.0', 'normalized') 129 | 130 | # raw = False 131 | 132 | # _assert_correct_corpus_initialization(uds, raw) 133 | # #_assert_document_annotation(uds, raw) 134 | 135 | # # reload the UDSCorpus, which will initialize it from 136 | # # the now-serialized graphs 137 | # uds_cached = _load_corpus(tmp_path, '1.0', 'normalized') 138 | 139 | # _assert_correct_corpus_initialization(uds_cached, raw) 140 | # #_assert_document_annotation(uds_cached, raw) 141 | 142 | 143 | # @pytest.mark.slow 144 | # def test_load_v2_normalized(self, tmp_path, caplog): 145 | # caplog.set_level(logging.WARNING) 146 | 147 | # uds = _load_corpus(tmp_path, '2.0', 'normalized') 148 | 149 | # raw = False 150 | 151 | # _assert_correct_corpus_initialization(uds, raw) 152 | # #_assert_document_annotation(uds, raw) 153 | 154 | # # reload the UDSCorpus, which will initialize it from 155 | # # the now-serialized graphs 156 | # uds_cached = _load_corpus(tmp_path, '2.0', 'normalized') 157 | 158 | # _assert_correct_corpus_initialization(uds_cached, raw) 159 | # #_assert_document_annotation(uds_cached, raw) 160 | 161 | # @pytest.mark.slow 162 | # def test_load_v1_raw(self, tmp_path, caplog): 163 | # caplog.set_level(logging.WARNING) 164 | 165 | # uds = _load_corpus(tmp_path, '1.0', 'raw') 166 | 167 | # raw = True 168 | 169 | # _assert_correct_corpus_initialization(uds, raw) 170 | # #_assert_document_annotation(uds, raw) 171 | 172 | # # reload the UDSCorpus, which will initialize it from 173 | # # the now-serialized graphs 174 | # uds_cached = _load_corpus(tmp_path, 
'1.0', 'raw') 175 | 176 | # _assert_correct_corpus_initialization(uds_cached, raw) 177 | # #_assert_document_annotation(uds_cached, raw) 178 | 179 | @pytest.mark.slow 180 | def test_load_v2_raw(self, tmp_path, caplog): 181 | caplog.set_level(logging.WARNING) 182 | 183 | uds = _load_corpus(tmp_path, '2.0', 'raw') 184 | 185 | raw = True 186 | 187 | #print(uds.metadata.to_dict()) 188 | 189 | print(uds._sentences_paths) 190 | print(uds._documents_paths) 191 | _assert_correct_corpus_initialization(uds, raw) 192 | #_assert_document_annotation(uds, raw) 193 | 194 | # reload the UDSCorpus, which will initialize it from 195 | # the now-serialized graphs 196 | uds_cached = _load_corpus(tmp_path, '2.0', 'raw') 197 | 198 | print() 199 | #print(uds_cached.metadata.to_dict()) 200 | 201 | raise Exception 202 | 203 | 204 | _assert_correct_corpus_initialization(uds_cached, raw) 205 | #_assert_document_annotation(uds_cached, raw) 206 | 207 | # def _test_uds_corpus_load(version, raw, data_dir): 208 | # # Remove cached graphs 209 | # if raw: 210 | # annotation_format = 'raw' 211 | # else: 212 | # annotation_format = 'normalized' 213 | 214 | # sentence_path = os.path.join(data_dir, version, annotation_format, 'sentence') 215 | # doc_path = os.path.join(data_dir, version, annotation_format, 'document') 216 | 217 | # if glob(os.path.join(sentence_path, '*.json')): 218 | # os.system('rm ' + sentence_path + '/*.json') 219 | 220 | # if glob(os.path.join(doc_path, '*.json')): 221 | # os.system('rm ' + doc_path + '/*.json') 222 | 223 | 224 | # annotations_dir = os.path.join(doc_path, 'annotations') 225 | # if not glob(annotations_dir): 226 | # os.system('mkdir ' + annotations_dir) 227 | # if raw: 228 | # # Dump the test anontations to JSON files 229 | # raw_node_ann = json.loads(raw_node_document_annotation) 230 | # raw_edge_ann = json.loads(raw_edge_document_annotation) 231 | # raw_node_ann_path = os.path.join(annotations_dir, 'raw_node.json') 232 | # raw_edge_ann_path = 
#         os.path.join(annotations_dir, 'raw_edge.json')
#         annotations = [raw_node_ann, raw_edge_ann]
#         paths = [raw_node_ann_path, raw_edge_ann_path]
#     else:
#         norm_node_ann = json.loads(normalized_node_document_annotation)
#         norm_edge_ann = json.loads(normalized_edge_document_annotation)
#         norm_node_ann_path = os.path.join(annotations_dir, 'norm_node.json')
#         norm_edge_ann_path = os.path.join(annotations_dir, 'norm_edge.json')
#         annotations = [norm_node_ann, norm_edge_ann]
#         paths = [norm_node_ann_path, norm_edge_ann_path]


#     for ann, path in zip(annotations, paths):
#         os.system('touch ' + path)
#         with open(path, 'w') as out:
#             json.dump(ann, out)

#     # Load the UDSCorpus without any options
#     uds = UDSCorpus(version=version, annotation_format=annotation_format)
#     assert_correct_corpus_initialization(uds, raw)
#     assert_document_annotation(uds, raw)

#     # Reload the UDSCorpus, which will initialize it from
#     # the now-serialized graphs
#     uds_cached = UDSCorpus(version=version, annotation_format=annotation_format)
#     assert_correct_corpus_initialization(uds_cached, raw)
#     assert_document_annotation(uds, raw)

#     # Remove the cached graphs and annotations
#     os.system('rm ' + sentence_path + '/*.json')
#     os.system('rm ' + doc_path + '/*.json')
#     for path in paths:
#         os.system('rm ' + path)

# --------------------------------------------------------------------------
# tests/test_uds_document.py
# --------------------------------------------------------------------------

# FIX: ``import os`` was missing even though the fixtures below call
# ``os.path.join``, which raised NameError the first time a fixture ran
import os

import pytest

# expected properties of a known document in the corpus, used by the
# corpus-initialization assertions in test_uds_corpus.py
test_document_name = 'answers-20111105112131AA6gIX6_ans'
test_document_genre = 'answers'
test_document_timestamp = '20111105112131'


test_document_text = 'My dad just does n\'t understand ? Ugh my dad is so stupid ... he just does n\'t understand anything ! I have 5 sisters and so including my mom ... he is the only guy in a house of six females . Now I \'m the youngest and I just got my period so now we all have ours and he thinks it \'s a good thing ? He \'s always like " ohh you must be so happy to finally have yours , I wish I had mine ! " and he is n\'t even joking . I think just living in a house with so many girls is making him go crazy ? Yep , the females are just getting to him ... dads .. Do n\'t blame him please , he feels lonely and wants to show his attention to all of you to look after you , please forgive and sympathy if he miss something . I am sorry for him , he is a good dad'

# mapping from sentence-graph id to document-sentence id
test_document_sentence_ids = {'ewt-train-7189': 'answers-20111105112131AA6gIX6_ans-0001',
                              'ewt-train-7190': 'answers-20111105112131AA6gIX6_ans-0002',
                              'ewt-train-7191': 'answers-20111105112131AA6gIX6_ans-0003',
                              'ewt-train-7192': 'answers-20111105112131AA6gIX6_ans-0004',
                              'ewt-train-7193': 'answers-20111105112131AA6gIX6_ans-0005',
                              'ewt-train-7194': 'answers-20111105112131AA6gIX6_ans-0006',
                              'ewt-train-7195': 'answers-20111105112131AA6gIX6_ans-0007',
                              'ewt-train-7196': 'answers-20111105112131AA6gIX6_ans-0008',
                              'ewt-train-7197': 'answers-20111105112131AA6gIX6_ans-0009'}

test_document_node = 'ewt-train-7195-document-pred-7'

# expected attributes for the test node under normalized annotations
test_document_semantics_node_normalized = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics',
                                                                               'frompredpatt': True,
                                                                               'type': 'predicate',
                                                                               'factuality': {'factual': {'confidence': 1.0, 'value': 1.2225}},
                                                                               'time': {'dur-weeks': {'confidence': 0.3991, 'value': 0.7263},
                                                                                        'dur-decades': {'confidence': 0.3991, 'value': -1.378},
                                                                                        'dur-days': {'confidence': 0.3991, 'value': 0.7498},
                                                                                        'dur-hours': {'confidence': 0.3991, 'value': -1.1733},
                                                                                        'dur-seconds': {'confidence': 0.3991, 'value': -1.4243},
                                                                                        'dur-forever': {'confidence': 0.3991, 'value': -1.2803},
                                                                                        'dur-centuries': {'confidence': 0.3991, 'value': -1.1213},
                                                                                        'dur-instant': {'confidence': 0.3991, 'value': -1.3219},
                                                                                        'dur-years': {'confidence': 0.3991, 'value': -1.1953},
                                                                                        'dur-minutes': {'confidence': 0.3991, 'value': 0.8558},
                                                                                        'dur-months': {'confidence': 0.3991, 'value': 0.6852}},
                                                                               'genericity': {'pred-dynamic': {'confidence': 1.0, 'value': 1.1508},
                                                                                              'pred-hypothetical': {'confidence': 1.0, 'value': -1.1583},
                                                                                              'pred-particular': {'confidence': 1.0, 'value': 1.1508}}}}

# expected attributes for the test node under raw (per-annotator) annotations
test_document_semantics_node_raw = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics', 'frompredpatt': True, 'type': 'predicate', 'factuality': {'factual': {'value': {'factuality-annotator-26': 1, 'factuality-annotator-34': 1}, 'confidence': {'factuality-annotator-26': 4, 'factuality-annotator-34': 4}}}, 'time': {'duration': {'value': {'time-annotator-508': 4, 'time-annotator-619': 6, 'time-annotator-310': 5, 'time-annotator-172': 4, 'time-annotator-448': 5, 'time-annotator-548': 6}, 'confidence': {'time-annotator-508': 2, 'time-annotator-619': 4, 'time-annotator-310': 4, 'time-annotator-172': 4, 'time-annotator-448': 1, 'time-annotator-548': 2}}}, 'genericity': {'pred-dynamic': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-hypothetical': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-particular': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}}}}

@pytest.fixture
def normalized_node_document_annotation(test_data_dir):
    """Return the normalized node document annotation as a JSON string."""
    fpath = os.path.join(test_data_dir,
                         'normalized_node_document_annotation.json')
    with open(fpath) as f:
        return f.read()


@pytest.fixture
def normalized_edge_document_annotation(test_data_dir):
    """Return the normalized edge document annotation as a JSON string."""
    fpath = os.path.join(test_data_dir,
                         'normalized_edge_document_annotation.json')
    with open(fpath) as f:
        return f.read()


@pytest.fixture
def normalized_document_annotations(normalized_node_document_annotation,
                                    normalized_edge_document_annotation):
    """Parse the normalized node/edge annotations into annotation objects.

    FIX: ``NormalizedUDSAnnotation`` was used without being imported
    anywhere in this file; imported lazily so the module stays importable
    without decomp installed.
    """
    from decomp import NormalizedUDSAnnotation

    norm_node_ann = NormalizedUDSAnnotation.from_json(normalized_node_document_annotation)
    norm_edge_ann = NormalizedUDSAnnotation.from_json(normalized_edge_document_annotation)

    return norm_node_ann, norm_edge_ann


@pytest.fixture
def raw_node_document_annotation():
    # NOTE(review): the key "ewt-train-7192-document-pred-20" appears twice
    # below; json.loads silently keeps the LAST occurrence, so the two-annotator
    # entry is discarded. A distinct node id was probably intended (compare the
    # edge fixture, which uses three distinct edges) — confirm and fix the data.
    return '{"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-25": {"subspace": {"property": {"confidence": {"annotator1": 0.12}, "value": {"annotator1": 0.0}}}}, "ewt-train-7192-document-pred-20": {"subspace": {"property": {"confidence": {"annotator2": 0.55, "annotator3": 0.07}, "value": {"annotator2": 0.0, "annotator3": 0.0}}}}, "ewt-train-7192-document-pred-20": {"subspace": {"property": {"confidence": {"annotator2": 0.55}, "value": {"annotator2": 0.0}}}}}}'


@pytest.fixture
def raw_edge_document_annotation():
    """Raw per-annotator edge annotations for three document edges."""
    return '{"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-20%%ewt-train-7192-document-arg-2": {"subspace": {"property": {"confidence": {"annotator1": 0.12}, "value": {"annotator1": 0.0}}}}, "ewt-train-7192-document-pred-20%%ewt-train-7189-document-arg-2": {"subspace": {"property": {"confidence": {"annotator2": 0.55, "annotator3": 0.07}, "value": {"annotator2": 0.0, "annotator3": 0.0}}}}, "ewt-train-7192-document-pred-25%%ewt-train-7191-document-arg-18": {"subspace": {"property": {"confidence": {"annotator2": 0.55}, "value": {"annotator2": 0.0}}}}}}'

@pytest.fixture
def raw_document_annotations(raw_node_document_annotation,
                             raw_edge_document_annotation):
    """Parse the raw node/edge annotations into annotation objects.

    FIX: ``RawUDSAnnotation`` was used without being imported anywhere in
    this file. NOTE(review): assumes it is re-exported from ``decomp`` like
    ``NormalizedUDSAnnotation`` — confirm the export.
    """
    from decomp import RawUDSAnnotation

    raw_node_ann = RawUDSAnnotation.from_json(raw_node_document_annotation)
    raw_edge_ann = RawUDSAnnotation.from_json(raw_edge_document_annotation)

    return raw_node_ann, raw_edge_ann

# --------------------------------------------------------------------------
# tests/test_uds_metadata.py (continues below)
# --------------------------------------------------------------------------
import pytest

from copy import deepcopy
from typing import List  # NOTE(review): appears unused — confirm before removing

from decomp.semantics.uds.metadata import _dtype
from decomp.semantics.uds.metadata import UDSDataType
from decomp.semantics.uds.metadata import UDSPropertyMetadata
from decomp.semantics.uds.metadata import UDSAnnotationMetadata

def test_dtype():
    """``_dtype`` maps each builtin type's name to the type itself."""
    assert _dtype('int') is int
    assert _dtype('str') is str
    assert _dtype('float') is float
    assert _dtype('bool') is bool


class TestUDSDataType:

    # categorical test data: datatype name -> category values
    catdict = {'int': [1, 2, 3, 4, 5],
               'str': ['yes', 'maybe', 'no']}

    # (input dict, expected round-tripped dict) pairs for from_dict/to_dict
    cases = [({'datatype': 'int',
               'categories': [1, 2, 3, 4, 5],
               'ordered': True},
              {'datatype': 'int',
               'categories': [1, 2, 3, 4, 5],
               'ordered': True,
               'lower_bound': 1,
               'upper_bound': 5}),
             ({'datatype': 'int'},
              {'datatype': 'int'}),
             ({'datatype': 'float',
               'lower_bound': 0.0,
               'upper_bound': 1.0},
              {'datatype': 'float',
               'ordered': True,
               'lower_bound': 0.0,
               'upper_bound': 1.0})]

    def test_init_simple(self):
        UDSDataType(datatype=str)
        UDSDataType(datatype=int)
        UDSDataType(datatype=bool)
        UDSDataType(datatype=float)

    def test_init_categorical(self):
        for tname, c in self.catdict.items():
            # FIX: the original rebound the loop variable ``t`` to a type
            # inside the inner loop, so on the second inner iteration
            # ``t == 'int'`` was always False and the wrong datatype was
            # constructed; use a separate variable for the resolved type
            dtype = int if tname == 'int' else str
            for o in [True, False]:
                UDSDataType(datatype=dtype,
                            categories=c,
                            ordered=o)

    def test_from_dict_simple(self):
        UDSDataType.from_dict({'datatype': 'str'})
        UDSDataType.from_dict({'datatype': 'int'})
        UDSDataType.from_dict({'datatype': 'bool'})
        UDSDataType.from_dict({'datatype': 'float'})

    def test_from_dict_categorical(self):
        # the name for the categories key is "categories"
        with pytest.raises(KeyError):
            UDSDataType.from_dict({'datatype': 'int',
                                   'category': [1, 2, 3, 4, 5],
                                   'ordered': True})

        # floats cannot be categorical
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'float',
                                   'categories': [1, 2, 3, 4, 5],
                                   'ordered': True})

        # bounds can only be specified if ordered is not specified or
        # is True
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'str',
                                   'categories': ["no", "maybe", "yes"],
                                   'ordered': False,
                                   'lower_bound': "no",
                                   'upper_bound': "yes"})

        # these are good
        for t, c in self.catdict.items():
            for o in [True, False]:
                dt = UDSDataType.from_dict({'datatype': t,
                                            'categories': c,
                                            'ordered': o})

                assert dt.is_categorical
                assert dt.is_ordered_categorical == o

                # ordered categories round-trip as a list; unordered as a set
                if o:
                    assert dt.categories == c
                else:
                    assert dt.categories == set(c)

    def test_from_dict_bounded(self):
        # bounded datatypes should only be float or int
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'str',
                                   'categories': ['yes', 'maybe', 'no'],
                                   'ordered': True,
                                   'lower_bound': 'no',
                                   'upper_bound': 'yes'})

        # if the datatype is categorical, the lower bound should
        # match the category lower bound
        with pytest.raises(ValueError):
            UDSDataType.from_dict({'datatype': 'int',
                                   'categories': [1, 2, 3, 4, 5],
                                   'ordered': True,
                                   'lower_bound': 2,
                                   'upper_bound': 5})

        # these are good
        for c, _ in self.cases:
            UDSDataType.from_dict(c)

    def test_to_dict(self):
        for c_in, c_out in self.cases:
            loaded = UDSDataType.from_dict(c_in)
            assert loaded.to_dict() == c_out

    def test_eq(self):
        # the partial input and its fully-specified expansion must compare equal
        for c_in, c_out in self.cases:
            loaded1 = UDSDataType.from_dict(c_in)
            loaded2 = UDSDataType.from_dict(c_out)

            assert loaded1 == loaded2

# metadata for two protoroles properties, as it would appear in a
# sentence-level annotation file (no bounds given)
sentence_metadata_example = {'protoroles': {'awareness': {'annotators': ['protoroles-annotator-8',
                                                                         'protoroles-annotator-9'],
                                                          'confidence': {'categories': [0, 1],
                                                                         'datatype': 'int',
                                                                         'ordered': False},
                                                          'value': {'categories': [1, 2, 3, 4, 5],
                                                                    'datatype': 'int',
                                                                    'ordered': True}},
                                            'change_of_location': {'annotators': ['protoroles-annotator-0',
                                                                                  'protoroles-annotator-1'],
                                                                   'confidence': {'categories': [0, 1],
                                                                                  'datatype': 'int',
                                                                                  'ordered': False},
                                                                   'value': {'categories': [1, 2, 3, 4, 5],
                                                                             'datatype': 'int',
                                                                             'ordered': True}}}}

# the same metadata with the bounds that round-tripping fills in for the
# ordered 'value' datatypes
sentence_metadata_example_full = {'protoroles': {'awareness': {'annotators': ['protoroles-annotator-8',
                                                                              'protoroles-annotator-9'],
                                                               'confidence': {'categories': [0, 1],
                                                                              'datatype': 'int',
                                                                              'ordered': False},
                                                               'value': {'categories': [1, 2, 3, 4, 5],
                                                                         'datatype': 'int',
                                                                         'ordered': True,
                                                                         'lower_bound': 1,
                                                                         'upper_bound': 5}},
                                                 'change_of_location': {'annotators': ['protoroles-annotator-0',
                                                                                       'protoroles-annotator-1'],
                                                                        'confidence': {'categories': [0, 1],
                                                                                       'datatype': 'int',
                                                                                       'ordered': False},
                                                                        'value': {'categories': [1, 2, 3, 4, 5],
                                                                                  'datatype': 'int',
                                                                                  'ordered': True,
                                                                                  'lower_bound': 1,
                                                                                  'upper_bound': 5}}}}


# the same metadata with annotators stripped out
sentence_metadata_example_noann = deepcopy(sentence_metadata_example)

for propdict in sentence_metadata_example_noann.values():
    for md in propdict.values():
        del md['annotators']


class TestUDSPropertyMetadata:

    def test_init(self):
        pass

    def test_from_dict(self):
        metadatadict = sentence_metadata_example['protoroles']['awareness']
        metadata = UDSPropertyMetadata.from_dict(metadatadict)

        assert isinstance(metadata.value, UDSDataType)
        assert isinstance(metadata.confidence, UDSDataType)

        assert metadata.value.datatype is int
        assert metadata.confidence.datatype is int

        # ordered categories stay a list; unordered ones become a set
        assert metadata.value.categories == [1, 2, 3, 4, 5]
        assert metadata.confidence.categories == {0, 1}

        assert metadata.annotators == {'protoroles-annotator-8',
                                       'protoroles-annotator-9'}

    def test_to_dict(self):
        metadatadict = sentence_metadata_example['protoroles']['awareness']
        metadata = UDSPropertyMetadata.from_dict(metadatadict)

        out_in_out = UDSPropertyMetadata.from_dict(metadata.to_dict()).to_dict()

        # have to check that the set of annotators is equal, because
        # they could be put out of order when loaded in
        assert set(sentence_metadata_example_full['protoroles']['awareness']['annotators']) ==\
            set(out_in_out['annotators'])

        assert sentence_metadata_example_full['protoroles']['awareness']['value'] ==\
            out_in_out['value']

        assert sentence_metadata_example_full['protoroles']['awareness']['confidence'] ==\
            out_in_out['confidence']

class TestUDSAnnotationMetadata:

    metadata = UDSAnnotationMetadata.from_dict(sentence_metadata_example)
    metadata_noann = UDSAnnotationMetadata.from_dict(sentence_metadata_example_noann)

    def test_getitem(self):
        self.metadata['protoroles']
        self.metadata['protoroles', 'awareness']
        self.metadata['protoroles']['awareness']
        self.metadata['protoroles', 'awareness'].value

        # indexing past (subspace, property) is not supported
        with pytest.raises(TypeError):
            self.metadata['protoroles', 'awareness', 'value']

    def test_add(self):
        # adding metadata to itself is idempotent
        assert self.metadata == self.metadata + self.metadata

        metadatadict1 = {'protoroles': {'awareness': sentence_metadata_example['protoroles']['awareness']}}
        metadatadict2 = {'protoroles': {'change_of_location': sentence_metadata_example['protoroles']['change_of_location']}}

        metadata1 = UDSAnnotationMetadata.from_dict(metadatadict1)
        metadata2 = UDSAnnotationMetadata.from_dict(metadatadict2)

        # FIX: the original computed this sum but never asserted anything
        # about it; the two single-property metadata objects must combine
        # back into the full example
        assert metadata1 + metadata2 == self.metadata

    def test_subspaces(self):
        assert self.metadata.subspaces == {'protoroles'}

    def test_properties(self):
        assert self.metadata.properties() == {'awareness',
                                              'change_of_location'}

        assert self.metadata.properties('protoroles') == {'awareness',
                                                          'change_of_location'}

    def test_annotators(self):
        assert self.metadata.annotators() == {'protoroles-annotator-0',
                                              'protoroles-annotator-1',
                                              'protoroles-annotator-8',
                                              'protoroles-annotator-9'}

        assert self.metadata.annotators('protoroles') == {'protoroles-annotator-0',
                                                          'protoroles-annotator-1',
                                                          'protoroles-annotator-8',
                                                          'protoroles-annotator-9'}

        assert self.metadata.annotators('protoroles', 'awareness') == {'protoroles-annotator-8',
                                                                       'protoroles-annotator-9'}

        # a property without a subspace is ambiguous
        with pytest.raises(ValueError):
            self.metadata.annotators(prop='awareness')

        assert self.metadata_noann.annotators() is None

    def test_has_annotators(self):
        assert self.metadata.has_annotators()
        assert self.metadata.has_annotators('protoroles')
        assert self.metadata.has_annotators('protoroles', 'awareness')
        assert not self.metadata_noann.has_annotators()


class TestUDSCorpusMetadata:

    # NOTE(review): stub — no assertions yet; presumably intended to exercise
    # a corpus-level metadata class analogous to UDSAnnotationMetadata
    metadata = UDSAnnotationMetadata.from_dict(sentence_metadata_example)

# --------------------------------------------------------------------------
# tests/test_vis.py
# --------------------------------------------------------------------------

import json
import os
from predpatt import PredPatt, PredPattOpts, load_conllu
from decomp.syntax.dependency import DependencyGraphBuilder
from decomp.semantics.predpatt import PredPattGraphBuilder
from decomp.semantics.uds import UDSSentenceGraph, UDSCorpus
from decomp.vis.uds_vis import UDSVisualization
from decomp import NormalizedUDSAnnotation
import pdb  # NOTE(review): debug leftover — appears unused, safe to drop
from test_uds_graph import raw_sentence_graph, rawtree, listtree
import pytest
import dash
from dash.testing.application_runners import import_app


@pytest.fixture
def basic_sentence_graph(test_data_dir):
    """Build a UDSSentenceGraph from the serialized visualization fixture.

    FIX: the original used ``json.load(open(...))``, leaking the file
    handle; the file is now closed via a ``with`` block.
    """
    with open(os.path.join(test_data_dir, "vis_data.json")) as f:
        graph_data = json.load(f)
    return UDSSentenceGraph.from_dict(graph_data)


def test_vis_basic(basic_sentence_graph, dash_duo):
    """Serve a visualization of a normalized graph and check it renders."""
    vis = UDSVisualization(basic_sentence_graph, add_syntax_edges=True)
    app = vis.serve(do_return=True)
    dash_duo.start_server(app)
    assert dash_duo.find_element("title") is not None


def test_vis_raw(raw_sentence_graph):
    """Visualizing a raw-annotation graph should raise AttributeError."""
    with pytest.raises(AttributeError):
        vis = UDSVisualization(raw_sentence_graph, add_syntax_edges=True)
        vis.serve()

# --------------------------------------------------------------------------
# uds-graph.png (binary asset):
# https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/uds-graph.png
# --------------------------------------------------------------------------