├── .dockerignore
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── decomp
    ├── __init__.py
    ├── corpus
    │   ├── __init__.py
    │   └── corpus.py
    ├── data
    │   ├── 1.0
    │   │   ├── normalized
    │   │   │   ├── document
    │   │   │   │   └── annotations
    │   │   │   │   │   └── .gitkeep
    │   │   │   └── sentence
    │   │   │   │   └── annotations
    │   │   │   │       ├── factuality.zip
    │   │   │   │       ├── genericity.zip
    │   │   │   │       ├── protoroles.zip
    │   │   │   │       ├── time.zip
    │   │   │   │       └── wordsense.zip
    │   │   └── raw
    │   │   │   └── sentence
    │   │   │       └── annotations
    │   │   │           ├── factuality.zip
    │   │   │           ├── genericity.zip
    │   │   │           ├── protoroles.zip
    │   │   │           ├── time.zip
    │   │   │           └── wordsense.zip
    │   ├── 2.0
    │   │   ├── normalized
    │   │   │   ├── document
    │   │   │   │   ├── .gitkeep
    │   │   │   │   └── annotations
    │   │   │   │   │   ├── .gitkeep
    │   │   │   │   │   └── event_structure_mereology.zip
    │   │   │   └── sentence
    │   │   │   │   └── annotations
    │   │   │   │       ├── .gitkeep
    │   │   │   │       ├── event_structure_distributivity.zip
    │   │   │   │       ├── event_structure_natural_parts.zip
    │   │   │   │       ├── factuality.zip
    │   │   │   │       ├── genericity.zip
    │   │   │   │       ├── protoroles.zip
    │   │   │   │       ├── time.zip
    │   │   │   │       └── wordsense.zip
    │   │   └── raw
    │   │   │   ├── document
    │   │   │       └── annotations
    │   │   │       │   ├── event_structure_mereology.zip
    │   │   │       │   └── time.zip
    │   │   │   └── sentence
    │   │   │       └── annotations
    │   │   │           ├── event_structure_distributivity.zip
    │   │   │           ├── event_structure_natural_parts.zip
    │   │   │           ├── factuality.zip
    │   │   │           ├── genericity.zip
    │   │   │           ├── protoroles.zip
    │   │   │           ├── time.zip
    │   │   │           └── wordsense.zip
    │   ├── LICENSE
    │   └── ud_ids.json
    ├── graph
    │   ├── __init__.py
    │   ├── nx.py
    │   └── rdf.py
    ├── semantics
    │   ├── __init__.py
    │   ├── predpatt.py
    │   └── uds
    │   │   ├── __init__.py
    │   │   ├── annotation.py
    │   │   ├── corpus.py
    │   │   ├── document.py
    │   │   ├── graph.py
    │   │   └── metadata.py
    ├── syntax
    │   ├── __init__.py
    │   └── dependency.py
    └── vis
    │   ├── __init__.py
    │   └── uds_vis.py
├── docs
    ├── Makefile
    ├── README.md
    ├── make.bat
    ├── requirements.txt
    └── source
    │   ├── conf.py
    │   ├── data
    │       ├── document-graphs.rst
    │       ├── index.rst
    │       ├── semantic-types.rst
    │       ├── sentence-graphs.rst
    │       └── syntactic-graphs.rst
    │   ├── index.rst
    │   ├── install.rst
    │   ├── package
    │       ├── decomp.corpus.corpus.rst
    │       ├── decomp.corpus.rst
    │       ├── decomp.graph.nx.rst
    │       ├── decomp.graph.rdf.rst
    │       ├── decomp.graph.rst
    │       ├── decomp.semantics.predpatt.rst
    │       ├── decomp.semantics.rst
    │       ├── decomp.semantics.uds.annotation.rst
    │       ├── decomp.semantics.uds.corpus.rst
    │       ├── decomp.semantics.uds.document.rst
    │       ├── decomp.semantics.uds.graph.rst
    │       ├── decomp.semantics.uds.metadata.rst
    │       ├── decomp.semantics.uds.rst
    │       ├── decomp.syntax.dependency.rst
    │       ├── decomp.syntax.rst
    │       ├── decomp.vis.rst
    │       ├── decomp.vis.uds_vis.rst
    │       └── index.rst
    │   └── tutorial
    │       ├── assets
    │           ├── vis_genericity_no_syntax.png
    │           ├── vis_no_protoroles_no_syntax.png
    │           ├── vis_no_protoroles_syntax.png
    │           ├── vis_no_syntax.png
    │           ├── vis_node_props_no_syntax.png
    │           ├── vis_node_props_syntax.png
    │           ├── vis_protoroles_no_syntax.png
    │           ├── vis_protoroles_syntax.png
    │           └── vis_syntax.png
    │       ├── index.rst
    │       ├── querying.rst
    │       ├── quick-start.rst
    │       ├── reading.rst
    │       ├── serializing.rst
    │       └── visualization.rst
├── requirements.txt
├── setup.py
├── tests
    ├── README.md
    ├── conftest.py
    ├── data
    │   ├── normalized_edge_document_annotation.json
    │   ├── normalized_edge_sentence_annotation.json
    │   ├── normalized_node_document_annotation.json
    │   ├── normalized_node_sentence_annotation.json
    │   ├── raw_edge_sentence_annotation.json
    │   ├── raw_edge_sentence_annotators.json
    │   ├── raw_edge_sentence_annotators.txt
    │   ├── raw_node_sentence_annotation.json
    │   ├── raw_node_sentence_annotators.txt
    │   ├── rawtree.conllu
    │   └── vis_data.json
    ├── requirements.txt
    ├── test_dependency.py
    ├── test_predpatt.py
    ├── test_uds_annotation.py
    ├── test_uds_corpus.py
    ├── test_uds_document.py
    ├── test_uds_graph.py
    ├── test_uds_metadata.py
    └── test_vis.py
└── uds-graph.png


/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.6
2 | 
3 | WORKDIR /usr/src/decomp
4 | 
5 | COPY . .
6 | 
7 | RUN pip install --no-cache-dir -r requirements.txt && \
8 |     pip install --no-cache-dir . && \
9 |     python -c "from decomp import UDSCorpus; UDSCorpus()"


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Aaron Steven White
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include decomp/ *
2 | recursive-include docs/ *
3 | recursive-include tests/ *
4 | include requirements.txt
5 | include README.md
6 | include LICENSE
7 | include Dockerfile


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Overview
  2 | 
  3 | [Decomp](https://github.com/decompositional-semantics-initiative/decomp)
  4 | is a toolkit for working with the [Universal Decompositional Semantics
  5 | (UDS) dataset](http://decomp.io), which is a collection of directed
  6 | acyclic semantic graphs with real-valued node and edge attributes
  7 | pointing into [Universal
  8 | Dependencies](https://universaldependencies.org/) syntactic dependency
  9 | trees.
 10 | 
 11 | ![UDS graph example](https://github.com/decompositional-semantics-initiative/decomp/raw/master/uds-graph.png)
 12 | 
 13 | The toolkit is built on top of
 14 | [NetworkX](https://github.com/networkx/networkx) and
 15 | [RDFLib](https://github.com/RDFLib/rdflib) making it straightforward to:
 16 | 
 17 |    - read the UDS dataset from its native JSON format
 18 |    - query both the syntactic and semantic subgraphs of UDS (as well as
 19 |      pointers between them) using SPARQL 1.1 queries
 20 |    - serialize UDS graphs to many common formats, such as
 21 |      [Notation3](https://www.w3.org/TeamSubmission/n3/),
 22 |      [N-Triples](https://www.w3.org/TR/n-triples/),
 23 |      [turtle](https://www.w3.org/TeamSubmission/turtle/), and
 24 |      [JSON-LD](https://json-ld.org/), as well as any other format
 25 |      supported by NetworkX
 26 | 
 27 | The toolkit was built by [Aaron Steven
 28 | White](http://aaronstevenwhite.io/) and is maintained by the
 29 | [Decompositional Semantics Initiative](http://decomp.io/). The UDS
 30 | dataset was constructed from annotations collected by the
 31 | [Decompositional Semantics Initiative](http://decomp.io/).
 32 | 
 33 | # Documentation
 34 | 
 35 | The [full documentation for the
 36 | package](https://decomp.readthedocs.io/en/latest/index.html) is hosted
 37 | at [Read the Docs](https://readthedocs.org/).
 38 | 	
 39 | # Citation
 40 | 
 41 | If you make use of the dataset and/or toolkit in your research, we ask
 42 | that you please cite the following paper in addition to the paper that
 43 | introduces the underlying dataset(s) on which UDS is based.
 44 | 
 45 | > White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. [The Universal Decompositional Semantics Dataset and Decomp Toolkit](https://www.aclweb.org/anthology/2020.lrec-1.699/). In Proceedings of The 12th Language Resources and Evaluation Conference, 5698–5707. Marseille, France: European Language Resources Association.
 46 | 
 47 | ```latex
 48 | @inproceedings{white-etal-2020-universal,
 49 |     title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit",
 50 |     author = "White, Aaron Steven  and
 51 |       Stengel-Eskin, Elias  and
 52 |       Vashishtha, Siddharth  and
 53 |       Govindarajan, Venkata Subrahmanyan  and
 54 |       Reisinger, Dee Ann  and
 55 |       Vieira, Tim  and
 56 |       Sakaguchi, Keisuke  and
 57 |       Zhang, Sheng  and
 58 |       Ferraro, Francis  and
 59 |       Rudinger, Rachel  and
 60 |       Rawlins, Kyle  and
 61 |       Van Durme, Benjamin",
 62 |     booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference",
 63 |     month = may,
 64 |     year = "2020",
 65 |     address = "Marseille, France",
 66 |     publisher = "European Language Resources Association",
 67 |     url = "https://www.aclweb.org/anthology/2020.lrec-1.699",
 68 |     pages = "5698--5707",
 69 |     ISBN = "979-10-95546-34-4",
 70 | }
 71 | ```
 72 | 
 73 | # License
 74 | 
 75 | Everything besides the contents of `decomp/data` are covered by the
 76 | MIT License contained at the same directory level as this README. All
 77 | contents of `decomp/data` are covered by the CC-BY-SA 4.0 license
 78 | contained in that directory.
 79 | 
 80 | # Installation
 81 | 
 82 | The most painless way to get started quickly is to use the included
 83 | barebones Python 3.6-based Dockerfile. To build the image and start a
 84 | python interactive prompt, use:
 85 | 
 86 | ```bash
 87 | git clone git://github.com/decompositional-semantics-initiative/decomp.git
 88 | cd decomp
 89 | docker build -t decomp .
 90 | docker run -it decomp python
 91 | ```
 92 | 
 93 | If you prefer to install directly to your local environment, simply
 94 | use `pip`.
 95 | 
 96 | ```bash
 97 | pip install --user git+git://github.com/decompositional-semantics-initiative/decomp.git
 98 | ```
 99 | 
100 | You can also clone and use the included `setup.py`.
101 | 
102 | ```bash
103 | git clone git://github.com/decompositional-semantics-initiative/decomp.git
104 | cd decomp
105 | pip install --user --no-cache-dir -r ./requirements.txt
106 | python setup.py install
107 | ```
108 | 
109 | If you would like to install the package for the purposes of
110 | development, use:
111 | 
112 | ```bash
113 | git clone git://github.com/decompositional-semantics-initiative/decomp.git
114 | cd decomp
115 | pip install --user --no-cache-dir -r ./requirements.txt
116 | python setup.py develop
117 | ```
118 | 
119 | # Quick Start
120 | 
121 | The UDS corpus can be read by directly importing it.
122 | 
123 | ```python
124 | from decomp import UDSCorpus
125 | 
126 | uds = UDSCorpus()
127 | ```
128 | 
129 | This imports a `UDSCorpus` object `uds`, which contains all graphs
130 | across all splits in the data.  If you would like a corpus, e.g.,
131 | containing only a particular split, see other loading options in [the
132 | tutorial on reading the
133 | corpus](https://decomp.readthedocs.io/en/latest/tutorial/reading.html)
134 | for details.
135 | 
136 | The first time you read UDS, it will take several minutes to complete
137 | while the dataset is built from the [Universal Dependencies English Web
138 | Treebank](https://github.com/UniversalDependencies/UD_English-EWT),
139 | which is not shipped with the package (but is downloaded automatically
140 | on import in the background), and the [UDS
141 | annotations](http://decomp.io/data/), which are shipped with the
142 | package. Subsequent uses will be faster, since the dataset is cached on
143 | build.
144 | 
145 | `UDSGraph` objects in the corpus can be accessed using standard
146 | dictionary getters or iteration. For instance, to get the UDS graph
147 | corresponding to the 12th sentence in `en-ud-train.conllu`, you can
148 | use:
149 | 
150 | ``` python
151 | uds["ewt-train-12"]
152 | ```
153 | 
154 | More generally, `UDSCorpus` objects behave like dictionaries. For
155 | example, to print all the graph identifiers in the corpus (e.g.
156 | `"ewt-train-12"`), you can use:
157 | 
158 | ``` python
159 | for graphid in uds:
160 |     print(graphid)
161 | ```
162 | 
163 | Similarly, to print all the graph identifiers in the corpus (e.g.
164 | "ewt-in-12") along with the corresponding sentence, you can use:
165 | 
166 | ``` python
167 | for graphid, graph in uds.items():
168 |     print(graphid)
169 |     print(graph.sentence)
170 | ```
171 | 
172 | A list of graph identifiers can also be accessed via the `graphids`
173 | attribute of the UDSCorpus. A mapping from these identifiers and the
174 | corresponding graph can be accessed via the `graphs` attribute.
175 | 
176 | ``` python
177 | # a list of the graph identifiers in the corpus
178 | uds.graphids
179 | 
180 | # a dictionary mapping the graph identifiers to the
181 | # corresponding graph
182 | uds.graphs
183 | ```
184 | 
185 | There are various instance attributes and methods for accessing nodes,
186 | edges, and their attributes in the UDS graphs. For example, to get a
187 | dictionary mapping identifiers for syntax nodes in the UDS graph to
188 | their attributes, you can use:
189 | 
190 | ``` python
191 | uds["ewt-train-12"].syntax_nodes
192 | ```
193 | 
194 | To get a dictionary mapping identifiers for semantics nodes in the UDS
195 | graph to their attributes, you can use:
196 | 
197 | ``` python
198 | uds["ewt-train-12"].semantics_nodes   
199 | ```
200 | 
201 | To get a dictionary mapping identifiers for semantics edges (tuples of
202 | node identifiers) in the UDS graph to their attributes, you can use:
203 | 
204 | ``` python
205 | uds["ewt-train-12"].semantics_edges()
206 | ```
207 | 
208 | To get a dictionary mapping identifiers for semantics edges (tuples of
209 | node identifiers) in the UDS graph involving the predicate headed by the
210 | 7th token to their attributes, you can use:
211 | 
212 | ``` python
213 | uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7')
214 | ```
215 | 
216 | To get a dictionary mapping identifiers for syntax edges (tuples of node
217 | identifiers) in the UDS graph to their attributes, you can use:
218 | 
219 | ``` python
220 | uds["ewt-train-12"].syntax_edges()
221 | ```
222 | 
223 | And to get a dictionary mapping identifiers for syntax edges (tuples of
224 | node identifiers) in the UDS graph involving the node for the 7th token
225 | to their attributes, you can use:
226 | 
227 | ``` python
228 | uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7')
229 | ```
230 | 
231 | There are also methods for accessing relationships between semantics and
232 | syntax nodes. For example, you can get a tuple of the ordinal position
233 | for the head syntax node in the UDS graph that maps of the predicate
234 | headed by the 7th token in the corresponding sentence to a list of the
235 | form and lemma attributes for that token, you can use:
236 | 
237 | ``` python
238 | uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
239 | ```
240 | 
241 | And if you want the same information for every token in the span, you
242 | can use:
243 | 
244 | ``` python
245 | uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
246 | ```
247 | 
248 | This will return a dictionary mapping ordinal position for syntax nodes
249 | in the UDS graph that make of the predicate headed by the 7th token in
250 | the corresponding sentence to a list of the form and lemma attributes
251 | for the corresponding tokens.
252 | 
253 | More complicated queries of the UDS graph can be performed using the
254 | `query` method, which accepts arbitrary SPARQL 1.1 queries. See [the
255 | tutorial on querying the
256 | corpus](https://decomp.readthedocs.io/en/latest/tutorial/querying.html)
257 | for details.
258 | 


--------------------------------------------------------------------------------
/decomp/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from pkg_resources import resource_filename
 4 | from logging import basicConfig, DEBUG
 5 | 
 6 | DATA_DIR = resource_filename('decomp', 'data/')
 7 | basicConfig(filename=os.path.join(DATA_DIR, 'build.log'),
 8 |             filemode='w',
 9 |             level=DEBUG)
10 | 
11 | from .semantics.uds import UDSCorpus
12 | from .semantics.uds import NormalizedUDSAnnotation
13 | from .semantics.uds import RawUDSAnnotation
14 | 


--------------------------------------------------------------------------------
/decomp/corpus/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for defining abstract corpus readers"""
2 | 
3 | from .corpus import *
4 | 


--------------------------------------------------------------------------------
/decomp/corpus/corpus.py:
--------------------------------------------------------------------------------
 1 | """Module for defining abstract graph corpus readers"""
 2 | 
 3 | from abc import ABCMeta, abstractmethod
 4 | 
 5 | from random import sample
 6 | from logging import warning
 7 | from typing import Dict, List, Tuple, Iterable, Hashable, Any, TypeVar
 8 | 
 9 | InGraph = TypeVar('InGraph')  # the input graph type
10 | OutGraph = TypeVar('OutGraph')  # the output graph type
11 | 
12 | 
13 | class Corpus(metaclass=ABCMeta):
14 |     """Container for graphs
15 | 
16 |     Parameters
17 |     ----------
18 |     graphs_raw
19 |         a sequence of graphs in a format that the graphbuilder for a
20 |         subclass of this abstract class can process
21 |     """
22 | 
23 |     def __init__(self, graphs_raw: Iterable[InGraph]):
24 |         self._graphs_raw = graphs_raw
25 |         self._build_graphs()
26 | 
27 |     def __iter__(self) -> Iterable[Hashable]:
28 |         return iter(self._graphs)
29 | 
30 |     def items(self) -> Iterable[Tuple[Hashable, OutGraph]]:
31 |         """Dictionary-like iterator for (graphid, graph) pairs"""
32 |         return self._graphs.items()
33 | 
34 |     def __getitem__(self, k: Hashable) -> Any:
35 |         return self._graphs[k]
36 | 
37 |     def __contains__(self, k: Hashable) -> bool:
38 |         return k in self._graphs
39 | 
40 |     def __len__(self) -> int:
41 |         return len(self._graphs)
42 | 
43 |     def _build_graphs(self) -> None:
44 |         self._graphs = {}
45 | 
46 |         for graphid, rawgraph in self._graphs_raw.items():
47 |             try:
48 |                 self._graphs[graphid] = self._graphbuilder(graphid, rawgraph)
49 |             except ValueError:
50 |                 warning(graphid+' has no or multiple root nodes')
51 |             except RecursionError:
52 |                 warning(graphid+' has loops')
53 | 
54 |     @abstractmethod
55 |     def _graphbuilder(self,
56 |                       graphid: Hashable,
57 |                       rawgraph: InGraph) -> OutGraph:
58 |         raise NotImplementedError
59 | 
60 |     @property
61 |     def graphs(self) -> Dict[Hashable, OutGraph]:
62 |         """the graphs in corpus"""
63 |         return self._graphs
64 | 
65 |     @property
66 |     def graphids(self) -> List[Hashable]:
67 |         """The graph ids in corpus"""
68 | 
69 |         return list(self._graphs)
70 | 
71 |     @property
72 |     def ngraphs(self) -> int:
73 |         """Number of graphs in corpus"""
74 | 
75 |         return len(self._graphs)
76 | 
77 |     def sample(self, k: int) -> Dict[Hashable, OutGraph]:
78 |         """Sample k graphs without replacement
79 | 
80 |         Parameters
81 |         ----------
82 |         k
83 |             the number of graphs to sample
84 |         """
85 |         
86 |         return {tid: self._graphs[tid]
87 |                 for tid
88 |                 in sample(self._graphs.keys(), k=k)}
89 | 


--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/document/annotations/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/document/annotations/.gitkeep


--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/factuality.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/genericity.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/protoroles.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/time.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/normalized/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/normalized/sentence/annotations/wordsense.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/factuality.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/genericity.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/protoroles.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/time.zip


--------------------------------------------------------------------------------
/decomp/data/1.0/raw/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/1.0/raw/sentence/annotations/wordsense.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/document/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/.gitkeep


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/document/annotations/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/annotations/.gitkeep


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/document/annotations/event_structure_mereology.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/document/annotations/event_structure_mereology.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/.gitkeep


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/event_structure_distributivity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/event_structure_distributivity.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/event_structure_natural_parts.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/event_structure_natural_parts.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/factuality.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/genericity.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/protoroles.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/time.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/normalized/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/normalized/sentence/annotations/wordsense.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/document/annotations/event_structure_mereology.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/document/annotations/event_structure_mereology.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/document/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/document/annotations/time.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/event_structure_distributivity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/event_structure_distributivity.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/event_structure_natural_parts.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/event_structure_natural_parts.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/factuality.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/factuality.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/genericity.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/genericity.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/protoroles.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/protoroles.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/time.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/time.zip


--------------------------------------------------------------------------------
/decomp/data/2.0/raw/sentence/annotations/wordsense.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/data/2.0/raw/sentence/annotations/wordsense.zip


--------------------------------------------------------------------------------
/decomp/graph/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for converting between NetworkX and RDFLib graphs"""
2 | 
3 | from .rdf import RDFConverter
4 | from .nx import NXConverter
5 | 


--------------------------------------------------------------------------------
/decomp/graph/nx.py:
--------------------------------------------------------------------------------
  1 | """Module for converting from networkx to RDF"""
  2 | 
  3 | from networkx import DiGraph, to_dict_of_dicts
  4 | from rdflib import Graph, URIRef, Literal
  5 | 
  6 | 
  7 | class NXConverter:
  8 |     """A converter between RDFLib graphs and NetworkX digraphs
  9 | 
 10 |     Parameters
 11 |     ----------
 12 |     graph
 13 |         the graph to convert
 14 |     """
 15 | 
 16 |     def __init__(self, rdfgraph: Graph):
 17 |         self.nxgraph = DiGraph()
 18 |         self.rdfgraph = rdfgraph
 19 | 
 20 |     @classmethod
 21 |     def rdf_to_networkx(cls, rdfgraph: Graph) -> DiGraph:
 22 |         """Convert an RDFLib graph to a NetworkX digraph
 23 | 
 24 |         Parameters
 25 |         ----------
 26 |         rdfgraph
 27 |             the RDFLib graph to convert
 28 |         """
 29 | 
 30 |         converter = cls(rdfgraph)
 31 | 
 32 |         raise NotImplementedError
 33 | 
 34 |         # nxdict = to_dict_of_dicts(nxgraph)
 35 | 
 36 |         # for nodeid1, edgedict in nxdict.items():
 37 |         #     converter._add_node_attributes(nodeid1)
 38 | 
 39 |         #     for nodeid2, properties in edgedict.items():
 40 |         #         converter._add_node_attributes(nodeid2)
 41 |         #         converter._add_edge_attributes(nodeid1, nodeid2, properties)
 42 | 
 43 |         # cls._reset_attributes()
 44 | 
 45 |         # return converter.rdfgraph
 46 | 
 47 |     # def _add_node_attributes(self, nodeid):
 48 |     #     for propid, val in self.nxgraph.nodes[nodeid].items():
 49 |     #         triple = self.__class__._construct_property(nodeid, propid, val)
 50 |     #         self.rdfgraph.add(triple)
 51 | 
 52 |     # def _add_edge_attributes(self, nodeid1, nodeid2, properties):
 53 |     #     triple = self.__class__._construct_edge(nodeid1, nodeid2)
 54 |     #     self.rdfgraph.add(triple)
 55 | 
 56 |     #     edgeid = triple[1]
 57 | 
 58 |     #     for propid, val in properties.items():
 59 |     #         triple = self.__class__._construct_property(edgeid, propid, val)
 60 |     #         self.rdfgraph.add(triple)
 61 | 
 62 |     # @classmethod
 63 |     # def _construct_node(cls, nodeid):
 64 |     #     if nodeid not in cls.NODES:
 65 |     #         cls.NODES[nodeid] = URIRef(nodeid)
 66 | 
 67 |     #     return cls.NODES[nodeid]
 68 | 
 69 |     # @classmethod
 70 |     # def _construct_edge(cls, nodeid1, nodeid2):
 71 |     #     node1 = cls._construct_node(nodeid1)
 72 |     #     node2 = cls._construct_node(nodeid2)
 73 | 
 74 |     #     edgeid = nodeid1 + '%%' + nodeid2
 75 | 
 76 |     #     if edgeid not in cls.EDGES:
 77 |     #         cls.EDGES[edgeid] = URIRef(edgeid)
 78 | 
 79 |     #     return (node1, cls.EDGES[edgeid], node2)
 80 | 
 81 |     # @classmethod
 82 |     # def _construct_property(cls, nodeid, propid, val):
 83 |     #     if nodeid not in cls.NODES:
 84 |     #         cls.NODES[nodeid] = URIRef(nodeid)
 85 | 
 86 |     #     if propid not in cls.NODES:
 87 |     #         cls.PROPERTIES[propid] = URIRef(propid)
 88 | 
 89 |     #     if propid in ['type', 'subtype']:
 90 |     #         if val not in cls.VALUES:
 91 |     #             cls.VALUES[val] = URIRef(val)
 92 | 
 93 |     #         return (cls.NODES[nodeid],
 94 |     #                 cls.PROPERTIES[propid],
 95 |     #                 cls.VALUES[val])
 96 | 
 97 |     #     else:
 98 |     #         return (cls.NODES[nodeid],
 99 |     #                 cls.PROPERTIES[propid],
100 |     #                 Literal(val))
101 | 
102 |     # @classmethod
103 |     # def _reset_attributes(cls):
104 |     #     cls.NODES = {}
105 |     #     cls.EDGES = {}
106 | 


--------------------------------------------------------------------------------
/decomp/graph/rdf.py:
--------------------------------------------------------------------------------
  1 | """Module for converting from networkx to RDF"""
  2 | 
  3 | from networkx import DiGraph, to_dict_of_dicts
  4 | from rdflib import Graph, URIRef, Literal
  5 | 
  6 | 
  7 | class RDFConverter:
  8 |     """A converter between NetworkX digraphs and RDFLib graphs
  9 | 
 10 |     Parameters
 11 |     ----------
 12 |     nxgraph
 13 |         the graph to convert
 14 |     """
 15 | 
 16 |     SUBSPACES = {}
 17 |     PROPERTIES = {'domain': URIRef('domain'),
 18 |                   'type': URIRef('type'),
 19 |                   'subspace': URIRef('subspace'),
 20 |                   'confidence': URIRef('confidence')}
 21 |     VALUES = {}
 22 | 
 23 |     def __init__(self, nxgraph: DiGraph):
 24 |         self.nxgraph = nxgraph
 25 |         self.rdfgraph = Graph()
 26 |         self.nodes = {}
 27 | 
 28 |     @classmethod
 29 |     def networkx_to_rdf(cls, nxgraph: DiGraph) -> Graph:
 30 |         """Convert a NetworkX digraph to an RDFLib graph
 31 | 
 32 |         Parameters
 33 |         ----------
 34 |         nxgraph
 35 |             the NetworkX graph to convert
 36 |         """
 37 | 
 38 |         converter = cls(nxgraph)
 39 | 
 40 |         nxdict = to_dict_of_dicts(nxgraph)
 41 | 
 42 |         for nodeid1, edgedict in nxdict.items():
 43 |             converter._add_node_attributes(nodeid1)
 44 |             for nodeid2 in edgedict:
 45 |                 converter._add_node_attributes(nodeid2)
 46 |                 converter._add_edge_attributes(nodeid1, nodeid2)
 47 | 
 48 |         return converter.rdfgraph
 49 | 
 50 |     def _add_node_attributes(self, nodeid):
 51 |         self._construct_node(nodeid)
 52 |         
 53 |         self._add_attributes(nodeid,
 54 |                              self.nxgraph.nodes[nodeid].items())
 55 | 
 56 |         
 57 |     def _add_edge_attributes(self, nodeid1, nodeid2):
 58 |         edgeid = self._construct_edge(nodeid1, nodeid2)
 59 |         edgetup = (nodeid1, nodeid2)
 60 |         
 61 |         self._add_attributes(edgeid,
 62 |                              self.nxgraph.edges[edgetup].items())
 63 |         
 64 | 
 65 |     def _add_attributes(self, nid, attributes):
 66 |         triples = []
 67 |         
 68 |         for attrid1, attrs1 in attributes:
 69 |             if not isinstance(attrs1, dict):
 70 |                 if isinstance(attrs1, list) or isinstance(attrs1, tuple):
 71 |                     errmsg = 'Cannot convert list- or tuple-valued' +\
 72 |                              ' attributes to RDF'
 73 |                     raise ValueError(errmsg)
 74 |                     
 75 |                 triples += self._construct_property(nid,
 76 |                                                     attrid1,
 77 |                                                     attrs1)
 78 | 
 79 |             else:            
 80 |                 for attrid2, attrs2 in attrs1.items():
 81 |                     triples += self._construct_property(nid,
 82 |                                                         attrid2,
 83 |                                                         attrs2,
 84 |                                                         attrid1)
 85 | 
 86 |         for t in triples:
 87 |             self.rdfgraph.add(t)                    
 88 |         
 89 |     def _construct_node(self, nodeid):        
 90 |         if nodeid not in self.nodes:
 91 |             self.nodes[nodeid] = URIRef(nodeid)
 92 | 
 93 |     def _construct_edge(self, nodeid1, nodeid2):
 94 |         edgeid = nodeid1 + '%%' + nodeid2
 95 | 
 96 |         if edgeid not in self.nodes:
 97 |             node1 = self.nodes[nodeid1]
 98 |             node2 = self.nodes[nodeid2]
 99 | 
100 |             self.nodes[edgeid] = URIRef(edgeid)
101 |             triple = (node1, self.nodes[edgeid], node2)
102 | 
103 |             self.rdfgraph.add(triple)
104 |             
105 |             return edgeid
106 | 
107 |         else:
108 |             return edgeid
109 | 
110 |     def _construct_property(self, nodeid, propid, val,
111 |                             subspaceid=None):
112 | 
113 |         c = self.__class__
114 |         
115 |         if isinstance(val, dict) and subspaceid is not None:
116 |             # We currently do not support querying on raw UDS
117 |             # annotations, all of which have dict-valued 'value'
118 |             # and 'confidence' fields.
119 |             if isinstance(val['value'], dict) or isinstance(val['confidence'], dict):
120 |                 raise TypeError('Attempted query of graph with raw properties. Querying '\
121 |                                 'graphs with raw properties is prohibited.')
122 |             triples = c._construct_subspace(subspaceid, propid)        
123 |             triples += [(self.nodes[nodeid],
124 |                          c.PROPERTIES[propid],
125 |                          Literal(val['value'])),
126 |                         (self.nodes[nodeid],
127 |                          c.PROPERTIES[propid+'-confidence'],
128 |                          Literal(val['confidence']))]
129 | 
130 |         elif propid in ['domain', 'type']:
131 |             if val not in c.VALUES:
132 |                 c.VALUES[val] = URIRef(val)
133 |             
134 |             triples = [(self.nodes[nodeid],
135 |                         c.PROPERTIES[propid],
136 |                         c.VALUES[val])]
137 | 
138 |         else:
139 |             if propid not in c.PROPERTIES:
140 |                 c.PROPERTIES[propid] = URIRef(propid)
141 |             
142 |             triples = [(self.nodes[nodeid],
143 |                         c.PROPERTIES[propid],
144 |                         Literal(val))]            
145 |             
146 |         return triples
147 | 
148 |     @classmethod
149 |     def _construct_subspace(cls, subspaceid, propid):
150 |         if subspaceid not in cls.SUBSPACES:
151 |             cls.SUBSPACES[subspaceid] = URIRef(subspaceid)
152 |             
153 |         if propid not in cls.PROPERTIES:
154 |             cls.PROPERTIES[propid] = URIRef(propid)
155 |             cls.PROPERTIES[propid+'-confidence'] = URIRef(propid+'-confidence')
156 | 
157 |         return [(cls.PROPERTIES[propid],
158 |                  cls.PROPERTIES['subspace'],
159 |                  cls.SUBSPACES[subspaceid]),
160 |                 (cls.PROPERTIES[propid+'-confidence'],
161 |                  cls.PROPERTIES['subspace'],
162 |                  cls.SUBSPACES[subspaceid]),                
163 |                 (cls.PROPERTIES[propid],
164 |                  cls.PROPERTIES['confidence'],
165 |                  cls.PROPERTIES[propid+'-confidence'])]
166 | 


--------------------------------------------------------------------------------
/decomp/semantics/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for representing PredPatt and UDS graphs
3 | 
4 | This module represents PredPatt and UDS graphs using networkx. It
5 | incorporates the dependency parse-based graphs from the syntax module
6 | as subgraphs.
7 | """
8 | 


--------------------------------------------------------------------------------
/decomp/semantics/predpatt.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=W0221
  2 | # pylint: disable=R0903
  3 | # pylint: disable=R1704
  4 | """Module for converting PredPatt objects to networkx digraphs"""
  5 | 
  6 | from os.path import basename, splitext
  7 | from typing import Tuple, Hashable, TextIO, Optional, Union
  8 | from networkx import DiGraph
  9 | from predpatt import load_conllu, PredPatt, PredPattOpts
 10 | from ..corpus import Corpus
 11 | from ..syntax.dependency import CoNLLDependencyTreeCorpus
 12 | 
 13 | DEFAULT_PREDPATT_OPTIONS = PredPattOpts(resolve_relcl=True,
 14 |                                         borrow_arg_for_relcl=True,
 15 |                                         resolve_conj=False,
 16 |                                         cut=True)  # Resolve relative clause
 17 | 
 18 | 
 19 | class PredPattCorpus(Corpus):
 20 |     """Container for predpatt graphs"""
 21 | 
 22 |     def _graphbuilder(self,
 23 |                       graphid: Hashable,
 24 |                       predpatt_depgraph: Tuple[PredPatt, DiGraph]) -> DiGraph:
 25 |         """
 26 |         Parameters
 27 |         ----------
 28 |         treeid
 29 |             an identifier for the tree
 30 |         predpatt_depgraph
 31 |             a pairing of the predpatt for a dependency parse and the graph
 32 |             representing that dependency parse
 33 |         """
 34 | 
 35 |         predpatt, depgraph = predpatt_depgraph
 36 | 
 37 |         return PredPattGraphBuilder.from_predpatt(predpatt, depgraph, graphid)
 38 | 
 39 |     @classmethod
 40 |     def from_conll(cls,
 41 |                    corpus: Union[str, TextIO],
 42 |                    name: str = 'ewt',
 43 |                    options: Optional[PredPattOpts] = None) -> 'PredPattCorpus':
 44 |         """Load a CoNLL dependency corpus and apply predpatt
 45 | 
 46 |         Parameters
 47 |         ----------
 48 |         corpus
 49 |             (path to) a .conllu file
 50 |         name
 51 |             the name of the corpus; used in constructing treeids
 52 |         options
 53 |             options for predpatt extraction
 54 |         """
 55 | 
 56 |         options = DEFAULT_PREDPATT_OPTIONS if options is None else options
 57 | 
 58 |         corp_is_str = isinstance(corpus, str)
 59 | 
 60 |         if corp_is_str and splitext(basename(corpus))[1] == '.conllu':
 61 |             with open(corpus) as infile:
 62 |                 data = infile.read()
 63 | 
 64 |         elif corp_is_str:
 65 |             data = corpus
 66 | 
 67 |         else:
 68 |             data = corpus.read()
 69 | 
 70 |         # load the CoNLL dependency parses as graphs
 71 |         ud_corp = {name+'-'+str(i+1): [line.split()
 72 |                                        for line in block.split('\n')
 73 |                                        if len(line) > 0
 74 |                                        if line[0] != '#']
 75 |                    for i, block in enumerate(data.split('\n\n'))}
 76 |         ud_corp = CoNLLDependencyTreeCorpus(ud_corp)
 77 | 
 78 |         # extract the predpatt for those dependency parses
 79 |         try:
 80 |             predpatt = {name+'-'+sid.split('_')[1]: PredPatt(ud_parse,
 81 |                                                              opts=options)
 82 |                         for sid, ud_parse in load_conllu(data)}
 83 | 
 84 |         except ValueError:
 85 |             errmsg = 'PredPatt was unable to parse the CoNLL you provided.' +\
 86 |                      ' This is likely due to using a version of UD that is' +\
 87 |                      ' incompatible with PredPatt. Use of version 1.2 is' +\
 88 |                      ' suggested.'
 89 | 
 90 |             raise ValueError(errmsg)
 91 |             
 92 |         return cls({n: (pp, ud_corp[n])
 93 |                     for n, pp in predpatt.items()})
 94 | 
 95 | 
 96 | class PredPattGraphBuilder:
 97 |     """A predpatt graph builder"""
 98 | 
 99 |     @classmethod
100 |     def from_predpatt(cls,
101 |                       predpatt: PredPatt,
102 |                       depgraph: DiGraph,
103 |                       graphid: str = '') -> DiGraph:
104 |         """Build a DiGraph from a PredPatt object and another DiGraph
105 | 
106 |         Parameters
107 |         ----------
108 |         predpatt
109 |             the predpatt extraction for the dependency parse
110 |         depgraph
111 |             the dependency graph
112 |         graphid
113 |             the tree indentifier; will be a prefix of all node
114 |             identifiers
115 |         """
116 |         # handle null graphids
117 |         graphid = graphid+'-' if graphid else ''
118 | 
119 |         # initialize the predpatt graph
120 |         # predpattgraph = DiGraph(predpatt=predpatt)
121 |         predpattgraph = DiGraph()
122 |         predpattgraph.name = graphid.strip('-')
123 | 
124 |         # include all of the syntax edges in the original dependendency graph
125 |         predpattgraph.add_nodes_from([(n, attr)
126 |                                       for n, attr in depgraph.nodes.items()])
127 |         predpattgraph.add_edges_from([(n1, n2, attr)
128 |                                       for (n1, n2), attr
129 |                                       in depgraph.edges.items()])
130 | 
131 |         # add links between predicate nodes and syntax nodes
132 |         predpattgraph.add_edges_from([edge
133 |                                       for event in predpatt.events
134 |                                       for edge
135 |                                       in cls._instantiation_edges(graphid,
136 |                                                                   event,
137 |                                                                   'pred')])
138 | 
139 |         # add links between argument nodes and syntax nodes
140 |         edges = [edge
141 |                  for event in predpatt.events
142 |                  for arg in event.arguments
143 |                  for edge
144 |                  in cls._instantiation_edges(graphid, arg, 'arg')]
145 | 
146 |         predpattgraph.add_edges_from(edges)
147 | 
148 |         # add links between predicate nodes and argument nodes
149 |         edges = [edge
150 |                  for event in predpatt.events
151 |                  for arg in event.arguments
152 |                  for edge in cls._predarg_edges(graphid, event, arg,
153 |                                                 arg.position
154 |                                                 in [e.position
155 |                                                     for e
156 |                                                     in predpatt.events])]
157 | 
158 |         predpattgraph.add_edges_from(edges)
159 | 
160 |         # mark that all the semantic nodes just added were from predpatt
161 |         # this is done to distinguish them from nodes added through annotations
162 |         for node in predpattgraph.nodes:
163 |             if 'semantics' in node:
164 |                 predpattgraph.nodes[node]['domain'] = 'semantics'
165 |                 predpattgraph.nodes[node]['frompredpatt'] = True
166 | 
167 |                 if 'arg' in node:
168 |                     predpattgraph.nodes[node]['type'] = 'argument'
169 |                 elif 'pred' in node:
170 |                     predpattgraph.nodes[node]['type'] = 'predicate'
171 | 
172 |         return predpattgraph
173 | 
174 |     @staticmethod
175 |     def _instantiation_edges(graphid, node, typ):
176 |         parent_id = graphid+'semantics-'+typ+'-'+str(node.position+1)
177 |         child_head_token_id = graphid+'syntax-'+str(node.position+1)
178 |         child_span_token_ids = [graphid+'syntax-'+str(tok.position+1)
179 |                                 for tok in node.tokens
180 |                                 if child_head_token_id !=
181 |                                 graphid+'syntax-'+str(tok.position+1)]
182 | 
183 |         return [(parent_id, child_head_token_id,
184 |                  {'domain': 'interface',
185 |                   'type': 'head'})] +\
186 |                [(parent_id, tokid, {'domain': 'interface',
187 |                                     'type': 'nonhead'})
188 |                 for tokid in child_span_token_ids]
189 | 
190 |     @staticmethod
191 |     def _predarg_edges(graphid, parent_node, child_node, pred_child):
192 |         parent_id = graphid+'semantics-pred-'+str(parent_node.position+1)
193 |         child_id = graphid+'semantics-arg-'+str(child_node.position+1)
194 | 
195 |         if pred_child:
196 |             child_id_pred = graphid +\
197 |                             'semantics-pred-' +\
198 |                             str(child_node.position+1)
199 |             return [(parent_id,
200 |                      child_id,
201 |                      {'domain': 'semantics',
202 |                       'type': 'dependency',
203 |                       'frompredpatt': True})] +\
204 |                    [(child_id,
205 |                      child_id_pred,
206 |                      {'domain': 'semantics',
207 |                       'type': 'head',
208 |                       'frompredpatt': True})]
209 | 
210 |         return [(parent_id,
211 |                  child_id,
212 |                  {'domain': 'semantics',
213 |                   'type': 'dependency',
214 |                   'frompredpatt': True})]
215 | 


--------------------------------------------------------------------------------
/decomp/semantics/uds/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for representing UDS corpora, documents, graphs, and annotations."""
2 | 
3 | from .corpus import UDSCorpus
4 | from .document import UDSDocument
5 | from .graph import UDSDocumentGraph
6 | from .graph import UDSSentenceGraph
7 | from .annotation import RawUDSAnnotation
8 | from .annotation import NormalizedUDSAnnotation
9 | 


--------------------------------------------------------------------------------
/decomp/semantics/uds/document.py:
--------------------------------------------------------------------------------
  1 | """Module for representing UDS documents."""
  2 | 
  3 | import re
  4 | 
  5 | from typing import Optional, Any
  6 | from typing import Dict
  7 | 
  8 | from memoized_property import memoized_property
  9 | from networkx import DiGraph
 10 | from .graph import UDSSentenceGraph, UDSDocumentGraph
 11 | 
 12 | 
 13 | class UDSDocument:
 14 |     """A Universal Decompositional Semantics document
 15 | 
 16 |     Parameters
 17 |     ----------
 18 |     sentence_graphs
 19 |         the UDSSentenceGraphs associated with each sentence in the document
 20 |     sentence_ids
 21 |         the UD sentence IDs for each graph
 22 |     name
 23 |         the name of the document (i.e. the UD document ID)
 24 |     genre
 25 |         the genre of the document (e.g. `weblog`)
 26 |     timestamp
 27 |         the timestamp of the UD document on which this UDSDocument is based
 28 |     doc_graph
 29 |         the NetworkX DiGraph for the document. If not provided, this will be
 30 |         initialized without edges from sentence_graphs
 31 |     """
 32 |     def __init__(self, sentence_graphs: Dict[str, UDSSentenceGraph],
 33 |                  sentence_ids: Dict[str, str], name: str, genre: str,
 34 |                  timestamp: Optional[str] = None, doc_graph: Optional[UDSDocumentGraph] = None):
 35 |         self.sentence_graphs = {}
 36 |         self.sentence_ids = {}
 37 |         self.name = name
 38 |         self.genre = genre
 39 |         self.timestamp = timestamp
 40 | 
 41 |         # Initialize the document-level graph
 42 |         if doc_graph:
 43 |             self.document_graph = doc_graph
 44 |         else:
 45 |             self.document_graph = UDSDocumentGraph(DiGraph(), name)
 46 | 
 47 |         # Initialize the sentence-level graphs
 48 |         self.add_sentence_graphs(sentence_graphs, sentence_ids)
 49 | 
 50 |     def to_dict(self) -> Dict:
 51 |         """Convert the graph to a dictionary"""
 52 |         return self.document_graph.to_dict()
 53 | 
 54 |     @classmethod
 55 |     def from_dict(cls, document: Dict[str, Dict], sentence_graphs: Dict[str, UDSSentenceGraph], 
 56 |                        sentence_ids: Dict[str, str], name: str = 'UDS') -> 'UDSDocument':
 57 |         """Construct a UDSDocument from a dictionary
 58 | 
 59 |         Since only the document graphs are serialized, the sentence
 60 |         graphs must also be provided to this method call in order
 61 |         to properly associate them with their documents.
 62 | 
 63 |         Parameters
 64 |         ----------
 65 |         document
 66 |             a dictionary constructed by networkx.adjacency_data,
 67 |             containing the graph for the document
 68 |         sentence_graphs
 69 |             a dictionary containing (possibly a superset of) the
 70 |             sentence-level graphs for the sentences in the document
 71 |         sentence_ids
 72 |             a dictionary containing (possibly a superset of) the
 73 |             UD sentence IDs for each graph
 74 |         name
 75 |             identifier to append to the beginning of node ids
 76 |         """
 77 |         document_graph = UDSDocumentGraph.from_dict(document, name)
 78 |         sent_graph_names = set(map(lambda node: node['semantics']['graph'], document['nodes']))
 79 |         sent_graphs = {}
 80 |         sent_ids = {}
 81 |         for gname in sent_graph_names:
 82 |             sentence_graphs[gname].document_id = name
 83 |             sentence_graphs[gname].sentence_id = sentence_ids[gname]
 84 |             sent_graphs[gname] = sentence_graphs[gname]
 85 |             sent_ids[gname] = sentence_ids[gname]
 86 |         genre = name.split('-')[0]
 87 |         timestamp = cls._get_timestamp_from_document_name(name)
 88 |         return cls(sent_graphs, sent_ids, name, genre, timestamp, document_graph)
 89 | 
 90 |     @staticmethod
 91 |     def _get_timestamp_from_document_name(document_name):
 92 |         timestamp = re.search('\d{8}_?\d{6}', document_name)
 93 |         return timestamp[0] if timestamp else None
 94 | 
 95 |     def add_sentence_graphs(self, sentence_graphs: Dict[str, UDSSentenceGraph], 
 96 |                                   sentence_ids: Dict[str, str]) -> None:
 97 |         """Add additional sentences to a document
 98 | 
 99 |         Parameters
100 |         ----------
101 |         sentence_graphs
102 |             a dictionary containing the sentence-level graphs
103 |             for the sentences in the document
104 |         sentence_ids
105 |             a dictionary containing the UD sentence IDs for each graph
106 |         name
107 |             identifier to append to the beginning of node ids
108 |         """
109 |         for gname, graph in sentence_graphs.items():
110 |             sentence_graphs[gname].sentence_id = sentence_ids[gname]
111 |             sentence_graphs[gname].document_id = self.name
112 |             self.sentence_graphs[gname] = graph
113 |             self.sentence_ids[gname] = sentence_ids[gname]
114 |             for node_name, node in graph.semantics_nodes.items():
115 |                 semantics = {'graph': gname, 'node': node_name}
116 |                 document_node_name = node_name.replace('semantics', 'document')
117 |                 self.document_graph.graph.add_node(document_node_name,
118 |                             domain='document', type=node['type'],
119 |                             frompredpatt=False, semantics=semantics)
120 | 
121 |     def add_annotation(self, node_attrs: Dict[str, Dict[str, Any]],
122 |                              edge_attrs: Dict[str, Dict[str, Any]]) -> None:
123 |         """Add node or edge annotations to the document-level graph
124 | 
125 |         Parameters
126 |         ----------
127 |         node_attrs
128 |             the node annotations to be added
129 |         edge_attrs
130 |             the edge annotations to be added
131 |         """
132 |         self.document_graph.add_annotation(node_attrs, edge_attrs, self.sentence_ids)
133 | 
134 |     def semantics_node(self, document_node: str) -> Dict[str, Dict]:
135 |         """The semantics node for a given document node
136 | 
137 |         Parameters
138 |         ----------
139 |         document_node
140 |             the document domain node whose semantics node is to be
141 |             retrieved
142 |         """
143 |         semantics = self.document_graph.nodes[document_node]['semantics']
144 |         semantics_node = self.sentence_graphs[semantics['graph']].semantics_nodes[semantics['node']]
145 |         return {semantics['node']: semantics_node}
146 | 
147 |     @memoized_property
148 |     def text(self) -> str:
149 |         """The document text"""
150 |         return ' '.join([sent_graph.sentence for gname, sent_graph in sorted(self.sentence_graphs.items())])
151 | 


--------------------------------------------------------------------------------
/decomp/syntax/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for representing CoNLL dependency tree corpora
3 | 
4 | This module provides readers for corpora represented using
5 | conll-formatted dependency parses. All dependency parses are read in
6 | as networkx graphs. These graphs become subgraphs of the PredPatt and
7 | UDS graphs in the semantics module.
8 | """
9 | 


--------------------------------------------------------------------------------
/decomp/syntax/dependency.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=R1717
  2 | # pylint: disable=R0903
  3 | """Module for building/containing dependency trees from CoNLL"""
  4 | 
  5 | from typing import List
  6 | from numpy import array
  7 | from networkx import DiGraph
  8 | from ..corpus import Corpus
  9 | 
 10 | CONLL_HEAD = {'u': ['id', 'form', 'lemma', 'upos', 'xpos',
 11 |                     'feats', 'head', 'deprel', 'deps', 'misc'],
 12 |               'x': ['id', 'form', 'lemma', 'cpostag', 'postag',
 13 |                     'feats', 'head', 'deprel', 'phead', 'pdeprel']}
 14 | 
 15 | CONLL_NODE_ATTRS = {'u': {k: CONLL_HEAD['u'].index(k)
 16 |                           for k in ['form', 'lemma', 'upos', 'xpos', 'feats']},
 17 |                     'x': {k: CONLL_HEAD['x'].index(k)
 18 |                           for k in ['form', 'lemma', 'cpostag',
 19 |                                     'postag', 'feats']}}
 20 | 
 21 | CONLL_EDGE_ATTRS = {'u': {k: CONLL_HEAD['u'].index(k)
 22 |                           for k in ['deprel']},
 23 |                     'x': {k: CONLL_HEAD['x'].index(k)
 24 |                           for k in ['deprel']}}
 25 | 
 26 | 
 27 | class CoNLLDependencyTreeCorpus(Corpus):
 28 |     """Class for building/containing dependency trees from CoNLL-U
 29 | 
 30 |     Attributes
 31 |     ----------
 32 |     graphs
 33 |         trees constructed from annotated sentences
 34 |     graphids
 35 |         ids for trees constructed from annotated sentences
 36 |     ngraphs
 37 |         number of graphs in corpus
 38 |     """
 39 | 
 40 |     def _graphbuilder(self, graphid: str, rawgraph: str):
 41 |         return DependencyGraphBuilder.from_conll(rawgraph, graphid)
 42 | 
 43 | 
 44 | class DependencyGraphBuilder:
 45 |     """A dependency graph builder"""
 46 | 
 47 |     @classmethod
 48 |     def from_conll(cls,
 49 |                    conll: List[List[str]],
 50 |                    treeid: str='',
 51 |                    spec: str='u') -> DiGraph:
 52 |         """Build DiGraph from a CoNLL representation
 53 | 
 54 |         Parameters
 55 |         ----------
 56 |         conll
 57 |             conll representation
 58 |         treeid
 59 |             a unique identifier for the tree
 60 |         spec
 61 |             the specification to assume of the conll representation
 62 |             ("u" or "x")
 63 |         """
 64 | 
 65 |         # handle null treeids
 66 |         treeid = treeid+'-' if treeid else ''
 67 | 
 68 |         # initialize the dependency graph
 69 |         depgraph = DiGraph(conll=array(conll))
 70 |         depgraph.name = treeid.strip('-')
 71 | 
 72 |         # populate graph with nodes
 73 |         depgraph.add_nodes_from([cls._conll_node_attrs(treeid, row, spec)
 74 |                                  for row in conll])
 75 | 
 76 |         # add the root
 77 |         depgraph.add_node(treeid+'root-0',
 78 |                           position=0,
 79 |                           domain='root',
 80 |                           type='root')
 81 | 
 82 |         # connect nodes
 83 |         depgraph.add_edges_from([cls._conll_edge_attrs(treeid, row, spec)
 84 |                                  for row in conll])
 85 | 
 86 |         return depgraph
 87 | 
 88 |     @staticmethod
 89 |     def _conll_node_attrs(treeid, row, spec):
 90 |         node_id = row[0]
 91 | 
 92 |         node_attrs = {'domain': 'syntax',
 93 |                       'type': 'token',
 94 |                       'position': int(node_id)}
 95 |         other_attrs = {}
 96 | 
 97 |         for attr, idx in CONLL_NODE_ATTRS[spec].items():
 98 |             # convert features into a dictionary
 99 |             if attr == 'feats':
100 |                 if row[idx] != '_':
101 |                     feat_split = row[idx].split('|')
102 |                     other_attrs = dict([kv.split('=')
103 |                                         for kv in feat_split])
104 | 
105 |             else:
106 |                 node_attrs[attr] = row[idx]
107 | 
108 |         node_attrs = dict(node_attrs, **other_attrs)
109 | 
110 |         return (treeid+'syntax-'+node_id, node_attrs)
111 | 
112 |     @staticmethod
113 |     def _conll_edge_attrs(treeid, row, spec):
114 |         child_id = treeid+'syntax-'+row[0]
115 | 
116 |         parent_position = row[CONLL_HEAD[spec].index('head')]
117 | 
118 |         if parent_position == '0':
119 |             parent_id = treeid+'root-0'
120 |         else:
121 |             parent_id = treeid+'syntax-'+parent_position
122 | 
123 |         edge_attrs = {attr: row[idx]
124 |                       for attr, idx in CONLL_EDGE_ATTRS[spec].items()}
125 | 
126 |         edge_attrs['domain'] = 'syntax'
127 |         edge_attrs['type'] = 'dependency'
128 | 
129 |         return (parent_id, child_id, edge_attrs)
130 | 


--------------------------------------------------------------------------------
/decomp/vis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/decomp/vis/__init__.py


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # Decomp documentation
 2 | 
 3 | To build the documentation, you will need Sphinx and three Sphinx extensions:
 4 | 
 5 | ```bash
 6 | pip install --user sphinx==3.1.2 sphinxcontrib-napoleon sphinx-autodoc-typehints sphinx_rtd_theme
 7 | ```
 8 | 
 9 | Then, while in this directory, use:
10 | 
11 | ```bash
12 | make clean
13 | make html
14 | ```
15 | 
16 | To view the built documentation, start a python http server with:
17 | 
18 | 
19 | ```bash
20 | python3 -m http.server
21 | ```
22 | 
23 | Then, navigate to [http://localhost:8000/build/html/](http://localhost:8000/build/html/) in your browser.
24 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx>=3.0.0
2 | sphinxcontrib-napoleon
3 | sphinx-autodoc-typehints
4 | sphinx_rtd_theme
5 | http://github.com/decompositional-semantics-initiative/decomp/tarball/master#egg=decomp
6 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # http://www.sphinx-doc.org/en/master/config
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../../decomp/'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'Decomp'
21 | copyright = '2020, Aaron Steven White'
22 | author = 'Aaron Steven White'
23 | 
24 | # The full version, including alpha/beta/rc tags
25 | release = '0.2.2'
26 | 
27 | # Changes root document from contents.rst to index.rst
28 | master_doc = 'index'
29 | 
30 | # -- General configuration ---------------------------------------------------
31 | 
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 |     'sphinx.ext.autodoc',
37 |     'sphinxcontrib.napoleon',  # MUST be loaded before typehints
38 |     'sphinx_autodoc_typehints'
39 | ]
40 | 
41 | # Napoleon settings
42 | napoleon_google_docstring = True
43 | napoleon_numpy_docstring = True
44 | napoleon_include_init_with_doc = False
45 | napoleon_include_private_with_doc = False
46 | napoleon_include_special_with_doc = False
47 | napoleon_use_admonition_for_examples = False
48 | napoleon_use_admonition_for_notes = False
49 | napoleon_use_admonition_for_references = False
50 | napoleon_use_ivar = False
51 | napoleon_use_param = True
52 | napoleon_use_rtype = True
53 | napoleon_use_keyword = True
54 | napoleon_custom_sections = None
55 | 
56 | # Add any paths that contain templates here, relative to this directory.
57 | templates_path = ['_templates']
58 | 
59 | # List of patterns, relative to source directory, that match files and
60 | # directories to ignore when looking for source files.
61 | # This pattern also affects html_static_path and html_extra_path.
62 | exclude_patterns = []
63 | 
64 | 
65 | # -- Options for HTML output -------------------------------------------------
66 | 
67 | # The theme to use for HTML and HTML Help pages.  See the documentation for
68 | # a list of builtin themes.
69 | #
70 | html_theme = 'sphinx_rtd_theme'
71 | 
72 | # Add any paths that contain custom static files (such as style sheets) here,
73 | # relative to this directory. They are copied after the builtin static files,
74 | # so a file named "default.css" will overwrite the builtin "default.css".
75 | html_static_path = ['_static']
76 | 


--------------------------------------------------------------------------------
/docs/source/data/document-graphs.rst:
--------------------------------------------------------------------------------
 1 | Universal Decompositional Document Graphs
 2 | =========================================
 3 | 
 4 | The semantic graphs that form the third layer of annotation represent
 5 | document-level relations. These graphs contain a node for each node in
 6 | the document's constituent sentence-level graphs along with a pointer
 7 | from the document-level node to the sentence-level node. Unlike the
 8 | sentence-level graphs, they are not produced by PredPatt, so whether
 9 | any two nodes in a document-level graph are joined by an edge is
10 | determined by whether the relation between the two nodes is annotated
11 | in some UDS dataset.
12 | 
13 | At minimum, each of these nodes has the following attributes:
14 | 
15 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
16 | 
17 |    - ``domain`` (``str``): the subgraph this node is part of (always ``document``)
18 |    - ``type`` (``str``): the type of object corresponding to this node in the ``semantics`` domain (either ``predicate`` or ``argument``)
19 |    - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``False``, although the corresponding ``semantics`` node will have this set as ``True``)
20 |    - ``semantics`` (``dict``): a two-item dictionary containing information about the corresponding ``semantics`` node. The first item, ``graph``, indicates the sentence-level graph that the semantics node comes from. The second item, ``node``, contains the name of the node.
21 | 
22 | Document graphs are initialized without edges, which are created dynamically
23 | when edge attribute annotations are added. These edges may span nodes
24 | associated with different sentences within a document and may connect not
25 | only predicates to arguments, but predicates to predicates and arguments to
26 | arguments. Any annotations that are provided that cross document boundaries
27 | will be automatically filtered out. Finally, beyond the attributes provided 
28 | by annotations, each edge will also contain all but the last of the core
29 | set of node attributes listed above.
30 | 
31 | The `UDSDocumentGraph`_ object is wrapped by a `UDSDocument`_, which
32 | holds additional metadata associated with the document, data relating
33 | to its constituent sentences (and their graphs), and methods for
34 | interacting with it. Finally, it should be noted that querying on
35 | document graphs is not currently supported.
36 | 
37 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument
38 | 


--------------------------------------------------------------------------------
/docs/source/data/index.rst:
--------------------------------------------------------------------------------
 1 | Dataset Reference
 2 | =================
 3 | 
 4 | The Universal Decompositional Semantics (UDS) dataset consists of four
 5 | layers of annotations built on top of the `English Web Treebank`_
 6 | (EWT).
 7 | 
 8 | .. toctree::
 9 |    :maxdepth: 2
10 |    :caption: Contents:	      
11 | 
12 |    syntactic-graphs
13 |    sentence-graphs
14 |    document-graphs   
15 |    semantic-types
16 |    
17 | .. _English Web Treebank: https://catalog.ldc.upenn.edu/LDC2012T13
18 | 
19 | Each layer contains pointers directly to the previous layer.
20 | 


--------------------------------------------------------------------------------
/docs/source/data/semantic-types.rst:
--------------------------------------------------------------------------------
  1 | `Universal Decompositional Semantic`_ Types
  2 | ===========================================
  3 | 
  4 | .. _Universal Decompositional Semantic: http://decomp.io/
  5 | 
  6 | PredPatt makes very coarse-grained typing distinctions—between
  7 | predicate and argument nodes, on the one hand, and between dependency
  8 | and head edges, on the other. UDS provides ultra fine-grained typing
  9 | distinctions, represented as collections of real-valued
 10 | attributes. The union of all node and edge attributes defined in UDS
 11 | determines the *UDS type space*; any proper subset determines a *UDS
 12 | type subspace*.
 13 | 
 14 | UDS attributes are derived from crowd-sourced annotations of the heads
 15 | or spans corresponding to predicates and/or arguments and are
 16 | represented in the dataset as node and/or edge attributes. It is
 17 | important to note that, though all nodes and edges in the semantics
 18 | domain have a ``type`` attribute, UDS does not afford any special
 19 | status to these types. That is, the only thing that UDS "sees" are the
 20 | nodes and edges in the semantics domain. The set of nodes and edges
 21 | visible to UDS is a superset of those associated with PredPatt
 22 | predicates and their arguments.
 23 | 
 24 | There are currently four node type subspaces annotated on
 25 | nodes in sentence-level graphs.
 26 | 
 27 |   - `Factuality`_ (``factuality``)
 28 |   - `Genericity`_ (``genericity``)
 29 |   - `Time`_ (``time``)
 30 |   - `Entity type`_ (``wordsense``)
 31 |   - `Event structure`_ (``event_structure``)
 32 | 
 33 | There is currently one edge type subspace annotated on
 34 | edges in sentence-level graphs.
 35 | 
 36 |   - `Semantic Proto-Roles`_ (``protoroles``)
 37 |   - `Event structure`_ (``event_structure``)    
 38 | 
 39 | There is currently (starting in UDS2.0) one edge type subspace
 40 | annotated on edges in document-level graphs.
 41 | 
 42 |   - `Time`_ (``time``)
 43 |   - `Event structure`_ (``event_structure``)    
 44 |     
 45 | Each subspace key lies at the same level as the ``type`` attribute and
 46 | maps to a dictionary value. This dictionary maps from attribute keys
 47 | (see *Attributes* in each section below) to dictionaries that always
 48 | have two keys ``value`` and ``confidence``. See the below paper for
 49 | information on how the these are derived from the underlying dataset.
 50 | 
 51 | Two versions of these annotations are currently available: one
 52 | containing the raw annotator data (``"raw"``) and the other containing
 53 | normalized data (``"normalized"``). In the former case, both the
 54 | ``value`` and ``confidence`` fields described above map to
 55 | dictionaries keyed on (anonymized) annotator IDs, where the
 56 | corresponding value contains that annotator's response (for the
 57 | ``value`` dictionary) or confidence (for the ``confidence``
 58 | dictionary). In the latter case, the ``value`` and ``confidence``
 59 | fields map to single, normalized value and confidence scores,
 60 | respectively.
 61 | 
 62 | For more information on the normalization used to produce the
 63 | normalized annotations, see:
 64 | 
 65 |   White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. `The Universal Decompositional Semantics Dataset and Decomp Toolkit`_. *Proceedings of The 12th Language Resources and Evaluation Conference*, 5698–5707. Marseille, France: European Language Resources Association.
 66 | 
 67 | 
 68 | .. _The Universal Decompositional Semantics Dataset and Decomp Toolkit: https://www.aclweb.org/anthology/2020.lrec-1.699/
 69 |   
 70 | .. code-block:: latex
 71 | 
 72 |   @inproceedings{white-etal-2020-universal,
 73 |       title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit",
 74 |       author = "White, Aaron Steven  and
 75 |         Stengel-Eskin, Elias  and
 76 |         Vashishtha, Siddharth  and
 77 |         Govindarajan, Venkata Subrahmanyan  and
 78 |         Reisinger, Dee Ann  and
 79 |         Vieira, Tim  and
 80 |         Sakaguchi, Keisuke  and
 81 |         Zhang, Sheng  and
 82 |         Ferraro, Francis  and
 83 |         Rudinger, Rachel  and
 84 |         Rawlins, Kyle  and
 85 |         Van Durme, Benjamin",
 86 |       booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference",
 87 |       month = may,
 88 |       year = "2020",
 89 |       address = "Marseille, France",
 90 |       publisher = "European Language Resources Association",
 91 |       url = "https://www.aclweb.org/anthology/2020.lrec-1.699",
 92 |       pages = "5698--5707",
 93 |       ISBN = "979-10-95546-34-4",
 94 |   }
 95 | 
 96 | 
 97 | Information about each subspace can be found below. Unless otherwise
 98 | specified the properties in a particular subspace remain constant
 99 | across the raw and normalized formats.
100 |   
101 | Factuality
102 | ----------
103 | 
104 | **Project page**
105 | 
106 | `<http://decomp.io/projects/factuality/>`_
107 | 
108 | **Sentence-level attributes**
109 | 
110 | ``factual``
111 | 
112 | **First UDS version**
113 | 
114 | 1.0
115 | 
116 | **References**
117 | 
118 |   White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016.
119 | 
120 | 
121 |   Rudinger, R., White, A.S., & B. Van Durme. 2018. `Neural models of factuality`_. *Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)*, pages 731–744. New Orleans, Louisiana, June 1-6, 2018.
122 | 
123 | .. _Neural models of factuality: https://www.aclweb.org/anthology/N18-1067  
124 | .. _Universal Decompositional Semantics on Universal Dependencies: https://www.aclweb.org/anthology/D16-1177
125 |   
126 | .. code-block:: latex
127 | 
128 |   @inproceedings{white-etal-2016-universal,
129 |       title = "Universal Decompositional Semantics on {U}niversal {D}ependencies",
130 |       author = "White, Aaron Steven  and
131 |         Reisinger, Dee Ann  and
132 |         Sakaguchi, Keisuke  and
133 |         Vieira, Tim  and
134 |         Zhang, Sheng  and
135 |         Rudinger, Rachel  and
136 |         Rawlins, Kyle  and
137 |         Van Durme, Benjamin",
138 |       booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing",
139 |       month = nov,
140 |       year = "2016",
141 |       address = "Austin, Texas",
142 |       publisher = "Association for Computational Linguistics",
143 |       url = "https://www.aclweb.org/anthology/D16-1177",
144 |       doi = "10.18653/v1/D16-1177",
145 |       pages = "1713--1723",
146 |   }
147 |   
148 |   @inproceedings{rudinger-etal-2018-neural-models,
149 |       title = "Neural Models of Factuality",
150 |       author = "Rudinger, Rachel  and
151 |         White, Aaron Steven  and
152 |         Van Durme, Benjamin",
153 |       booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)",
154 |       month = jun,
155 |       year = "2018",
156 |       address = "New Orleans, Louisiana",
157 |       publisher = "Association for Computational Linguistics",
158 |       url = "https://www.aclweb.org/anthology/N18-1067",
159 |       doi = "10.18653/v1/N18-1067",
160 |       pages = "731--744",
161 |   }
162 | 
163 | 
164 | Genericity
165 | ----------
166 | 
167 | **Project page**
168 | 
169 | `<http://decomp.io/projects/genericity/>`_
170 | 
171 | **Sentence-level attributes**
172 | 
173 | ``arg-particular``, ``arg-kind``, ``arg-abstract``, ``pred-particular``, ``pred-dynamic``, ``pred-hypothetical``
174 | 
175 | **First UDS version**
176 | 
177 | 1.0
178 | 
179 | **References**
180 | 
181 |   Govindarajan, V.S., B. Van Durme, & A.S. White. 2019. `Decomposing Generalization: Models of Generic, Habitual, and Episodic Statements`_. Transactions of the Association for Computational Linguistics.
182 | 
183 | .. _Decomposing Generalization\: Models of Generic, Habitual, and Episodic Statements: https://www.aclweb.org/anthology/Q19-1035
184 |   
185 | .. code-block:: latex
186 | 
187 |   @article{govindarajan-etal-2019-decomposing,
188 |       title = "Decomposing Generalization: Models of Generic, Habitual, and Episodic Statements",
189 |       author = "Govindarajan, Venkata  and
190 |         Van Durme, Benjamin  and
191 |         White, Aaron Steven",
192 |       journal = "Transactions of the Association for Computational Linguistics",
193 |       volume = "7",
194 |       month = mar,
195 |       year = "2019",
196 |       url = "https://www.aclweb.org/anthology/Q19-1035",
197 |       doi = "10.1162/tacl_a_00285",
198 |       pages = "501--517"
199 |   }
200 | 
201 | 
202 | Time
203 | ----
204 | 
205 | **Project page**
206 | 
207 | `<http://decomp.io/projects/time/>`_
208 | 
209 | **Sentence-level attributes**
210 | 
211 | *normalized*
212 | 
213 | ``dur-hours``, ``dur-instant``, ``dur-forever``, ``dur-weeks``, ``dur-days``, ``dur-months``, ``dur-years``, ``dur-centuries``, ``dur-seconds``, ``dur-minutes``, ``dur-decades``
214 | 
215 | *raw*
216 | 
217 | ``duration``
218 | 
219 | 
220 | **Document-level attributes**
221 | 
222 | *raw*
223 | 
224 | ``rel-start1``, ``rel-start2``, ``rel-end1``, ``rel-end2``
225 | 
226 | **First UDS version**
227 | 
228 | 1.0 (sentence-level), 2.0 (document-level)
229 | 
230 | **References**
231 | 
232 |   Vashishtha, S., B. Van Durme, & A.S. White. 2019. `Fine-Grained Temporal Relation Extraction`_. *Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019)*, 2906—2919. Florence, Italy, July 29-31, 2019.
233 | 
234 | 
235 | .. _Fine-Grained Temporal Relation Extraction: https://www.aclweb.org/anthology/P19-1280
236 |   
237 | .. code-block:: latex
238 | 		
239 |   @inproceedings{vashishtha-etal-2019-fine,
240 |       title = "Fine-Grained Temporal Relation Extraction",
241 |       author = "Vashishtha, Siddharth  and
242 |         Van Durme, Benjamin  and
243 |         White, Aaron Steven",
244 |       booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
245 |       month = jul,
246 |       year = "2019",
247 |       address = "Florence, Italy",
248 |       publisher = "Association for Computational Linguistics",
249 |       url = "https://www.aclweb.org/anthology/P19-1280",
250 |       doi = "10.18653/v1/P19-1280",
251 |       pages = "2906--2919"
252 |   }
253 | 
254 | 
255 | **Notes**
256 | 
257 | 1. The Time dataset has different formats for raw and normalized annotations. The duration attributes from the normalized version are each assigned an ordinal value in the raw version (in ascending order of duration), which is assigned to the single attribute ``duration``.
258 | 2. The document-level relation annotations are *only* available in the raw format and only starting in UDS2.0.
259 | 
260 | Entity type
261 | -----------
262 | 
263 | **Project page**
264 | 
265 | `<http://decomp.io/projects/word-sense/>`_
266 | 
267 | **Sentence-level attributes**
268 | 
269 | ``supersense-noun.shape``, ``supersense-noun.process``, ``supersense-noun.relation``, ``supersense-noun.communication``, ``supersense-noun.time``, ``supersense-noun.plant``, ``supersense-noun.phenomenon``, ``supersense-noun.animal``, ``supersense-noun.state``, ``supersense-noun.substance``, ``supersense-noun.person``, ``supersense-noun.possession``, ``supersense-noun.Tops``, ``supersense-noun.object``, ``supersense-noun.event``, ``supersense-noun.artifact``, ``supersense-noun.act``, ``supersense-noun.body``, ``supersense-noun.attribute``, ``supersense-noun.quantity``, ``supersense-noun.motive``, ``supersense-noun.location``, ``supersense-noun.cognition``, ``supersense-noun.group``, ``supersense-noun.food``, ``supersense-noun.feeling``
270 | 
271 | **First UDS version**
272 | 
273 | 1.0
274 | 
275 | **Notes**
276 | 
277 | 1. The key is called ``wordsense`` because the normalized annotations come from UDS-Word Sense (v1.0).
278 | 
279 | **References**
280 | 
281 |   White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016.
282 | 
283 | .. code-block:: latex
284 | 
285 |   @inproceedings{white-etal-2016-universal,
286 |       title = "Universal Decompositional Semantics on {U}niversal {D}ependencies",
287 |       author = "White, Aaron Steven  and
288 |         Reisinger, Dee Ann  and
289 |         Sakaguchi, Keisuke  and
290 |         Vieira, Tim  and
291 |         Zhang, Sheng  and
292 |         Rudinger, Rachel  and
293 |         Rawlins, Kyle  and
294 |         Van Durme, Benjamin",
295 |       booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing",
296 |       month = nov,
297 |       year = "2016",
298 |       address = "Austin, Texas",
299 |       publisher = "Association for Computational Linguistics",
300 |       url = "https://www.aclweb.org/anthology/D16-1177",
301 |       doi = "10.18653/v1/D16-1177",
302 |       pages = "1713--1723",
303 |   }
304 | 
305 | 
306 | Semantic Proto-Roles
307 | --------------------
308 | 
309 | **Project page**
310 | 
311 | `<http://decomp.io/projects/semantic-proto-roles/>`_
312 | 
313 | **Sentence-level attributes**
314 | 
315 | ``was_used``, ``purpose``, ``partitive``, ``location``, ``instigation``, ``existed_after``, ``time``, ``awareness``, ``change_of_location``, ``manner``, ``sentient``, ``was_for_benefit``, ``change_of_state_continuous``, ``existed_during``, ``change_of_possession``, ``existed_before``, ``volition``, ``change_of_state``
316 | 
317 | **References**
318 | 
319 |   Reisinger, D., R. Rudinger, F. Ferraro, C. Harman, K. Rawlins, & B. Van Durme. (2015). `Semantic Proto-Roles`_. *Transactions of the Association for Computational Linguistics 3*:475–488.
320 | 
321 |   White, A.S., D. Reisinger, K. Sakaguchi, T. Vieira, S. Zhang, R. Rudinger, K. Rawlins, & B. Van Durme. 2016. `Universal Decompositional Semantics on Universal Dependencies`_. *Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing*, pages 1713–1723, Austin, Texas, November 1-5, 2016.
322 | 
323 | .. _Semantic Proto-Roles: https://www.aclweb.org/anthology/Q15-1034
324 |   
325 | .. code-block:: latex
326 | 
327 |   @article{reisinger-etal-2015-semantic,
328 |       title = "Semantic Proto-Roles",
329 |       author = "Reisinger, Dee Ann  and
330 |         Rudinger, Rachel  and
331 |         Ferraro, Francis  and
332 |         Harman, Craig  and
333 |         Rawlins, Kyle  and
334 |         Van Durme, Benjamin",
335 |       journal = "Transactions of the Association for Computational Linguistics",
336 |       volume = "3",
337 |       year = "2015",
338 |       url = "https://www.aclweb.org/anthology/Q15-1034",
339 |       doi = "10.1162/tacl_a_00152",
340 |       pages = "475--488",
341 |   }
342 | 		
343 |   @inproceedings{white-etal-2016-universal,
344 |       title = "Universal Decompositional Semantics on {U}niversal {D}ependencies",
345 |       author = "White, Aaron Steven  and
346 |         Reisinger, Dee Ann  and
347 |         Sakaguchi, Keisuke  and
348 |         Vieira, Tim  and
349 |         Zhang, Sheng  and
350 |         Rudinger, Rachel  and
351 |         Rawlins, Kyle  and
352 |         Van Durme, Benjamin",
353 |       booktitle = "Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing",
354 |       month = nov,
355 |       year = "2016",
356 |       address = "Austin, Texas",
357 |       publisher = "Association for Computational Linguistics",
358 |       url = "https://www.aclweb.org/anthology/D16-1177",
359 |       doi = "10.18653/v1/D16-1177",
360 |       pages = "1713--1723",
361 |   }
362 | 
363 | 
364 | Event structure
365 | ---------------
366 | 
367 | **Project page**
368 | 
369 | `<http://decomp.io/projects/event-structure/>`_
370 | 
371 | **Sentence-level attributes**
372 | 
373 | *normalized*
374 | 
375 | 
376 | ``distributive``, ``dynamic``, ``natural_parts``, ``part_similarity``, ``telic``, ``avg_part_duration_lbound-centuries``, ``avg_part_duration_ubound-centuries``, ``situation_duration_lbound-centuries``, ``situation_duration_ubound-centuries``, ``avg_part_duration_lbound-days``, ``avg_part_duration_ubound-days``, ``situation_duration_lbound-days``, ``situation_duration_ubound-days``, ``avg_part_duration_lbound-decades``, ``avg_part_duration_ubound-decades``, ``situation_duration_lbound-decades``, ``situation_duration_ubound-decades``, ``avg_part_duration_lbound-forever``, ``avg_part_duration_ubound-forever``, ``situation_duration_lbound-forever``, ``situation_duration_ubound-forever``, ``avg_part_duration_lbound-fractions_of_a_second``, ``avg_part_duration_ubound-fractions_of_a_second``, ``situation_duration_lbound-fractions_of_a_second``, ``situation_duration_ubound-fractions_of_a_second``, ``avg_part_duration_lbound-hours``, ``avg_part_duration_ubound-hours``, ``situation_duration_lbound-hours``, ``situation_duration_ubound-hours``, ``avg_part_duration_lbound-instant``, ``avg_part_duration_ubound-instant``, ``situation_duration_lbound-instant``, ``situation_duration_ubound-instant``, ``avg_part_duration_lbound-minutes``, ``avg_part_duration_ubound-minutes``, ``situation_duration_lbound-minutes``, ``situation_duration_ubound-minutes``, ``avg_part_duration_lbound-months``, ``avg_part_duration_ubound-months``, ``situation_duration_lbound-months``, ``situation_duration_ubound-months``, ``avg_part_duration_lbound-seconds``, ``avg_part_duration_ubound-seconds``, ``situation_duration_lbound-seconds``, ``situation_duration_ubound-seconds``, ``avg_part_duration_lbound-weeks``, ``avg_part_duration_ubound-weeks``, ``situation_duration_lbound-weeks``, ``situation_duration_ubound-weeks``, ``avg_part_duration_lbound-years``, ``avg_part_duration_ubound-years``, ``situation_duration_lbound-years``, ``situation_duration_ubound-years``
377 | 
378 | *raw*
379 | 
380 | ``dynamic``, ``natural_parts``, ``part_similarity``, ``telic``, ``avg_part_duration_lbound``, ``avg_part_duration_ubound``, ``situation_duration_lbound``, ``situation_duration_ubound``
381 | 
382 | 
383 | **Document-level attributes**
384 | 
385 | ``pred1_contains_pred2``, ``pred2_contains_pred1``
386 | 
387 | **First UDS version**
388 | 
389 | 2.0
390 | 
391 | **Notes**
392 | 
393 | 1. Whether ``dynamic``, ``situation_duration_lbound``, and ``situation_duration_ubound`` are answered or ``part_similarity``, ``avg_part_duration_lbound``, and ``avg_part_duration_ubound`` are answered is dependent on the answer an annotator gives to ``natural_parts``. Thus, not all node attributes will necessarily be present on all nodes.
394 | 
395 | **References**
396 | 
397 |   Gantt, W., L. Glass, & A.S. White. 2021. `Decomposing and Recomposing Event Structure`_. arXiv:2103.10387 [cs.CL].
398 | 
399 | 
400 | .. _Decomposing and Recomposing Event Structure: https://arxiv.org/abs/2103.10387
401 |   
402 | .. code-block:: latex
403 | 
404 |   @misc{gantt2021decomposing,
405 |       title={Decomposing and Recomposing Event Structure}, 
406 |       author={William Gantt and Lelia Glass and Aaron Steven White},
407 |       year={2021},
408 |       eprint={2103.10387},
409 |       archivePrefix={arXiv},
410 |       primaryClass={cs.CL}
411 |   }
412 | 
413 | 
414 |  
415 | 


--------------------------------------------------------------------------------
/docs/source/data/sentence-graphs.rst:
--------------------------------------------------------------------------------
  1 | `PredPatt`_ Sentence Graphs
  2 | ===========================
  3 | 
  4 | .. _PredPatt: https://github.com/hltcoe/PredPatt
  5 | 
  6 | The semantic graphs that form the second layer of annotation in the
  7 | dataset are produced by the PredPatt_ system. PredPatt takes as input
  8 | a UD parse for a single sentence and produces a set of predicates and
  9 | set of arguments of each predicate in that sentence. Both predicates
 10 | and arguments are associated with a single head token in the sentence
 11 | as well as a set of tokens that make up the predicate or argument (its
 12 | span). Predicate or argument spans may be trivial in only containinig
 13 | the head token.
 14 | 
 15 | For example, given the dependency parse for the sentence *Chris gave
 16 | the book to Pat .*, PredPatt produces the following.
 17 | 
 18 | ::
 19 |    
 20 |   ?a gave ?b to ?c
 21 |       ?a: Chris
 22 |       ?b: the book
 23 |       ?c: Pat
 24 | 
 25 | Assuming UD's 1-indexation, the single predicate in this sentence
 26 | (*gave...to*) has a head at position 2 and a span over positions {2,
 27 | 5}. This predicate has three arguments, one headed by *Chris* at
 28 | position 1, with span over position {1}; one headed by *book* at
 29 | position 4, with span over positions {3, 4}; and one headed by *Pat*
 30 | at position 6, with span over position {6}.
 31 |       
 32 | See the `PredPatt documentation tests`_ for examples.
 33 | 
 34 | .. _PredPatt documentation tests: https://github.com/hltcoe/PredPatt/blob/master/doc/DOCTEST.md
 35 | 
 36 | Each predicate and argument produced by PredPatt is associated with a
 37 | node in a digraph with identifier
 38 | ``ewt-SPLIT-SENTNUM-semantics-TYPE-HEADTOKNUM``, where ``TYPE`` is
 39 | always either ``pred`` or ``arg`` and ``HEADTOKNUM`` is the ordinal
 40 | position of the head token within the sentence (1-indexed, following
 41 | the convention in UD-EWT). At minimum, each such node has the
 42 | following attributes.
 43 | 
 44 |   - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``)
 45 |   - ``type`` (``str``): the type of the object in the particular domain (either ``predicate`` or ``argument``)
 46 |   - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``)
 47 |     
 48 | Predicate and argument nodes produced by PredPatt furthermore always
 49 | have at least one outgoing *instance* edge that points to nodes in the
 50 | syntax domain that correspond to the associated span of the predicate
 51 | or argument. At minimum, each such edge has the following attributes.
 52 | 
 53 |   - ``domain`` (``str``): the subgraph this node is part of (always ``interface``)
 54 |   - ``type`` (``str``): the type of the object in the particular domain (either ``head`` or ``nonhead``)
 55 |   - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``)     
 56 | 
 57 | Because PredPatt produces a unique head for each predicate and
 58 | argument, there is always exactly one instance edge of type ``head``
 59 | from any particular node in the semantics domain. There may or may not
 60 | be instance edges of type ``nonhead``.
 61 | 
 62 | In addition to instance edges, predicate nodes always have exactly one
 63 | outgoing edge connecting them to each of the nodes corresponding to
 64 | their arguments. At minimum, each such edge has the following
 65 | attributes.
 66 | 
 67 |   - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``)
 68 |   - ``type`` (``str``): the type of the object in the particular domain (always ``dependency``)
 69 |   - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``) 
 70 | 
 71 | There is one special case where an argument nodes has an outgoing edge
 72 | that points to a predicate node: clausal subordination.
 73 | 
 74 | For example, given the dependency parse for the sentence *Gene thought
 75 | that Chris gave the book to Pat .*, PredPatt produces the following.
 76 | 
 77 | ::
 78 | 
 79 |   ?a thinks ?b
 80 |       ?a: Gene
 81 |       ?b: SOMETHING := that Chris gave the book to Pat
 82 |    
 83 |   ?a gave ?b to ?c
 84 |       ?a: Chris
 85 |       ?b: the book
 86 |       ?c: Pat
 87 | 
 88 | In this case, the second argument of the predicate headed by *thinks*
 89 | is the argument *that Chris gave the book to Pat*, which is headed by
 90 | *gave*. This argument is associated with a node of type ``argument``
 91 | with span over positions {3, 4, 5, 6, 7, 8, 9} and identifier
 92 | ``ewt-SPLIT-SENTNUM-semantics-arg-5``. In addition, there is a
 93 | predicate headed by *gave*. This predicate is associated with a node
 94 | with span over positions {5, 8} and identifier
 95 | ``ewt-SPLIT-SENTNUM-semantics-pred-5``. Node
 96 | ``ewt-SPLIT-SENTNUM-semantics-arg-5`` then has an outgoing edge
 97 | pointing to ``ewt-SPLIT-SENTNUM-semantics-pred-5``. At minimum, each
 98 | such edge has the following attributes.
 99 | 
100 |   - ``domain`` (``str``): the subgraph this node is part of (always ``semantics``)
101 |   - ``type`` (``str``): the type of the object in the particular domain (always ``head``)
102 |   - ``frompredpatt`` (``bool``): whether this node is associated with a predicate or argument output by PredPatt (always ``True``) 
103 |      
104 | The ``type`` attribute in this case has the same value as instance
105 | edges, but crucially the ``domain`` attribute is distinct. In the case
106 | of instance edges, it is ``interface`` and in the case of clausal
107 | subordination, it is ``semantics``. This matters when making queries
108 | against the graph.
109 | 
110 | If the ``frompredpatt`` attribute has value ``True``, it is guaranteed
111 | that the only semantics edges of type ``head`` are ones that involve
112 | clausal subordination like the above. This is not guaranteed for nodes
113 | for which the ``frompredpatt`` attribute has value ``False``.
114 | 
115 | Every semantic graph contains at least four additional *performative*
116 | nodes that are note produced by PredPatt (and thus, for which the
117 | ``frompredpatt`` attribute has value ``False``).
118 | 
119 |   - ``ewt-SPLIT-SENTNUM-semantics-arg-0``: an argument node representing the entire sentence in the same way complement clauses are represented
120 |   - ``ewt-SPLIT-SENTNUM-semantics-pred-root``: a predicate node representing the author's production of the entire sentence directed at the addressee
121 |   - ``ewt-SPLIT-SENTNUM-semantics-arg-speaker``: an argument node representing the author
122 |   - ``ewt-SPLIT-SENTNUM-semantics-arg-addressee``: an argument node representing the addressee
123 | 
124 | All of these nodes have a ``domain`` attribute with value ``semantics``. Unlike nodes associated with PredPatt predicates and arguments, ``ewt-SPLIT-SENTNUM-semantics-pred-root``, ``ewt-SPLIT-SENTNUM-semantics-arg-speaker``, and ``ewt-SPLIT-SENTNUM-semantics-arg-addressee`` have no instance edges connecting them to syntactic nodes. In contrast, ``ewt-SPLIT-SENTNUM-semantics-arg-0`` has an instance head edge to ``ewt-SPLIT-SENTNUM-root-0``.
125 | 
126 | The ``ewt-SPLIT-SENTNUM-semantics-arg-0`` node has semantics head edges to each of the predicate nodes in the graph that are not dominated by any other semantics node. This node, in addition to ``ewt-SPLIT-SENTNUM-semantics-arg-speaker`` and ``ewt-SPLIT-SENTNUM-semantics-arg-addressee``, has a dependency edge to ``ewt-SPLIT-SENTNUM-semantics-pred-root``.
127 | 
128 | These nodes are included for purposes of forward compatibility. None of them currently have attributes, but future releases of decomp will include annotations on either them or their edges. 
129 | 


--------------------------------------------------------------------------------
/docs/source/data/syntactic-graphs.rst:
--------------------------------------------------------------------------------
 1 | `Universal Dependencies`_ Syntactic Graphs
 2 | ==========================================
 3 | 
 4 | .. _Universal Dependencies: https://universaldependencies.org/
 5 | 
 6 | The syntactic graphs that form the first layer of annotation in the dataset come from gold UD dependency parses provided in the UD-EWT_ treebank, which contains sentences from the Linguistic Data Consortium's constituency parsed EWT_. UD-EWT has predefined training (``train``), development (``dev``), and test (``test``) data in corresponding files in `CoNLL-U format`_: ``en_ewt-ud-train.conllu``, ``en_ewt-ud-dev.conllu``, and ``en_ewt-ud-test.conllu``. Henceforth, ``SPLIT`` ranges over ``train``, ``dev``, and ``test``.
 7 | 
 8 | .. _UD-EWT: https://github.com/UniversalDependencies/UD_English-EWT
 9 | .. _EWT: https://catalog.ldc.upenn.edu/LDC2012T13
10 | .. _CoNLL-U format: https://universaldependencies.org/format.html
11 | 
12 | In UDS, each dependency parsed sentence in UD-EWT is represented as a rooted_ `directed graph`_ (digraph). Each graph's identifier takes the form ``ewt-SPLIT-SENTNUM``, where ``SENTNUM`` is the ordinal position (1-indexed) of the sentence within ``en_ewt-ud-SPLIT.conllu``.
13 | 
14 | .. _rooted: https://en.wikipedia.org/wiki/Rooted_graph
15 | .. _directed graph: https://en.wikipedia.org/wiki/Directed_graph
16 | 
17 | Each token in a sentence is associated with a node with identifier ``ewt-SPLIT-SENTNUM-syntax-TOKNUM``, where ``TOKNUM`` is the token's ordinal position within the sentence (1-indexed, following the convention in UD-EWT). At minimum, each node has the following attributes.
18 | 
19 |   - ``position`` (``int``): the ordinal position (``TOKNUM``) of that node as an integer (again, 1-indexed)
20 |   - ``domain`` (``str``): the subgraph this node is part of (always ``syntax``)
21 |   - ``type`` (``str``): the type of the object in the particular domain (always ``token``)
22 |   - ``form`` (``str``): the actual token
23 |   - ``lemma`` (``str``): the lemma corresponding to the actual token
24 |   - ``upos`` (``str``): the UD part-of-speech tag
25 |   - ``xpos`` (``str``): the Penn TreeBank part-of-speech tag
26 |   - any attribute found in the features column of the CoNLL-U
27 | 
28 | For information about the values ``upos``, ``xpos``, and the attributes contained in the features column can take on, see the `UD Guidelines`_.
29 | 
30 | .. _UD Guidelines: https://universaldependencies.org/guidelines.html
31 | 
32 | Each graph also has a special root node with identifier ``ewt-SPLIT-SENTNUM-root-0``. This node always has a ``position`` attribute set to ``0`` and ``domain`` and ``type`` attributes set to ``root``.
33 | 
34 | Edges within the graph represent the grammatical relations (dependencies) annotated in UD-EWT. These dependencies are always represented as directed edges pointing from the head to the dependent. At minimum, each edge has the following attributes.
35 | 
36 |   - ``domain`` (``str``): the subgraph this node is part of (always ``syntax``)
37 |   - ``type`` (``str``): the type of the object in the particular domain (always ``dependency``)
38 |   - ``deprel`` (``str``): the UD dependency relation tag
39 | 
40 | For information about the values ``deprel`` can take on, see the `UD Guidelines`_.
41 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | Decomp: A toolkit for decompositional semantics
 2 | ===============================================
 3 | 
 4 | Decomp_ is a toolkit for working with the `Universal Decompositional
 5 | Semantics (UDS) dataset`_, which is a collection of directed acyclic
 6 | semantic graphs with real-valued node and edge attributes pointing
 7 | into `Universal Dependencies`_ syntactic dependency trees.
 8 | 
 9 | The toolkit is built on top of NetworkX_ and RDFLib_ making it
10 | straightforward to:
11 | 
12 |   - read the UDS dataset from its native JSON format
13 |   - query both the syntactic and semantic subgraphs of UDS (as well as
14 |     pointers between them) using SPARQL 1.1 queries
15 |   - serialize UDS graphs to many common formats, such as Notation3_,
16 |     N-Triples_, turtle_, and JSON-LD_, as well as any other format
17 |     supported by NetworkX
18 | 
19 | The toolkit was built by `Aaron Steven White`_ and is maintained by
20 | the `Decompositional Semantics Initiative`_. The UDS dataset was
21 | constructed from annotations collected by the `Decompositional
22 | Semantics Initiative`_.
23 | 
24 | If you use either UDS or Decomp in your research, we ask that you cite the following paper:
25 | 
26 |   White, Aaron Steven, Elias Stengel-Eskin, Siddharth Vashishtha, Venkata Subrahmanyan Govindarajan, Dee Ann Reisinger, Tim Vieira, Keisuke Sakaguchi, et al. 2020. `The Universal Decompositional Semantics Dataset and Decomp Toolkit`_. *Proceedings of The 12th Language Resources and Evaluation Conference*, 5698–5707. Marseille, France: European Language Resources Association.
27 | 
28 | .. code-block:: latex
29 | 
30 |   @inproceedings{white-etal-2020-universal,
31 |       title = "The Universal Decompositional Semantics Dataset and Decomp Toolkit",
32 |       author = "White, Aaron Steven  and
33 |         Stengel-Eskin, Elias  and
34 |         Vashishtha, Siddharth  and
35 |         Govindarajan, Venkata Subrahmanyan  and
36 |         Reisinger, Dee Ann  and
37 |         Vieira, Tim  and
38 |         Sakaguchi, Keisuke  and
39 |         Zhang, Sheng  and
40 |         Ferraro, Francis  and
41 |         Rudinger, Rachel  and
42 |         Rawlins, Kyle  and
43 |         Van Durme, Benjamin",
44 |       booktitle = "Proceedings of The 12th Language Resources and Evaluation Conference",
45 |       month = may,
46 |       year = "2020",
47 |       address = "Marseille, France",
48 |       publisher = "European Language Resources Association",
49 |       url = "https://www.aclweb.org/anthology/2020.lrec-1.699",
50 |       pages = "5698--5707",
51 |       ISBN = "979-10-95546-34-4",
52 |   }
53 | 
54 | 
55 | .. _Decomp: https://github.com/decompositional-semantics-initiative/decomp
56 | .. _Universal Decompositional Semantics (UDS) dataset: http://decomp.io
57 | .. _Universal Dependencies: https://universaldependencies.org/
58 | .. _NetworkX: https://github.com/networkx/networkx
59 | .. _RDFLib: https://github.com/RDFLib/rdflib
60 | .. _matplotlib: https://matplotlib.org/
61 | .. _D3: https://d3js.org/
62 | .. _Notation3: https://www.w3.org/TeamSubmission/n3/
63 | .. _N-Triples: https://www.w3.org/TR/n-triples/
64 | .. _turtle: https://www.w3.org/TeamSubmission/turtle/
65 | .. _JSON-LD: https://json-ld.org/
66 | .. _Aaron Steven White: http://aaronstevenwhite.io/
67 | .. _Decompositional Semantics Initiative: http://decomp.io/
68 | .. _The Universal Decompositional Semantics Dataset and Decomp Toolkit: https://www.aclweb.org/anthology/2020.lrec-1.699/
69 | 
70 | .. toctree::
71 |    :maxdepth: 2
72 |    :caption: Contents:
73 | 
74 |    install
75 |    tutorial/index
76 |    data/index
77 |    package/index
78 | 
79 | 
80 | Indices and tables
81 | ==================
82 | 
83 | * :ref:`genindex`
84 | * :ref:`modindex`
85 | * :ref:`search`
86 | 


--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
 1 | .. _install:
 2 | 
 3 | ============
 4 | Installation
 5 | ============
 6 | 
 7 | The most painless way to get started quickly is to use the included
 8 | barebones Python 3.6-based Dockerfile. To build the image and start a
 9 | python interactive prompt, use:
10 | 
11 | .. code-block:: bash
12 | 
13 |   git clone git://gitlab.hltcoe.jhu.edu/aswhite/decomp.git
14 |   cd decomp
15 |   docker build -t decomp .
16 |   docker run -it decomp python
17 |    
18 | A jupyter notebook can then be opened in the standard way.
19 | 
20 | Decomp can also be installed to a local environment using ``pip``.
21 | 
22 | .. code-block:: bash
23 | 
24 |    pip install git+git://github.com/decompositional-semantics-initiative/decomp.git
25 | 
26 | 
27 | As an alternative to ``pip`` you can clone the decomp repository and use the included ``setup.py`` with the ``install`` flag.
28 | 
29 | .. code-block:: bash
30 | 
31 |    git clone https://github.com/decompositional-semantics-initiative/decomp.git
32 |    cd decomp
33 |    pip install --user --no-cache-dir -r ./requirements.txt
34 |    python setup.py install
35 | 
36 | 
37 | If you would like to install the package for the purposes of development, you can use the included ``setup.py`` with the ``develop`` flag.
38 | 
39 | .. code-block:: bash
40 | 
41 |    git clone https://github.com/decompositional-semantics-initiative/decomp.git
42 |    cd decomp
43 |    pip install --user --no-cache-dir -r ./requirements.txt
44 |    python setup.py develop
45 | 
46 | 
47 | If you have trouble installing via setup.py or pip on OS X Mojave, adding the following environment variables may help.
48 | 
49 | .. code-block:: bash 
50 | 
51 |     CXXFLAGS=-stdlib=libc++ CFLAGS=-stdlib=libc++ python setup.py install
52 | 
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.corpus.corpus.rst:
--------------------------------------------------------------------------------
1 | decomp.corpus.corpus
2 | ====================
3 | 
4 | .. automodule:: decomp.corpus.corpus
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.corpus.rst:
--------------------------------------------------------------------------------
 1 | decomp.corpus
 2 | =============
 3 | 
 4 | .. automodule:: decomp.corpus
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. toctree::
10 |     decomp.corpus.corpus
11 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.graph.nx.rst:
--------------------------------------------------------------------------------
1 | decomp.graph.nx
2 | ===============
3 | 
4 | .. automodule:: decomp.graph.nx
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.graph.rdf.rst:
--------------------------------------------------------------------------------
1 | decomp.graph.rdf
2 | ================
3 | 
4 | .. automodule:: decomp.graph.rdf
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.graph.rst:
--------------------------------------------------------------------------------
 1 | decomp.graph
 2 | =============
 3 | 
 4 | .. automodule:: decomp.graph
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. toctree::
10 |     decomp.graph.rdf
11 |     decomp.graph.nx
12 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.predpatt.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.predpatt
2 | =========================
3 | 
4 | .. automodule:: decomp.semantics.predpatt
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.rst:
--------------------------------------------------------------------------------
 1 | decomp.semantics
 2 | ================
 3 | 
 4 | .. automodule:: decomp.semantics
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. toctree::
10 |     decomp.semantics.predpatt
11 |     decomp.semantics.uds
12 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.annotation.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.annotation
2 | ===============================
3 | 
4 | .. automodule:: decomp.semantics.uds.annotation
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.corpus.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.corpus
2 | ===========================
3 | 
4 | .. automodule:: decomp.semantics.uds.corpus
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.document.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.document
2 | =============================
3 | 
4 | .. automodule:: decomp.semantics.uds.document
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.graph.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.graph
2 | ==========================
3 | 
4 | .. automodule:: decomp.semantics.uds.graph
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.metadata.rst:
--------------------------------------------------------------------------------
1 | decomp.semantics.uds.metadata
2 | =============================
3 | 
4 | .. automodule:: decomp.semantics.uds.metadata
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.semantics.uds.rst:
--------------------------------------------------------------------------------
 1 | decomp.semantics.uds
 2 | ====================
 3 | 
 4 | .. automodule:: decomp.semantics.uds
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. toctree::
10 |     decomp.semantics.uds.corpus
11 |     decomp.semantics.uds.document    
12 |     decomp.semantics.uds.graph
13 |     decomp.semantics.uds.annotation
14 |     decomp.semantics.uds.metadata
15 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.syntax.dependency.rst:
--------------------------------------------------------------------------------
1 | decomp.syntax.dependency
2 | ========================
3 | 
4 | .. automodule:: decomp.syntax.dependency
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.syntax.rst:
--------------------------------------------------------------------------------
 1 | decomp.syntax
 2 | =============
 3 | 
 4 | .. automodule:: decomp.syntax
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. toctree::
10 |     decomp.syntax.dependency
11 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.vis.rst:
--------------------------------------------------------------------------------
 1 | decomp.vis
 2 | =============
 3 | 
 4 | .. automodule:: decomp.vis
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. toctree::
10 |     decomp.vis.uds_vis
11 | 


--------------------------------------------------------------------------------
/docs/source/package/decomp.vis.uds_vis.rst:
--------------------------------------------------------------------------------
1 | decomp.vis.uds_vis
2 | ==================
3 | 
4 | .. automodule:: decomp.vis.uds_vis
5 |    :members:
6 | 


--------------------------------------------------------------------------------
/docs/source/package/index.rst:
--------------------------------------------------------------------------------
 1 | Package Reference
 2 | =================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 2
 6 | 
 7 |    decomp.syntax
 8 |    decomp.semantics
 9 |    decomp.corpus
10 |    decomp.graph
11 |    decomp.vis
12 | 


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_genericity_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_genericity_no_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_no_protoroles_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_protoroles_no_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_no_protoroles_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_protoroles_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_no_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_node_props_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_node_props_no_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_node_props_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_node_props_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_protoroles_no_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_protoroles_no_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_protoroles_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_protoroles_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/assets/vis_syntax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/docs/source/tutorial/assets/vis_syntax.png


--------------------------------------------------------------------------------
/docs/source/tutorial/index.rst:
--------------------------------------------------------------------------------
 1 | Tutorials
 2 | =========
 3 | 
 4 | If you have not already :doc:`installed </install>` the decomp
 5 | package, follow those instructions before continuing the tutorial.
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 |    :caption: Contents:
10 | 
11 |    quick-start
12 |    reading
13 |    querying   
14 |    serializing
15 |    visualization
16 | 


--------------------------------------------------------------------------------
/docs/source/tutorial/querying.rst:
--------------------------------------------------------------------------------
  1 | Querying UDS Graphs
  2 | ===================
  3 | 
  4 | Decomp provides a rich array of methods for querying UDS graphs: both
  5 | pre-compiled and user-specified. Arbitrary user-specified graph
  6 | queries can be performed using the `UDSSentenceGraph.query`_ instance
  7 | method. This method accepts arbitrary SPARQL 1.1 queries, either as
  8 | strings or as precompiled `Query`_ objects built using RDFlib's
  9 | `prepareQuery`_.
 10 | 
 11 | .. _UDSSentenceGraph.query: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.query
 12 | .. _Query: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.html#rdflib.plugins.sparql.sparql.Query
 13 | .. _prepareQuery: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.html?highlight=preparequery#rdflib.plugins.sparql.processor.prepareQuery
 14 | 
 15 | 
 16 | **NOTE:** Querying is not currently supported for document-level graphs
 17 | (`UDSDocumentGraph`_ objects) or for sentence-level graphs that contain
 18 | raw annotations (`RawUDSDataset`_).
 19 | 
 20 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
 21 | .. _RawUDSDataset: ../package/decomp.semantics.uds.html#decomp.semantics.uds.RawUDSDataset
 22 | 
 23 | Pre-compiled queries
 24 | --------------------
 25 | 
 26 | For many use cases, the various instance attributes and methods for
 27 | accessing nodes, edges, and their attributes in the UDS graphs will
 28 | likely be sufficient; there is no need to use ``query``. For
 29 | example, to get a dictionary mapping identifiers for syntax nodes in
 30 | the UDS graph to their attributes, you can use:
 31 |  
 32 | .. code-block:: python
 33 | 
 34 |    uds["ewt-train-12"].syntax_nodes
 35 | 
 36 | To get a dictionary mapping identifiers for semantics nodes in the UDS
 37 | graph to their attributes, you can use:
 38 |    
 39 | .. code-block:: python
 40 |    
 41 |    uds["ewt-train-12"].semantics_nodes   
 42 | 
 43 | To get a dictionary mapping identifiers for semantics edges (tuples of
 44 | node identifiers) in the UDS graph to their attributes, you can use:
 45 |   
 46 | .. code-block:: python
 47 |    
 48 |    uds["ewt-train-12"].semantics_edges()
 49 | 
 50 | To get a dictionary mapping identifiers for semantics edges (tuples of
 51 | node identifiers) in the UDS graph involving the predicate headed by
 52 | the 7th token to their attributes, you can use:
 53 |    
 54 | .. code-block:: python  
 55 |    
 56 |    uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7')
 57 | 
 58 | To get a dictionary mapping identifiers for syntax edges (tuples of
 59 | node identifiers) in the UDS graph to their attributes, you can use:
 60 |    
 61 | .. code-block:: python  
 62 |    
 63 |    uds["ewt-train-12"].syntax_edges()
 64 | 
 65 | And to get a dictionary mapping identifiers for syntax edges (tuples
 66 | of node identifiers) in the UDS graph involving the node for the 7th
 67 | token to their attributes, you can use:
 68 |    
 69 | .. code-block:: python  
 70 |    
 71 |    uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7')
 72 | 		
 73 | 
 74 | There are also methods for accessing relationships between semantics
 75 | and syntax nodes. For example, you can get a tuple of the ordinal
 76 | position for the head syntax node in the UDS graph that maps of the
 77 | predicate headed by the 7th token in the corresponding sentence to a
 78 | list of the form and lemma attributes for that token, you can use:
 79 | 
 80 | .. code-block:: python
 81 | 
 82 |    uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
 83 | 
 84 | And if you want the same information for every token in the span, you
 85 | can use:
 86 |    
 87 | .. code-block:: python
 88 |    
 89 |    uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
 90 | 
 91 | This will return a dictionary mapping ordinal position for syntax
 92 | nodes in the UDS graph that make of the predicate headed by the 7th
 93 | token in the corresponding sentence to a list of the form and lemma
 94 | attributes for the corresponding tokens.
 95 | 
 96 | Custom queries
 97 | --------------
 98 | 
 99 | Where the above methods generally turn out to be insufficient is in
100 | selecting nodes and edges on the basis of (combinations of their
101 | attributes). This is where having the full power of SPARQL comes in
102 | handy. This power comes with substantial slow downs in the speed of
103 | queries, however, so if you can do a query without using SPARQL you
104 | should try to.
105 | 
106 | For example, if you were interested in extracting only predicates
107 | referring to events that likely happened and likely lasted for
108 | minutes, you could use:
109 | 
110 | .. code-block:: python
111 | 
112 |    querystr = """
113 |               SELECT ?pred
114 |               WHERE { ?pred <domain> <semantics> ;
115 |                             <type> <predicate> ;
116 | 	                    <factual> ?factual ;
117 | 		            <dur-minutes> ?duration
118 | 	                    FILTER ( ?factual > 0 && ?duration > 0 )
119 |                     }
120 |               """
121 | 
122 |    results = {gid: graph.query(querystr, query_type='node', cache_rdf=False)
123 |               for gid, graph in uds.items()}
124 | 
125 | Or more tersely (but equivalently):
126 | 
127 | .. code-block:: python
128 | 
129 |    results = uds.query(querystr, query_type='node', cache_rdf=False)
130 | 	      
131 | Note that the ``query_type`` parameter is set to ``'node'``. This
132 | setting means that a dictionary mapping node identifiers to node
133 | attribute values will be returned. If no such query type is passed, an
134 | RDFLib `Result`_ object will be returned, which you will need to
135 | postprocess yourself. This is necessary if, for instance, you are
136 | making a ``CONSTRUCT``, ``ASK``, or ``DESCRIBE`` query.
137 | 
138 | Also, note that the ``cache_rdf`` parameter is set to ``False``. This is a
139 | memory-saving measure, as ``UDSSentenceGraph.query`` implicitly builds an RDF
140 | graph on the backend, and these graphs can be quite large. Leaving
141 | ``cache_rdf`` at its defaults of ``True`` will substantially speed up
142 | later queries at the expense of sometimes substantial memory costs.
143 | 
144 | .. _Result: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.query.Result
145 |    
146 | Constraints can also make reference to node and edge attributes of
147 | other nodes. For instance, if you were interested in extracting all
148 | predicates referring to events that are likely spatiotemporally
149 | delimited and have at least one spatiotemporally delimited participant
150 | that was volitional in the event, you could use:
151 | 
152 | .. code-block:: python
153 | 
154 |    querystr = """
155 |               SELECT DISTINCT ?node
156 |               WHERE { ?node ?edge ?arg ;
157 |                             <domain> <semantics> ;
158 |                             <type>   <predicate> ;
159 |                             <pred-particular> ?predparticular
160 | 			    FILTER ( ?predparticular > 0 ) .
161 |                       ?arg  <domain> <semantics> ;
162 | 		            <type>   <argument>  ;
163 | 			    <arg-particular> ?argparticular
164 | 			    FILTER ( ?argparticular > 0 ) .
165 |                       ?edge <volition> ?volition
166 | 		            FILTER ( ?volition > 0 ) .    
167 |                     }
168 |               """
169 | 
170 |    results = uds.query(querystr, query_type='node', cache_rdf=False)
171 | 		
172 | Disjunctive constraints are also possible. For instance, for the last
173 | query, if you were interested in either volitional or sentient
174 | arguments, you could use:
175 | 
176 | .. code-block:: python
177 | 
178 |    querystr = """
179 |               SELECT DISTINCT ?node
180 |               WHERE { ?node ?edge ?arg ;
181 |                             <domain> <semantics> ;
182 |                             <type>   <predicate> ;
183 |                             <pred-particular> ?predparticular
184 | 			    FILTER ( ?predparticular > 0 ) .
185 |                       ?arg  <domain> <semantics> ;
186 | 		            <type>   <argument>  ;
187 | 			    <arg-particular> ?argparticular
188 | 			    FILTER ( ?argparticular > 0 ) .
189 |                       { ?edge <volition> ?volition
190 | 		              FILTER ( ?volition > 0 )
191 | 	              } UNION
192 | 		      { ?edge <sentient> ?sentient
193 | 		              FILTER ( ?sentient > 0 )
194 | 	              }
195 |                     }
196 |               """
197 | 
198 |    results = uds.query(querystr, query_type='node', cache_rdf=False)
199 |   
200 | Beyond returning node attributes based on complex constraints, you can
201 | also return edge attributes. For instance, for the last query, if you
202 | were interested in all the attributes of edges connecting predicates
203 | and arguments satisfying the constraints of the last query, you could
204 | simply change which variable is bound by ``SELECT`` and set
205 | ``query_type`` to ``'edge'``.
206 | 
207 | .. code-block:: python
208 | 
209 |    querystr = """
210 |               SELECT ?edge
211 |               WHERE { ?node ?edge ?arg ;
212 |                             <domain> <semantics> ;
213 |                             <type>   <predicate> ;
214 |                             <pred-particular> ?predparticular
215 | 			    FILTER ( ?predparticular > 0 ) .
216 |                       ?arg  <domain> <semantics> ;
217 | 		            <type>   <argument>  ;
218 | 			    <arg-particular> ?argparticular
219 | 			    FILTER ( ?argparticular > 0 ) .
220 |                       { ?edge <volition> ?volition
221 | 		              FILTER ( ?volition > 0 )
222 | 	              } UNION
223 | 		      { ?edge <sentient> ?sentient
224 | 		              FILTER ( ?sentient > 0 )
225 | 	              }
226 |                     }
227 |               """
228 | 
229 |    results = uds.query(querystr, query_type='edge', cache_rdf=False)
230 | 


--------------------------------------------------------------------------------
/docs/source/tutorial/quick-start.rst:
--------------------------------------------------------------------------------
  1 | Quick Start
  2 | ===========
  3 | 
  4 | To read the Universal Decompositional Semantics (UDS) dataset, use:
  5 | 
  6 | .. code-block:: python
  7 | 
  8 |    from decomp import UDSCorpus
  9 | 
 10 |    uds = UDSCorpus()
 11 | 
 12 | This imports a `UDSCorpus`_ object ``uds``, which contains all
 13 | graphs across all splits in the data. If you would like a corpus,
 14 | e.g., containing only a particular split, see other loading options in
 15 | :doc:`reading`.
 16 | 
 17 | .. _UDSCorpus: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus
 18 | 
 19 | The first time you read UDS, it will take several minutes to
 20 | complete while the dataset is built from the `Universal Dependencies
 21 | English Web Treebank`_, which is not shipped with the package (but is
 22 | downloaded automatically on import in the background), and the `UDS
 23 | annotations`_, which are shipped with the package. Subsequent uses
 24 | will be faster, since the dataset is cached on build.
 25 | 
 26 | .. _Universal Dependencies English Web Treebank: https://github.com/UniversalDependencies/UD_English-EWT
 27 | .. _UDS annotations: http://decomp.io/data/
 28 | 
 29 | `UDSSentenceGraph`_ objects in the corpus can be accessed using standard
 30 | dictionary getters or iteration. For instance, to get the UDS graph
 31 | corresponding to the 12th sentence in ``en-ud-train.conllu``, you can
 32 | use:
 33 | 
 34 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
 35 | 
 36 | .. code-block:: python
 37 | 
 38 |    uds["ewt-train-12"]
 39 | 
 40 | To access documents (`UDSDocument`_ objects, each of which has an associated
 41 | `UDSDocumentGraph`_), you can use:
 42 | 
 43 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument
 44 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
 45 | 
 46 | .. code-block:: python
 47 | 
 48 |    uds.documents["reviews-112579"]
 49 | 
 50 | 
 51 | To get the associated document graph, use:
 52 | 
 53 | .. code-block:: python
 54 | 
 55 |    uds.documents["reviews-112579"].document_graph
 56 | 
 57 | 
 58 | More generally, ``UDSCorpus`` objects behave like dictionaries. For
 59 | example, to print all the sentence-level graph identifiers in the corpus
 60 | (e.g. ``"ewt-train-12"``), you can use:
 61 | 
 62 | .. code-block:: python
 63 |    
 64 |    for graphid in uds:
 65 |        print(graphid)
 66 | 
 67 | 
 68 | To print all the document identifiers in the corpus, which correspond
 69 | directly to English Web Treebank file IDs (e.g. ``"reviews-112579"``), you 
 70 | can use:
 71 | 
 72 | .. code-block:: python
 73 | 
 74 |    for documentid in uds.documents:
 75 |        print(documentid)
 76 | 
 77 | 
 78 | Similarly, to print all the sentence-level graph identifiers in the corpus
 79 | (e.g. ``"ewt-train-12"``) along with the corresponding sentence, you can use:
 80 | 
 81 | .. code-block:: python
 82 | 
 83 |    for graphid, graph in uds.items():
 84 |        print(graphid)
 85 |        print(graph.sentence)
 86 |        
 87 | 
 88 | Likewise, the following will print all document identifiers, along with each
 89 | document's entire text:
 90 | 
 91 | .. code-block:: python
 92 | 
 93 |    for documentid, document in uds.documents.items():
 94 |        print(documentid)
 95 |        print(document.text)
 96 | 
 97 | 
 98 | A list of sentence-level graph identifiers can also be accessed via the 
 99 | ``graphids`` attribute of the UDSCorpus. A mapping from these identifiers 
100 | and the corresponding graph can be accessed via the ``graphs`` attribute.
101 | 
102 | .. code-block:: python
103 | 
104 |    # a list of the sentence-level graph identifiers in the corpus
105 |    uds.graphids
106 | 
107 |    # a dictionary mapping the sentence-level 
108 |    # graph identifiers to the corresponding graph
109 |    uds.graphs
110 | 
111 | 
112 | A list of document identifiers can also be accessed via the ``document_ids``
113 | attribute of the UDSCorpus:
114 | 
115 | .. code-block:: python
116 | 
117 |    uds.document_ids
118 | 
119 | 
120 | For sentence-level graphs, there are various instance attributes and 
121 | methods for accessing nodes, edges, and their attributes in the UDS
122 | sentence-level graphs. For example, to get a dictionary mapping identifiers for syntax nodes in a sentence-level graph to their attributes, you can use:
123 |  
124 | .. code-block:: python
125 | 
126 |    uds["ewt-train-12"].syntax_nodes
127 | 
128 | To get a dictionary mapping identifiers for semantics nodes in the UDS
129 | graph to their attributes, you can use:
130 |    
131 | .. code-block:: python
132 |    
133 |    uds["ewt-train-12"].semantics_nodes   
134 | 
135 | To get a dictionary mapping identifiers for semantics edges (tuples of
136 | node identifiers) in the UDS graph to their attributes, you can use:
137 |   
138 | .. code-block:: python
139 |    
140 |    uds["ewt-train-12"].semantics_edges()
141 | 
142 | To get a dictionary mapping identifiers for semantics edges (tuples of
143 | node identifiers) in the UDS graph involving the predicate headed by
144 | the 7th token to their attributes, you can use:
145 |    
146 | .. code-block:: python  
147 |    
148 |    uds["ewt-train-12"].semantics_edges('ewt-train-12-semantics-pred-7')
149 | 
150 | To get a dictionary mapping identifiers for syntax edges (tuples of
151 | node identifiers) in the UDS graph to their attributes, you can use:
152 |    
153 | .. code-block:: python  
154 |    
155 |    uds["ewt-train-12"].syntax_edges()
156 | 
157 | And to get a dictionary mapping identifiers for syntax edges (tuples
158 | of node identifiers) in the UDS graph involving the node for the 7th
159 | token to their attributes, you can use:
160 |    
161 | .. code-block:: python  
162 |    
163 |    uds["ewt-train-12"].syntax_edges('ewt-train-12-syntax-7')
164 | 		
165 | 
166 | There are also methods for accessing relationships between semantics
167 | and syntax nodes. For example, you can get a tuple of the ordinal
168 | position for the head syntax node in the UDS graph that maps of the
169 | predicate headed by the 7th token in the corresponding sentence to a
170 | list of the form and lemma attributes for that token, you can use:
171 | 
172 | .. code-block:: python
173 | 
174 |    uds["ewt-train-12"].head('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
175 | 
176 | And if you want the same information for every token in the span, you
177 | can use:
178 |    
179 | .. code-block:: python
180 |    
181 |    uds["ewt-train-12"].span('ewt-train-12-semantics-pred-7', ['form', 'lemma'])
182 | 
183 | This will return a dictionary mapping ordinal position for syntax
184 | nodes in the UDS graph that make of the predicate headed by the 7th
185 | token in the corresponding sentence to a list of the form and lemma
186 | attributes for the corresponding tokens.
187 | 
188 | More complicated queries of a sentence-level UDS graph can be performed 
189 | using the ``query`` method, which accepts arbitrary SPARQL 1.1 queries. See
190 | :doc:`querying` for details.
191 | 
192 | Queries on document-level graphs are not currently supported. However, each
193 | `UDSDocument`_ does contain a number of useful attributes, including its ``genre``
194 | (corresponding to the English Web Treebank subcorpus); its ``text`` (as
195 | demonstrated above); its ``timestamp``; the ``sentence_ids`` of its 
196 | constituent sentences; and the sentence-level graphs (``sentence_graphs``) 
197 | associated with those sentences. Additionally, one can also look up the
198 | semantics node associated with a particular node in the document graph via
199 | the `semantics_node`_ instance method.
200 | 
201 | .. _UDSDocument: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument
202 | .. _semantics_node: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocument.semantics_node
203 | 
204 | 
205 | Lastly, iterables for the nodes and edges of a document-level graph may be
206 | accessed as follows:
207 | 
208 | 
209 | .. code-block:: python
210 | 
211 |    uds.documents["reviews-112579"].document_graph.nodes
212 |    uds.documents["reviews-112579"].document_graph.edges
213 | 
214 | 
215 | Unlike the nodes and edges in a sentence-level graph, the ones in a document-
216 | level graph all share a common (``document``) domain. By default, document
217 | graphs are initialized without edges and with one node for each semantics node
218 | in the sentence-level graphs associated with the constituent sentences. Edges
219 | may be added by supplying annotations (see :doc:`reading`).
220 | 


--------------------------------------------------------------------------------
/docs/source/tutorial/reading.rst:
--------------------------------------------------------------------------------
  1 | Reading the UDS dataset
  2 | =======================
  3 | 
  4 | The most straightforward way to read the Universal Decompositional
  5 | Semantics (UDS) dataset is to import it.
  6 | 
  7 | .. code-block:: python
  8 | 
  9 |    from decomp import UDSCorpus
 10 | 
 11 |    uds = UDSCorpus()
 12 | 
 13 | This loads a `UDSCorpus`_ object ``uds``, which contains all
 14 | graphs across all splits in the data.
 15 | 
 16 | .. _UDSCorpus: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus
 17 | 
 18 | As noted in :doc:`quick-start`, the first time you do read UDS, it
 19 | will take several minutes to complete while the dataset is built from
 20 | the `Universal Dependencies English Web Treebank`_ (UD-EWT), which is not
 21 | shipped with the package (but is downloaded automatically on import in
 22 | the background), and the `UDS annotations`_, which are shipped with
 23 | the package as package data. Normalized annotations are loaded by default.
 24 | To load raw annotations, specify ``"raw"`` as the argument to the UDSCorpus
 25 | ``annotation_format`` keyword arugment as follows:
 26 | 
 27 | .. code-block:: python
 28 | 
 29 |    from decomp import UDSCorpus
 30 | 
 31 |    uds = UDSCorpus(annotation_format="raw")
 32 | 
 33 | (See `Adding annotations`_ below for more detail on annotation types.)
 34 | Subsequent uses of the corpus will be faster after the initial build,
 35 | since the built dataset is cached.
 36 | 
 37 | .. _Universal Dependencies English Web Treebank: https://github.com/UniversalDependencies/UD_English-EWT
 38 | .. _UDS annotations: http://decomp.io/data/
 39 | 
 40 | Standard splits
 41 | ---------------
 42 | 
 43 | If you would rather read only the graphs in the training, development,
 44 | or test split, you can do that by specifying the ``split`` parameter
 45 | of ``UDSCorpus``.
 46 | 
 47 | .. code-block:: python
 48 | 
 49 |    from decomp import UDSCorpus
 50 | 
 51 |    # read the train split of the UDS corpus
 52 |    uds_train = UDSCorpus(split='train')
 53 | 
 54 | Adding annotations
 55 | ------------------
 56 |    
 57 | Additional annotations beyond the standard UDS annotations can be
 58 | added using this method by passing a list of `UDSAnnotation`_
 59 | objects. These annotations can be added at two levels: the sentence level
 60 | and the document level. Sentence-level annotations contain attributes of
 61 | `UDSSentenceGraph`_ nodes or edges. Document-level annotations contain
 62 | attributes for `UDSDocumentGraph`_ nodes or edges. Document-level
 63 | edge annotations may relate nodes associated with different sentences 
 64 | in a document, although they are added as annotations only to the
 65 | the appropriate `UDSDocumentGraph`_.
 66 | 
 67 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
 68 | .. _UDSDocumentGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSDocumentGraph
 69 | .. _UDSAnnotation: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSAnnotation
 70 | 
 71 | Sentence-level and document-level annotations share the same two in-memory
 72 | representations: ``RawUDSDataset`` and ``NormalizedUDSDataset``. The former 
 73 | may have multiple annotations for the same node or edge attribute, while the
 74 | latter must have only a single annotation. Both are loaded from 
 75 | JSON-formatted files, but differ in the expected format (see the 
 76 | `from_json`_ methods of each class for formatting guidelines). For example,
 77 | if you have some additional *normalized* sentence-level annotations in a file
 78 | ``new_annotations.json``, those can be added to the existing UDS annotations 
 79 | using:
 80 | 
 81 | .. _NormalizedUDSDataset: ../package/decomp.semantics.uds.html#decomp.semantics.uds.NormalizedUDSDataset
 82 | .. _from_json: ../package/decomp.semantics.uds.html#decomp.semantics.uds.NormalizedUDSDataset.from_json
 83 | 
 84 | .. code-block:: python
 85 | 
 86 |    from decomp import NormalizedUDSDataset
 87 | 		
 88 |    # read annotations
 89 |    new_annotations = [NormalizedUDSDataset.from_json("new_annotations.json")]
 90 | 
 91 |    # read the train split of the UDS corpus and append new annotations
 92 |    uds_train_plus = UDSCorpus(split='train', sentence_annotations=new_annotations)
 93 | 
 94 | If instead you wished to add *raw* annotations (and supposing those
 95 | annotations were still in "new_annotations.json"), you would do the following:
 96 | 
 97 | .. code-block:: python
 98 | 
 99 |    from decomp import RawUDSDataset
100 | 		
101 |    # read annotations
102 |    new_annotations = [RawUDSDataset.from_json("new_annotations.json")]
103 | 
104 |    # read the train split of the UDS corpus and append new annotations
105 |    uds_train_plus = UDSCorpus(split='train', sentence_annotations=new_annotations,
106 |                               annotation_format="raw")
107 | 
108 | If ``new_annotations.json`` contained document-level annotations
109 | you would pass ``new_annotations.json`` to the constructor keyword 
110 | argument ``document_annotations`` instead of to ``sentence_annotations``.
111 | Importantly, these annotations are added *in addition* to the existing
112 | UDS annotations that ship with the toolkit. You do not need to add these
113 | manually.
114 | 
115 | Finally, it should be noted that querying is currently **not** supported 
116 | for document-level graphs or for sentence-level graphs containing raw
117 | annotations.
118 | 
119 | Reading from an alternative location
120 | ------------------------------------
121 | 
122 | If you would like to read the dataset from an alternative
123 | location—e.g. if you have serialized the dataset to JSON, using the
124 | `to_json`_ instance method—this can be accomplished using
125 | ``UDSCorpus`` class methods (see :doc:`serializing` for more
126 | information on serialization). For example, if you serialize
127 | ``uds_train`` to the files ``uds-ewt-sentences-train.json`` (for
128 | sentences) and ``uds-ewt-documents-train.json`` (for the documents),
129 | you can read it back into memory using:
130 | 
131 | .. _to_json: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus.to_json
132 | 
133 | .. code-block:: python
134 | 
135 |    # serialize uds_train to JSON
136 |    uds_train.to_json("uds-ewt-sentences-train.json", "uds-ewt-documents-train.json")
137 | 
138 |    # read JSON serialized uds_train
139 |    uds_train = UDSCorpus.from_json("uds-ewt-sentences-train.json", "uds-ewt-documents-train.json")   
140 | 
141 | Rebuilding the corpus
142 | ---------------------
143 |    
144 | If you would like to rebuild the corpus from the UD-EWT CoNLL files
145 | and some set of JSON-formatted annotation files, you can use the
146 | analogous `from_conll`_ class method. Importantly, unlike the
147 | standard instance initialization described above, the UDS annotations
148 | are *not* automatically added. For example, if ``en-ud-train.conllu``
149 | is in the current working directory and you have already loaded
150 | ``new_annotations`` as above, a corpus containing only those
151 | annotations (without the UDS annotations) can be loaded using:
152 | 
153 | .. _from_conll: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSCorpus.from_conll
154 | 
155 | .. code-block:: python
156 | 
157 |    # read the train split of the UD corpus and append new annotations
158 |    uds_train_annotated = UDSCorpus.from_conll("en-ud-train.conllu", sentence_annotations=new_annotations)   
159 | 
160 | This also means that if you only want the semantic graphs as implied
161 | by PredPatt (without annotations), you can use the ``from_conll``
162 | class method to load them.
163 | 
164 | .. code-block:: python
165 | 
166 |    # read the train split of the UD corpus
167 |    ud_train = UDSCorpus.from_conll("en-ud-train.conllu")   
168 | 
169 | Note that, because PredPatt is used for predicate-argument extraction,
170 | only versions of UD-EWT that are compatible with PredPatt can be used
171 | here. Version 1.2 is suggested.
172 |    
173 | Though other serialization formats are available (see
174 | :doc:`serializing`), these formats are not yet supported for reading.
175 | 


--------------------------------------------------------------------------------
/docs/source/tutorial/serializing.rst:
--------------------------------------------------------------------------------
 1 | Serializing the UDS dataset
 2 | ===========================
 3 | 
 4 | The canonical serialization format for the Universal Decompositional
 5 | Semantics (UDS) dataset is JSON. Sentence- and document-level graphs
 6 | are serialized separately. For example, if you wanted to serialize
 7 | the entire UDS dataset to the files ``uds-sentence.json`` (for
 8 | sentences) and ``uds-document.json`` (for documents), you would use:
 9 | 
10 | .. code-block:: python
11 | 
12 |    from decomp import uds
13 | 
14 |    uds.to_json("uds-sentence.json", "uds-document.json")
15 | 
16 | The particular format is based directly on the `adjacency_data`_
17 | method implemented in `NetworkX`_
18 | 
19 | .. _adjacency_data: https://networkx.github.io/documentation/stable/reference/readwrite/generated/networkx.readwrite.json_graph.adjacency_data.html#networkx.readwrite.json_graph.adjacency_data
20 | .. _NetworkX: https://github.com/networkx/networkx
21 | 
22 | For the sentence-level graphs only, in addition to this JSON format, 
23 | any serialization format supported by `RDFLib`_ can also be used by
24 | accessing the `rdf`_ attribute of each `UDSSentenceGraph`_ object.
25 | This attribute exposes an `rdflib.graph.Graph`_ object, which implements
26 | a `serialize`_ method. By default, this method outputs rdf/xml. The 
27 | ``format`` parameter can also be set to ``'n3'``, ``'turtle'``, 
28 | ``'nt'``, ``'pretty-xml'``, ``'trix'``, ``'trig'``, or ``'nquads'``;
29 | and additional formats, such as JSON-LD, can be supported by installing
30 | plugins for RDFLib.
31 | 
32 | .. _serialize: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.graph.Graph.serialize
33 | .. _rdf: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.rdf
34 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
35 | .. _rdflib.graph.Graph: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#graph-module
36 | 
37 | Before considering serialization to such a format, be aware that only
38 | the JSON format mentioned above can be read by the
39 | toolkit. Additionally, note that if your aim is to query the graphs in
40 | the corpus, this can be done using the `query`_ instance method in
41 | ``UDSSentenceGraph``. See :doc:`querying` for details.
42 | 
43 | .. _RDFLib: https://github.com/RDFLib/rdflib
44 | .. _query: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph.query
45 | 


--------------------------------------------------------------------------------
/docs/source/tutorial/visualization.rst:
--------------------------------------------------------------------------------
 1 | Visualizing UDS Graphs 
 2 | ======================
 3 | 
 4 | Decomp comes with a built-in interactive visualization tool using the `UDSVisualization`_ object. This object visualizes a `UDSSentenceGraph`_.
 5 | 
 6 | .. _UDSVisualization: ../package/decomp.vis.uds_vis.html#decomp.vis.uds_vis.UDSVisualization
 7 | .. _UDSSentenceGraph: ../package/decomp.semantics.uds.html#decomp.semantics.uds.UDSSentenceGraph
 8 | 
 9 | A visualization (which is based on `Dash`_) is served to your local browser via port 8050 (e.g. `http://localhost:8050`).
10 | The following snippet visualizes the first graph in the dev split: 
11 | 
12 | .. _Dash: https://dash.plotly.com
13 | 
14 |  
15 | .. code-block:: python
16 | 
17 |    graph = uds["ewt-dev-1"]
18 |    vis = UDSVisualization(graph)
19 |    vis.serve()
20 | 
21 | The browser window will look like this: 
22 | 
23 |  .. image:: assets/vis_no_syntax.png
24 | 
25 | Black edges indicate edges in the semantic graph, while gray arrows are instance edges between semantics and syntax nodes. 
26 | Thick gray arrows indicate the syntactic head of a semantic argument or predicate. 
27 | Semantics nodes have a thick outline when they are annotated with decomp properties. 
28 | Hovering over such a node will reveal the annotations in a pop-out window. 
29 | 
30 |  .. image:: assets/vis_node_props_no_syntax.png
31 | 
32 | Similarly, yellow boxes on edges indicate protorole annotations, and can be hovered over to reveal their values.
33 | 
34 |  .. image:: assets/vis_protoroles_no_syntax.png
35 | 
36 | Using the checkboxes at the top left, annotation subspaces can be selected and de-selected. 
37 | If all the annotations for a node or edge are de-selected, it will become non-bolded or disappear 
38 | 
39 |  .. image:: assets/vis_no_protoroles_no_syntax.png
40 | 
41 | 
42 | Several options can be supplied to a visualization via arguments. For example, we can visualize the syntactic parse along with the semantic parse by setting 
43 | 
44 | .. code-block:: python
45 | 
46 |    vis = UDSVisualization(graph, add_syntax_edges = True)
47 | 
48 | which results in the following visualization. 
49 | 
50 | 
51 |  .. image:: assets/vis_syntax.png
52 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | requests==2.22.0
 2 | networkx>=2.5.1
 3 | memoized_property==1.0.3
 4 | typing==3.6.2
 5 | rdflib==4.2.2
 6 | setuptools>=52.0.0
 7 | numpy>=1.16.4
 8 | pyparsing==2.2.0
 9 | overrides==3.1.0
10 | http://github.com/hltcoe/PredPatt/tarball/master#egg=predpatt
11 | dash[testing]==1.9.1
12 | selenium==3.141.0
13 | jsonpickle==1.4.1
14 | pytest==6.2.2
15 | matplotlib==3.2.1
16 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | 
 3 | setup(name='decomp',
 4 |       version='0.2.2',
 5 |       description='Toolkit for working with Universal\
 6 |                    Decompositional Semantics graphs',
 7 |       url='https://decomp.io/',
 8 |       author='Aaron Steven White',
 9 |       author_email='aaron.white@rochester.edu',
10 |       license='MIT',
11 |       packages=find_packages(),
12 |       package_dir={'decomp': 'decomp'},
13 |       package_data={'decomp': ['data/*']},
14 |       install_requires=['requests==2.22.0',
15 |                         'networkx>=2.5.1',
16 |                         'memoized_property==1.0.3',
17 |                         'overrides==3.1.0',
18 |                         'typing==3.6.2',
19 |                         'rdflib==4.2.2',
20 |                         'setuptools>=52.0.0',
21 |                         'numpy>=1.16.4',
22 |                         'pyparsing==2.2.0',
23 |                         'predpatt @ http://github.com/hltcoe/PredPatt/tarball/master#egg=predpatt'],
24 |       test_suite='nose.collector',
25 |       tests_require=['nose'],
26 |       include_package_data=True,
27 |       zip_safe=False)
28 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | This directory contains the tests for the [Decomp
 2 | toolkit](https://github.com/decompositional-semantics-initiative/decomp). Theses
 3 | tests use the [`pytest` framework](https://docs.pytest.org/).
 4 | 
 5 | # Installation
 6 | 
 7 | To run the tests in this directory, ensure that both the toolkit and
 8 | `pytest` are installed.
 9 | 
10 | ```bash
11 | pip install --user pytest==6.0.* git+git://github.com/decompositional-semantics-initiative/decomp.git
12 | ```
13 | 
14 | # Running the test suite
15 | 
16 | The entire test suite can be run from the root directory of the
17 | toolkit installation using:
18 | 
19 | ```bash
20 | pytest
21 | ```
22 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import os
 4 | 
 5 | from decomp.semantics.uds.annotation import NormalizedUDSAnnotation
 6 | from decomp.semantics.uds.annotation import RawUDSAnnotation
 7 | 
 8 | def pytest_configure(config):
 9 |     config.addinivalue_line(
10 |         "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')"
11 |     )
12 | 
13 | def pytest_addoption(parser):
14 |     parser.addoption(
15 |         "--runslow", action="store_true", default=False, help="run slow tests"
16 |     )
17 | 
18 | def pytest_collection_modifyitems(config, items):
19 |     if config.getoption("--runslow"):
20 |         # --runslow given in cli: do not skip slow tests
21 |         return
22 | 
23 |     skip_slow = pytest.mark.skip(reason="need --runslow option to run")
24 | 
25 |     for item in items:
26 |         if "slow" in item.keywords:
27 |             item.add_marker(skip_slow)    
28 | 
29 | @pytest.fixture
30 | def test_dir():
31 |     return os.path.dirname(os.path.abspath(__file__))
32 | 
33 | @pytest.fixture
34 | def test_data_dir(test_dir):
35 |     return os.path.join(test_dir, 'data/')
36 | 
37 | 
38 | @pytest.fixture
39 | def normalized_node_sentence_annotation(test_data_dir):
40 |     fpath = os.path.join(test_data_dir, 'normalized_node_sentence_annotation.json')
41 | 
42 |     with open(fpath) as f:
43 |         return f.read()
44 | 
45 | @pytest.fixture
46 | def normalized_edge_sentence_annotation(test_data_dir):
47 |     fpath = os.path.join(test_data_dir, 'normalized_edge_sentence_annotation.json')
48 | 
49 |     with open(fpath) as f:
50 |         return f.read()
51 | 
52 | @pytest.fixture
53 | def normalized_sentence_annotations(normalized_node_sentence_annotation,
54 |                                     normalized_edge_sentence_annotation):
55 |     norm_node_ann = NormalizedUDSAnnotation.from_json(normalized_node_sentence_annotation)
56 |     norm_edge_ann = NormalizedUDSAnnotation.from_json(normalized_edge_sentence_annotation)
57 | 
58 |     return norm_node_ann, norm_edge_ann
59 | 
60 | @pytest.fixture
61 | def raw_node_sentence_annotation(test_data_dir):
62 |     fpath = os.path.join(test_data_dir, 'raw_node_sentence_annotation.json')
63 | 
64 |     with open(fpath) as f:
65 |         return f.read()
66 | 
67 | @pytest.fixture
68 | def raw_edge_sentence_annotation(test_data_dir):
69 |     fpath = os.path.join(test_data_dir, 'raw_edge_sentence_annotation.json')
70 | 
71 |     with open(fpath) as f:
72 |         return f.read()
73 | 
74 | @pytest.fixture
75 | def raw_sentence_annotations(raw_node_sentence_annotation,
76 |                              raw_edge_sentence_annotation):
77 |     raw_node_ann = RawUDSAnnotation.from_json(raw_node_sentence_annotation)
78 |     raw_edge_ann = RawUDSAnnotation.from_json(raw_edge_sentence_annotation)
79 | 
80 |     return raw_node_ann, raw_edge_ann
81 | 


--------------------------------------------------------------------------------
/tests/data/normalized_edge_document_annotation.json:
--------------------------------------------------------------------------------
1 | {"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-20%%ewt-train-7192-document-arg-2": {"protoroles": {"instigation": {"confidence": 1.0, "value": -0.0}, "change_of_possession": {"confidence": 1.0, "value": -0.0}, "existed_before": {"confidence": 0.6796, "value": 0.0111}, "was_for_benefit": {"confidence": 1.0, "value": -0.0}, "change_of_state_continuous": {"confidence": 0.1675, "value": 0.0032}, "change_of_state": {"confidence": 0.1675, "value": 0.0032}, "volition": {"confidence": 1.0, "value": -0.0}, "change_of_location": {"confidence": 1.0, "value": -0.0}, "partitive": {"confidence": 0.564, "value": -0.0941}, "existed_during": {"confidence": 1.0, "value": 1.3421}, "existed_after": {"confidence": 0.6796, "value": 0.0111}, "awareness": {"confidence": 1.0, "value": -0.0}, "sentient": {"confidence": 1.0, "value": -0.9348}, "was_used": {"confidence": 0.564, "value": -0.0}}}, "ewt-train-7192-document-pred-25%%ewt-train-7191-document-arg-18": {"protoroles": {"instigation": {"confidence": 1.0, "value": 1.3557}, "change_of_possession": {"confidence": 0.7724, "value": -0.0}, "existed_before": {"confidence": 1.0, "value": 1.3527}, "was_for_benefit": {"confidence": 0.1976, "value": -0.0504}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.2067, "value": -0.0548}, "volition": {"confidence": 1.0, "value": 1.3545}, "change_of_location": {"confidence": 0.272, "value": -0.0922}, "partitive": {"confidence": 0.1148, "value": -0.0018}, "existed_during": {"confidence": 1.0, "value": 1.3557}, "existed_after": {"confidence": 1.0, "value": 1.3527}, "awareness": {"confidence": 1.0, "value": 1.3526}, "sentient": {"confidence": 1.0, "value": 1.354}, "was_used": {"confidence": 0.4373, "value": -0.0207}}}, "ewt-train-7192-document-pred-20%%ewt-train-7190-document-arg-3": {"protoroles": {"instigation": {"confidence": 1.0, "value": -1.5074}, "change_of_possession": {"confidence": 1.0, "value": -0.3909}, "existed_before": {"confidence": 1.0, "value": 1.3954}, "was_for_benefit": {"confidence": 0.3418, "value": 0.0008}, "change_of_state_continuous": {"confidence": 0.0791, "value": -0.0351}, "change_of_state": {"confidence": 0.3333, "value": -0.0085}, "volition": {"confidence": 1.0, "value": -0.3909}, "change_of_location": {"confidence": 0.1395, "value": -0.0549}, "partitive": {"confidence": 0.0791, "value": -0.1354}, "existed_during": {"confidence": 1.0, "value": 1.3959}, "existed_after": {"confidence": 0.6567, "value": 0.124}, "awareness": {"confidence": 0.1395, "value": -0.0549}, "sentient": {"confidence": 1.0, "value": -1.508}, "was_used": {"confidence": 0.3333, "value": -0.0085}}}}}
2 | 


--------------------------------------------------------------------------------
/tests/data/normalized_edge_sentence_annotation.json:
--------------------------------------------------------------------------------
1 | {"metadata": {"protoroles": {"awareness": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_location": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_possession": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_state": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "change_of_state_continuous": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_after": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_before": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "existed_during": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "instigation": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "location": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "manner": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "partitive": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "purpose": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "sentient": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "time": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "volition": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "was_for_benefit": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "was_used": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}}}, "data": {"tree1": {"tree1-semantics-pred-11%%tree1-semantics-arg-13": {"protoroles": {"instigation": {"confidence": 1.0, "value": -0.0}, "change_of_possession": {"confidence": 1.0, "value": -0.0}, "existed_before": {"confidence": 0.6796, "value": 0.0111}, "was_for_benefit": {"confidence": 1.0, "value": -0.0}, "change_of_state_continuous": {"confidence": 0.1675, "value": 0.0032}, "change_of_state": {"confidence": 0.1675, "value": 0.0032}, "volition": {"confidence": 1.0, "value": -0.0}, "change_of_location": {"confidence": 1.0, "value": -0.0}, "partitive": {"confidence": 0.564, "value": -0.0941}, "existed_during": {"confidence": 1.0, "value": 1.3421}, "existed_after": {"confidence": 0.6796, "value": 0.0111}, "awareness": {"confidence": 1.0, "value": -0.0}, "sentient": {"confidence": 1.0, "value": -0.9348}, "was_used": {"confidence": 0.564, "value": -0.0}}}, "tree1-semantics-pred-7%%tree1-semantics-arg-3": {"protoroles": {"instigation": {"confidence": 1.0, "value": 1.3557}, "change_of_possession": {"confidence": 0.7724, "value": -0.0}, "existed_before": {"confidence": 1.0, "value": 1.3527}, "was_for_benefit": {"confidence": 0.1976, "value": -0.0504}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.2067, "value": -0.0548}, "volition": {"confidence": 1.0, "value": 1.3545}, "change_of_location": {"confidence": 0.272, "value": -0.0922}, "partitive": {"confidence": 0.1148, "value": -0.0018}, "existed_during": {"confidence": 1.0, "value": 1.3557}, "existed_after": {"confidence": 1.0, "value": 1.3527}, "awareness": {"confidence": 1.0, "value": 1.3526}, "sentient": {"confidence": 1.0, "value": 1.354}, "was_used": {"confidence": 0.4373, "value": -0.0207}}}, "tree1-semantics-pred-11%%tree1-semantics-arg-9": {"protoroles": {"instigation": {"confidence": 1.0, "value": -1.5074}, "change_of_possession": {"confidence": 1.0, "value": -0.3909}, "existed_before": {"confidence": 1.0, "value": 1.3954}, "was_for_benefit": {"confidence": 0.3418, "value": 0.0008}, "change_of_state_continuous": {"confidence": 0.0791, "value": -0.0351}, "change_of_state": {"confidence": 0.3333, "value": -0.0085}, "volition": {"confidence": 1.0, "value": -0.3909}, "change_of_location": {"confidence": 0.1395, "value": -0.0549}, "partitive": {"confidence": 0.0791, "value": -0.1354}, "existed_during": {"confidence": 1.0, "value": 1.3959}, "existed_after": {"confidence": 0.6567, "value": 0.124}, "awareness": {"confidence": 0.1395, "value": -0.0549}, "sentient": {"confidence": 1.0, "value": -1.508}, "was_used": {"confidence": 0.3333, "value": -0.0085}}}}}}
2 | 


--------------------------------------------------------------------------------
/tests/data/normalized_node_document_annotation.json:
--------------------------------------------------------------------------------
1 | {"answers-20111105112131AA6gIX6_ans": {"ewt-train-7189-document-arg-2": {"genericity": {"arg-kind": {"confidence": 1.0, "value": 1.1619}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-pred-25": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.54}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "ewt-train-7191-document-arg-18": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.5399}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "ewt-train-7192-document-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": -1.5399}, "pred-hypothetical": {"confidence": 1.0, "value": 0.7748}, "pred-particular": {"confidence": 1.0, "value": -1.54}}}, "ewt-train-7194-document-arg-13": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7194-document-arg-1": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "ewt-train-7192-document-arg-2": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}}}
2 | 


--------------------------------------------------------------------------------
/tests/data/normalized_node_sentence_annotation.json:
--------------------------------------------------------------------------------
1 | {"metadata": {"genericity": {"pred-dynamic": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "pred-hypothetical": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "pred-particular": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-abstract": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-kind": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}, "arg-particular": {"value": {"datatype": "float"}, "confidence": {"datatype": "float"}}}}, "data": {"tree1": {"tree1-semantics-arg-15": {"genericity": {"arg-kind": {"confidence": 1.0, "value": 1.1619}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.54}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "tree1-semantics-arg-3": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": 0.7748}, "pred-hypothetical": {"confidence": 1.0, "value": -1.5399}, "pred-particular": {"confidence": 1.0, "value": 0.7748}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 1.0, "value": -1.5399}, "pred-hypothetical": {"confidence": 1.0, "value": 0.7748}, "pred-particular": {"confidence": 1.0, "value": -1.54}}}, "tree1-semantics-arg-23": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-arg-9": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}, "tree1-semantics-arg-13": {"genericity": {"arg-kind": {"confidence": 1.0, "value": -1.147}, "arg-abstract": {"confidence": 1.0, "value": -1.147}, "arg-particular": {"confidence": 1.0, "value": 1.1619}}}}}}
2 | 


--------------------------------------------------------------------------------
/tests/data/raw_edge_sentence_annotators.txt:
--------------------------------------------------------------------------------
 1 | protoroles-annotator-0
 2 | protoroles-annotator-1
 3 | protoroles-annotator-10
 4 | protoroles-annotator-11
 5 | protoroles-annotator-12
 6 | protoroles-annotator-13
 7 | protoroles-annotator-14
 8 | protoroles-annotator-15
 9 | protoroles-annotator-16
10 | protoroles-annotator-17
11 | protoroles-annotator-18
12 | protoroles-annotator-19
13 | protoroles-annotator-2
14 | protoroles-annotator-20
15 | protoroles-annotator-21
16 | protoroles-annotator-22
17 | protoroles-annotator-23
18 | protoroles-annotator-24
19 | protoroles-annotator-25
20 | protoroles-annotator-26
21 | protoroles-annotator-27
22 | protoroles-annotator-28
23 | protoroles-annotator-29
24 | protoroles-annotator-3
25 | protoroles-annotator-30
26 | protoroles-annotator-31
27 | protoroles-annotator-32
28 | protoroles-annotator-33
29 | protoroles-annotator-34
30 | protoroles-annotator-35
31 | protoroles-annotator-36
32 | protoroles-annotator-37
33 | protoroles-annotator-38
34 | protoroles-annotator-39
35 | protoroles-annotator-4
36 | protoroles-annotator-40
37 | protoroles-annotator-41
38 | protoroles-annotator-42
39 | protoroles-annotator-43
40 | protoroles-annotator-44
41 | protoroles-annotator-45
42 | protoroles-annotator-5
43 | protoroles-annotator-6
44 | protoroles-annotator-7
45 | protoroles-annotator-8
46 | protoroles-annotator-9
47 | 


--------------------------------------------------------------------------------
/tests/data/rawtree.conllu:
--------------------------------------------------------------------------------
 1 | 1	The	the	DET	DT	Definite=Def|PronType=Art	3	det	_	_
 2 | 2	police	police	NOUN	NN	Number=Sing	3	compound	_	_
 3 | 3	commander	commander	NOUN	NN	Number=Sing	7	nsubj	_	_
 4 | 4	of	of	ADP	IN	_	6	case	_	_
 5 | 5	Ninevah	Ninevah	PROPN	NNP	Number=Sing	6	compound	_	_
 6 | 6	Province	Province	PROPN	NNP	Number=Sing	3	nmod	_	_
 7 | 7	announced	announce	VERB	VBD	Mood=Ind|Tense=Past|VerbForm=Fin	0	root	_	_
 8 | 8	that	that	SCONJ	IN	_	11	mark	_	_
 9 | 9	bombings	bombing	NOUN	NNS	Number=Plur	11	nsubj	_	_
10 | 10	had	have	AUX	VBD	Mood=Ind|Tense=Past|VerbForm=Fin	11	aux	_	_
11 | 11	declined	decline	VERB	VBN	Tense=Past|VerbForm=Part	7	ccomp	_	_
12 | 12	80	80	NUM	CD	NumType=Card	13	nummod	_	_
13 | 13	percent	percent	NOUN	NN	Number=Sing	11	dobj	_	_
14 | 14	in	in	ADP	IN	_	15	case	_	_
15 | 15	Mosul	Mosul	PROPN	NNP	Number=Sing	11	nmod	_	SpaceAfter=No
16 | 16	,	,	PUNCT	,	_	11	punct	_	_
17 | 17	whereas	whereas	SCONJ	IN	_	20	mark	_	_
18 | 18	there	there	PRON	EX	_	20	expl	_	_
19 | 19	had	have	AUX	VBD	Mood=Ind|Tense=Past|VerbForm=Fin	20	aux	_	_
20 | 20	been	be	VERB	VBN	Tense=Past|VerbForm=Part	11	advcl	_	_
21 | 21	a	a	DET	DT	Definite=Ind|PronType=Art	23	det	_	_
22 | 22	big	big	ADJ	JJ	Degree=Pos	23	amod	_	_
23 | 23	jump	jump	NOUN	NN	Number=Sing	20	nsubj	_	_
24 | 24	in	in	ADP	IN	_	26	case	_	_
25 | 25	the	the	DET	DT	Definite=Def|PronType=Art	26	det	_	_
26 | 26	number	number	NOUN	NN	Number=Sing	23	nmod	_	_
27 | 27	of	of	ADP	IN	_	28	case	_	_
28 | 28	kidnappings	kidnapping	NOUN	NNS	Number=Plur	26	nmod	_	SpaceAfter=No
29 | 29	.	.	PUNCT	.	_	7	punct	_	_


--------------------------------------------------------------------------------
/tests/data/vis_data.json:
--------------------------------------------------------------------------------
1 | {"directed": true, "multigraph": false, "graph": [["name", "ewt-dev-1"]], "nodes": [{"domain": "syntax", "type": "token", "position": 1, "form": "From", "lemma": "from", "upos": "ADP", "xpos": "IN", "id": "ewt-dev-1-syntax-1"}, {"domain": "syntax", "type": "token", "position": 2, "form": "the", "lemma": "the", "upos": "DET", "xpos": "DT", "Definite": "Def", "PronType": "Art", "id": "ewt-dev-1-syntax-2"}, {"domain": "syntax", "type": "token", "position": 3, "form": "AP", "lemma": "AP", "upos": "PROPN", "xpos": "NNP", "Number": "Sing", "id": "ewt-dev-1-syntax-3"}, {"domain": "syntax", "type": "token", "position": 4, "form": "comes", "lemma": "come", "upos": "VERB", "xpos": "VBZ", "Mood": "Ind", "Number": "Sing", "Person": "3", "Tense": "Pres", "VerbForm": "Fin", "id": "ewt-dev-1-syntax-4"}, {"domain": "syntax", "type": "token", "position": 5, "form": "this", "lemma": "this", "upos": "DET", "xpos": "DT", "Number": "Sing", "PronType": "Dem", "id": "ewt-dev-1-syntax-5"}, {"domain": "syntax", "type": "token", "position": 6, "form": "story", "lemma": "story", "upos": "NOUN", "xpos": "NN", "Number": "Sing", "id": "ewt-dev-1-syntax-6"}, {"domain": "syntax", "type": "token", "position": 7, "form": ":", "lemma": ":", "upos": "PUNCT", "xpos": ":", "id": "ewt-dev-1-syntax-7"}, {"position": 0, "domain": "root", "type": "root", "id": "ewt-dev-1-root-0"}, {"domain": "semantics", "frompredpatt": true, "type": "predicate", "factuality": {"factual": {"confidence": 1.0, "value": 0.967}}, "time": {"dur-weeks": {"confidence": 0.2564, "value": -1.3247}, "dur-decades": {"confidence": 0.2564, "value": -1.1146}, "dur-days": {"confidence": 0.2564, "value": 0.8558}, "dur-hours": {"confidence": 0.2564, "value": 0.9952}, "dur-seconds": {"confidence": 0.2564, "value": 0.8931}, "dur-forever": {"confidence": 0.2564, "value": -1.4626}, "dur-centuries": {"confidence": 0.2564, "value": -1.1688}, "dur-instant": {"confidence": 0.2564, "value": -1.4106}, "dur-years": {"confidence": 0.2564, "value": 0.9252}, "dur-minutes": {"confidence": 0.2564, "value": -0.9337}, "dur-months": {"confidence": 0.2564, "value": -1.2142}}, "genericity": {"pred-dynamic": {"confidence": 0.627, "value": -0.0469}, "pred-hypothetical": {"confidence": 0.5067, "value": -0.0416}, "pred-particular": {"confidence": 1.0, "value": 1.1753}}, "id": "ewt-dev-1-semantics-pred-4"}, {"domain": "semantics", "frompredpatt": true, "type": "argument", "genericity": {"arg-kind": {"confidence": 1.0, "value": -1.1642}, "arg-abstract": {"confidence": 1.0, "value": -1.1642}, "arg-particular": {"confidence": 1.0, "value": 1.2257}}, "id": "ewt-dev-1-semantics-arg-3"}, {"domain": "semantics", "frompredpatt": true, "type": "argument", "wordsense": {"supersense-noun.object": {"confidence": 1.0, "value": -3.0}, "supersense-noun.Tops": {"confidence": 1.0, "value": -3.0}, "supersense-noun.quantity": {"confidence": 1.0, "value": -3.0}, "supersense-noun.feeling": {"confidence": 1.0, "value": -3.0}, "supersense-noun.food": {"confidence": 1.0, "value": -3.0}, "supersense-noun.shape": {"confidence": 1.0, "value": -3.0}, "supersense-noun.event": {"confidence": 1.0, "value": -3.0}, "supersense-noun.motive": {"confidence": 1.0, "value": -3.0}, "supersense-noun.substance": {"confidence": 1.0, "value": -3.0}, "supersense-noun.time": {"confidence": 1.0, "value": -3.0}, "supersense-noun.person": {"confidence": 1.0, "value": -3.0}, "supersense-noun.process": {"confidence": 1.0, "value": -3.0}, "supersense-noun.attribute": {"confidence": 1.0, "value": -3.0}, "supersense-noun.artifact": {"confidence": 1.0, "value": -1.3996}, "supersense-noun.group": {"confidence": 1.0, "value": -3.0}, "supersense-noun.animal": {"confidence": 1.0, "value": -3.0}, "supersense-noun.location": {"confidence": 1.0, "value": -3.0}, "supersense-noun.plant": {"confidence": 1.0, "value": -3.0}, "supersense-noun.possession": {"confidence": 1.0, "value": -3.0}, "supersense-noun.relation": {"confidence": 1.0, "value": -3.0}, "supersense-noun.phenomenon": {"confidence": 1.0, "value": -3.0}, "supersense-noun.cognition": {"confidence": 1.0, "value": -3.0}, "supersense-noun.act": {"confidence": 1.0, "value": -3.0}, "supersense-noun.state": {"confidence": 1.0, "value": -3.0}, "supersense-noun.communication": {"confidence": 1.0, "value": 0.2016}, "supersense-noun.body": {"confidence": 1.0, "value": -3.0}}, "genericity": {"arg-kind": {"confidence": 0.7138, "value": -0.035}, "arg-abstract": {"confidence": 1.0, "value": -1.1685}, "arg-particular": {"confidence": 1.0, "value": 1.2257}}, "id": "ewt-dev-1-semantics-arg-6"}, {"domain": "semantics", "type": "predicate", "frompredpatt": false, "id": "ewt-dev-1-semantics-pred-root"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-0"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-author"}, {"domain": "semantics", "type": "argument", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-addressee"}], "adjacency": [[], [], [{"deprel": "case", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-1"}, {"deprel": "det", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-2"}], [{"deprel": "nmod", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-3"}, {"deprel": "nsubj", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-6"}, {"deprel": "punct", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-7"}], [], [{"deprel": "det", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-5"}], [], [{"deprel": "root", "domain": "syntax", "type": "dependency", "id": "ewt-dev-1-syntax-4"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-4"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-1"}, {"domain": "semantics", "type": "dependency", "frompredpatt": true, "protoroles": {"manner": {"confidence": 1.0, "value": -1.3932}, "location": {"confidence": 1.0, "value": 1.4353}, "time": {"confidence": 1.0, "value": -1.3913}, "purpose": {"confidence": 1.0, "value": -1.3941}}, "id": "ewt-dev-1-semantics-arg-3"}, {"domain": "semantics", "type": "dependency", "frompredpatt": true, "protoroles": {"instigation": {"confidence": 0.1128, "value": 0.0458}, "change_of_possession": {"confidence": 0.7669, "value": -0.0561}, "existed_before": {"confidence": 0.1128, "value": 0.1096}, "was_for_benefit": {"confidence": 0.7669, "value": -0.1343}, "change_of_state_continuous": {"confidence": 1.0, "value": -0.0}, "change_of_state": {"confidence": 0.7669, "value": -0.1343}, "volition": {"confidence": 0.3073, "value": -0.0}, "change_of_location": {"confidence": 0.7669, "value": -0.0561}, "partitive": {"confidence": 0.5736, "value": -0.2656}, "existed_during": {"confidence": 0.4211, "value": 0.236}, "existed_after": {"confidence": 0.4211, "value": 0.236}, "awareness": {"confidence": 0.7669, "value": -0.0}, "sentient": {"confidence": 0.4612, "value": -0.3556}, "was_used": {"confidence": 0.013, "value": -0.0204}}, "id": "ewt-dev-1-semantics-arg-6"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-3"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-2"}], [{"domain": "interface", "type": "head", "id": "ewt-dev-1-syntax-6"}, {"domain": "interface", "type": "nonhead", "id": "ewt-dev-1-syntax-5"}], [{"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-0"}, {"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-author"}, {"domain": "semantics", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-semantics-arg-addressee"}], [{"domain": "semantics", "type": "head", "frompredpatt": false, "id": "ewt-dev-1-semantics-pred-4"}, {"domain": "interface", "type": "dependency", "frompredpatt": false, "id": "ewt-dev-1-root-0"}], [], []]}


--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest==6.0.*
2 | 


--------------------------------------------------------------------------------
/tests/test_dependency.py:
--------------------------------------------------------------------------------
 1 | from numpy import array
 2 | from networkx import DiGraph
 3 | from decomp.syntax.dependency import DependencyGraphBuilder, CoNLLDependencyTreeCorpus
 4 | 
 5 | rawtree = '''1       I       I       PRON    PRP     Case=Nom|Number=Sing|Person=1|PronType=Prs      4       nsubj   _       _
 6 | 2       ca      can     AUX     MD      VerbForm=Fin    4       aux     _       SpaceAfter=No
 7 | 3       n't     not     PART    RB      _       4       advmod  _       _
 8 | 4       imagine imagine VERB    VB      VerbForm=Inf    0       root    _       _
 9 | 5       they    they    PRON    PRP     Case=Nom|Number=Plur|Person=3|PronType=Prs      6       nsubj   _       _
10 | 6       wanted  want    VERB    VBD     Mood=Ind|Tense=Past|VerbForm=Fin        4       ccomp   _       _
11 | 7       to      to      PART    TO      _       8       mark    _       _
12 | 8       do      do      VERB    VB      VerbForm=Inf    6       xcomp   _       _
13 | 9       this    this    PRON    DT      Number=Sing|PronType=Dem        8       obj     _       SpaceAfter=No
14 | 10      .       .       PUNCT   .       _       4       punct   _       _'''
15 | 
16 | sentence = "I ca n't imagine they wanted to do this ."
17 | 
18 | listtree = [l.split() for l in rawtree.split('\n')]
19 | 
20 | 
21 | def setup_tree():
22 |     # build and extract tree
23 |     graph = DependencyGraphBuilder().from_conll(listtree, 'tree1')
24 | 
25 |     return graph
26 | 
27 | 
28 | def setup_corpus():
29 |     listtrees = {'tree1': listtree,
30 |                  'tree2': listtree}
31 | 
32 |     corpus = CoNLLDependencyTreeCorpus(listtrees)
33 | 
34 |     return corpus
35 | 
36 | 
37 | # could use @nose.with_setup
38 | def test_dependency_tree_builder():
39 |     tree = setup_tree()
40 | 
41 |     assert tree.name == 'tree1'
42 |     assert (tree.graph['conll'] == array(listtree)).all()
43 | 
44 |     print(tree.nodes['tree1-root-0'])
45 |     # test the root
46 |     # test syntax nodes
47 |     assert tree.nodes['tree1-root-0'] == {'position': 0,
48 |                                           'domain': 'root',
49 |                                           'type': 'root'}
50 | 
51 |     for idx, node in tree.nodes.items():
52 |         for row in listtree:
53 |             if int(row[0]) == idx:
54 |                 assert node['form'] == row[1]
55 |                 assert node['lemma'] == row[2]
56 |                 assert node['upos'] == row[3]
57 |                 assert node['xpos'] == row[4]
58 | 
59 |     for (idx1, idx2), edge in tree.edges.items():
60 |         for row in listtree:
61 |             if int(row[0]) == idx2:
62 |                 assert int(row[6]) == idx1
63 |                 assert row[7] == edge['deprel']
64 | 
65 | 
66 | def test_dependency_tree_corpus():
67 |     corpus = setup_corpus()
68 | 
69 |     assert all([isinstance(t, DiGraph) for gid, t in corpus.graphs.items()])
70 |     assert all([isinstance(t, DiGraph) for gid, t in corpus.items()])
71 |     assert all([isinstance(gid, str) for gid in corpus])
72 | 


--------------------------------------------------------------------------------
/tests/test_predpatt.py:
--------------------------------------------------------------------------------
  1 | from io import StringIO
  2 | from networkx import DiGraph
  3 | from predpatt import load_conllu, PredPatt, PredPattOpts
  4 | from decomp.syntax.dependency import DependencyGraphBuilder
  5 | from decomp.semantics.predpatt import PredPattCorpus, PredPattGraphBuilder
  6 | 
  7 | rawtree = '''1	The	the	DET	DT	Definite=Def|PronType=Art	3	det	_	_
  8 | 2	police	police	NOUN	NN	Number=Sing	3	compound	_	_
  9 | 3	commander	commander	NOUN	NN	Number=Sing	7	nsubj	_	_
 10 | 4	of	of	ADP	IN	_	6	case	_	_
 11 | 5	Ninevah	Ninevah	PROPN	NNP	Number=Sing	6	compound	_	_
 12 | 6	Province	Province	PROPN	NNP	Number=Sing	3	nmod	_	_
 13 | 7	announced	announce	VERB	VBD	Mood=Ind|Tense=Past|VerbForm=Fin	0	root	_	_
 14 | 8	that	that	SCONJ	IN	_	11	mark	_	_
 15 | 9	bombings	bombing	NOUN	NNS	Number=Plur	11	nsubj	_	_
 16 | 10	had	have	AUX	VBD	Mood=Ind|Tense=Past|VerbForm=Fin	11	aux	_	_
 17 | 11	declined	decline	VERB	VBN	Tense=Past|VerbForm=Part	7	ccomp	_	_
 18 | 12	80	80	NUM	CD	NumType=Card	13	nummod	_	_
 19 | 13	percent	percent	NOUN	NN	Number=Sing	11	dobj	_	_
 20 | 14	in	in	ADP	IN	_	15	case	_	_
 21 | 15	Mosul	Mosul	PROPN	NNP	Number=Sing	11	nmod	_	SpaceAfter=No
 22 | 16	,	,	PUNCT	,	_	11	punct	_	_
 23 | 17	whereas	whereas	SCONJ	IN	_	20	mark	_	_
 24 | 18	there	there	PRON	EX	_	20	expl	_	_
 25 | 19	had	have	AUX	VBD	Mood=Ind|Tense=Past|VerbForm=Fin	20	aux	_	_
 26 | 20	been	be	VERB	VBN	Tense=Past|VerbForm=Part	11	advcl	_	_
 27 | 21	a	a	DET	DT	Definite=Ind|PronType=Art	23	det	_	_
 28 | 22	big	big	ADJ	JJ	Degree=Pos	23	amod	_	_
 29 | 23	jump	jump	NOUN	NN	Number=Sing	20	nsubj	_	_
 30 | 24	in	in	ADP	IN	_	26	case	_	_
 31 | 25	the	the	DET	DT	Definite=Def|PronType=Art	26	det	_	_
 32 | 26	number	number	NOUN	NN	Number=Sing	23	nmod	_	_
 33 | 27	of	of	ADP	IN	_	28	case	_	_
 34 | 28	kidnappings	kidnapping	NOUN	NNS	Number=Plur	26	nmod	_	SpaceAfter=No
 35 | 29	.	.	PUNCT	.	_	7	punct	_	_'''
 36 | 
 37 | sentence = 'The police commander of Ninevah Province announced that bombings had declined 80 percent in Mosul , whereas there had been a big jump in the number of kidnappings .'
 38 | 
 39 | listtree = [l.split() for l in rawtree.split('\n')]
 40 | 
 41 | def setup_graph():
 42 |     ud = DependencyGraphBuilder.from_conll(listtree, 'tree1')
 43 | 
 44 |     pp = PredPatt(next(load_conllu(rawtree))[1],
 45 |                   opts=PredPattOpts(resolve_relcl=True,
 46 |                                     borrow_arg_for_relcl=True,
 47 |                                     resolve_conj=False,
 48 |                                     cut=True))
 49 | 
 50 |     graph = PredPattGraphBuilder.from_predpatt(pp, ud, 'tree1')
 51 | 
 52 |     return pp, graph
 53 | 
 54 | def setup_corpus_from_str():
 55 |     return PredPattCorpus.from_conll(rawtree)
 56 | 
 57 | def setup_corpus_from_io():
 58 |     rawfile = StringIO(rawtree)
 59 |     return PredPattCorpus.from_conll(rawfile)
 60 | 
 61 | ## could use @nose.with_setup
 62 | def test_predpatt_graph_builder():
 63 |     pp, pp_graph = setup_graph()
 64 | 
 65 |     assert pp_graph.name == 'tree1'
 66 |     assert all(['tree1' in nodeid
 67 |                 for nodeid in pp_graph.nodes])
 68 | 
 69 |     # test syntax nodes
 70 |     print(pp_graph.nodes['tree1-root-0'])
 71 |     assert pp_graph.nodes['tree1-root-0'] == {'position': 0,
 72 |                                               'domain': 'root',
 73 |                                               'type': 'root'}
 74 | 
 75 |     for idx, node in pp_graph.nodes.items():
 76 |         if 'syntax' in idx:
 77 |             idx = idx.split('-')[-1]
 78 |             for row in listtree:
 79 |                 if int(row[0]) == idx:
 80 |                     assert node['form'] == row[1]
 81 |                     assert node['lemma'] == row[2]
 82 |                     assert node['upos'] == row[3]
 83 |                     assert node['xpos'] == row[4]
 84 | 
 85 |     for (idx1, idx2), edge in pp_graph.edges.items():
 86 |         if 'syntax' in idx1 and 'syntax' in idx2:
 87 |             idx1, idx2 = idx1.split('-')[-1], idx2.split('-')[-1]
 88 |             for row in listtree:
 89 |                 if int(row[0]) == idx2:
 90 |                     assert int(row[6]) == idx1
 91 |                     assert row[7] == edge['deprel']
 92 | 
 93 |     # test semantics nodes
 94 |     assert 'tree1-semantics-pred-0' not in pp_graph.nodes
 95 |     assert 'tree1-semantics-arg-0' not in pp_graph.nodes
 96 | 
 97 |     assert all(['arg' in nodeid or 'pred' in nodeid
 98 |                 for nodeid in pp_graph.nodes
 99 |                 if 'semantics' in nodeid])
100 | 
101 |     assert all(['domain' in pp_graph.nodes[nodeid]
102 |                 for nodeid in pp_graph.nodes
103 |                 if 'semantics' in nodeid])
104 | 
105 |     assert all([pp_graph.nodes[nodeid]['domain'] == 'semantics'
106 |                 for nodeid in pp_graph.nodes
107 |                 if 'semantics' in nodeid])
108 | 
109 |     assert all(['type' in pp_graph.nodes[nodeid]
110 |                 for nodeid in pp_graph.nodes
111 |                 if 'semantics' in nodeid])
112 | 
113 |     assert all([pp_graph.nodes[nodeid]['type'] in ['argument', 'predicate']
114 |                 for nodeid in pp_graph.nodes
115 |                 if 'semantics' in nodeid])
116 | 
117 |     assert all([('arg' in nodeid) ==
118 |                 (pp_graph.nodes[nodeid]['type'] == 'argument')
119 |                 for nodeid in pp_graph.nodes
120 |                 if 'semantics' in nodeid])
121 | 
122 |     assert all([('pred' in nodeid) ==
123 |                 (pp_graph.nodes[nodeid]['type'] == 'predicate')
124 |                 for nodeid in pp_graph.nodes
125 |                 if 'semantics' in nodeid])
126 | 
127 |     assert all(['arg' not in nodeid and 'pred' not in nodeid
128 |                 for nodeid in pp_graph.nodes
129 |                 if 'syntax' in nodeid])
130 | 
131 |     # test argument edges
132 |     assert all([pp_graph.edges[(nodeid2, nodeid1)]['domain'] == 'semantics' and
133 |                 pp_graph.edges[(nodeid2, nodeid1)]['type'] == 'dependency'
134 |                 for nodeid1, node1 in pp_graph.nodes.items()
135 |                 for nodeid2 in pp_graph.nodes
136 |                 if 'semantics-arg' in nodeid1
137 |                 if 'semantics-pred' in nodeid2
138 |                 if (nodeid2, nodeid1) in pp_graph.edges])
139 | 
140 |     # tests subpredicate edges
141 |     subprededge = ('tree1-semantics-arg-11', 'tree1-semantics-pred-11')
142 |     assert pp_graph.edges[subprededge]['domain'] == 'semantics'
143 |     assert pp_graph.edges[subprededge]['type'] == 'head'
144 | 
145 |     assert all([(nodeid2, nodeid1) in pp_graph.edges and
146 |                 pp_graph.edges[(nodeid2, nodeid1)]['domain'] == 'semantics' and
147 |                 pp_graph.edges[(nodeid2, nodeid1)]['type'] == 'head'
148 |                 for nodeid1, node1 in pp_graph.nodes.items()
149 |                 for nodeid2 in pp_graph.nodes
150 |                 if 'semantics-pred' in nodeid1
151 |                 if 'semantics-arg' in nodeid2
152 |                 if nodeid1.split('-')[-1] == nodeid2.split('-')[-1]])
153 | 
154 | def test_predpatt_corpus():
155 |     corpus = setup_corpus_from_str()
156 | 
157 |     assert all([isinstance(t, DiGraph) for gid, t in corpus.graphs.items()])
158 |     assert all([isinstance(t, DiGraph) for gid, t in corpus.items()])
159 |     assert all([isinstance(gid, str) for gid in corpus])
160 | 
161 |     corpus = setup_corpus_from_io()
162 | 
163 |     assert all([isinstance(t, DiGraph) for gid, t in corpus.graphs.items()])
164 |     assert all([isinstance(t, DiGraph) for gid, t in corpus.items()])
165 |     assert all([isinstance(gid, str) for gid in corpus])
166 | 


--------------------------------------------------------------------------------
/tests/test_uds_annotation.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | import os, json
  4 | 
  5 | from pprint import pprint
  6 | 
  7 | from decomp.semantics.uds.metadata import UDSAnnotationMetadata
  8 | from decomp.semantics.uds.annotation import UDSAnnotation
  9 | from decomp.semantics.uds.annotation import NormalizedUDSAnnotation
 10 | from decomp.semantics.uds.annotation import RawUDSAnnotation
 11 | 
 12 | class TestUDSAnnotation:
 13 | 
 14 |     def test_direct_instantiation_of_uds_annotation_fails(self):
 15 |         with pytest.raises(TypeError):
 16 |             UDSAnnotation(None)
 17 | 
 18 | class TestNormalizedUDSAnnotation:
 19 | 
 20 |     def test_from_json(self,
 21 |                        normalized_node_sentence_annotation,
 22 |                        normalized_edge_sentence_annotation,
 23 |                        normalized_sentence_annotations):
 24 |         norm_node_ann, norm_edge_ann = normalized_sentence_annotations
 25 |         norm_node_ann_direct = json.loads(normalized_node_sentence_annotation)
 26 |         norm_edge_ann_direct = json.loads(normalized_edge_sentence_annotation)
 27 | 
 28 |         assert norm_node_ann.metadata == UDSAnnotationMetadata.from_dict(norm_node_ann_direct['metadata'])
 29 |         assert norm_edge_ann.metadata == UDSAnnotationMetadata.from_dict(norm_edge_ann_direct['metadata'])
 30 | 
 31 |         assert all([not edge_attrs
 32 |                     for n, (node_attrs, edge_attrs) in norm_node_ann.items()])
 33 | 
 34 |         assert all([norm_node_ann_direct['data']['tree1'][k] == v
 35 |                     for n, (node_attrs, edge_attrs) in norm_node_ann.items()
 36 |                     for k, v in node_attrs.items()])
 37 | 
 38 |         assert all([not node_attrs
 39 |                     for n, (node_attrs, edge_attrs) in norm_edge_ann.items()])
 40 | 
 41 |         assert all([norm_edge_ann_direct['data']['tree1']['%%'.join(k)] == v
 42 |                     for n, (node_attrs, edge_attrs) in norm_edge_ann.items()
 43 |                     for k, v in edge_attrs.items()])
 44 | 
 45 | class TestRawUDSAnnotation:
 46 | 
 47 |     def test_from_json(self,
 48 |                        raw_node_sentence_annotation,
 49 |                        raw_edge_sentence_annotation,
 50 |                        raw_sentence_annotations):
 51 |         raw_node_ann, raw_edge_ann = raw_sentence_annotations
 52 |         raw_node_ann_direct = json.loads(raw_node_sentence_annotation)
 53 |         raw_edge_ann_direct = json.loads(raw_edge_sentence_annotation)
 54 | 
 55 |         assert raw_node_ann.metadata == UDSAnnotationMetadata.from_dict(raw_node_ann_direct['metadata'])
 56 |         assert raw_edge_ann.metadata == UDSAnnotationMetadata.from_dict(raw_edge_ann_direct['metadata'])
 57 | 
 58 |         assert all([not edge_attrs
 59 |                     for n, (node_attrs, edge_attrs) in raw_node_ann.items()])
 60 | 
 61 |         assert all([raw_node_ann_direct['data']['tree1'][k] == v
 62 |                     for n, (node_attrs, edge_attrs) in raw_node_ann.items()
 63 |                     for k, v in node_attrs.items()])
 64 | 
 65 |         assert all([not node_attrs
 66 |                     for n, (node_attrs, edge_attrs) in raw_edge_ann.items()])
 67 | 
 68 |         assert all([raw_edge_ann_direct['data']['tree1']['%%'.join(k)] == v
 69 |                     for n, (node_attrs, edge_attrs) in raw_edge_ann.items()
 70 |                     for k, v in edge_attrs.items()])
 71 | 
 72 | 
 73 |     def test_annotators(self, raw_sentence_annotations, test_data_dir):
 74 |         raw_node_ann, raw_edge_ann = raw_sentence_annotations
 75 | 
 76 |         with open(os.path.join(test_data_dir, 'raw_node_sentence_annotators.txt')) as f:
 77 |             assert raw_node_ann.annotators() == {line.strip() for line in f}
 78 | 
 79 |         with open(os.path.join(test_data_dir, 'raw_edge_sentence_annotators.txt')) as f:
 80 |             assert raw_edge_ann.annotators() == {line.strip() for line in f}
 81 | 
 82 |     def test_items(self, raw_sentence_annotations):
 83 |         raw_node_ann, raw_edge_ann = raw_sentence_annotations
 84 | 
 85 |         # verify that items by annotator generator works
 86 |         for gid, (node_attrs, edge_attrs) in raw_node_ann.items(annotator_id='genericity-pred-annotator-88'):
 87 |             assert gid == 'tree1'
 88 |             assert json.dumps(node_attrs) == '{"tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 0, "value": 1}, "pred-hypothetical": {"confidence": 0, "value": 1}, "pred-particular": {"confidence": 0, "value": 1}}}}'
 89 |             assert json.dumps(edge_attrs) == '{}'
 90 | 
 91 |         # verify that node attribute-only generator works
 92 |         for gid, node_attrs in raw_node_ann.items(annotation_type="node",
 93 |                                                    annotator_id='genericity-pred-annotator-88'):
 94 |             assert gid == 'tree1'
 95 |             assert json.dumps(node_attrs) == '{"tree1-semantics-pred-7": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-11": {"genericity": {"pred-dynamic": {"confidence": 4, "value": 0}, "pred-hypothetical": {"confidence": 4, "value": 0}, "pred-particular": {"confidence": 4, "value": 0}}}, "tree1-semantics-pred-20": {"genericity": {"pred-dynamic": {"confidence": 0, "value": 1}, "pred-hypothetical": {"confidence": 0, "value": 1}, "pred-particular": {"confidence": 0, "value": 1}}}}'
 96 | 
 97 |         # generator for edge attributes for the node attribute-only annotation
 98 |         # should yield empty results for the graph
 99 |         with pytest.raises(ValueError):
100 |             for gid, edge_attrs in raw_node_ann.items(annotation_type="edge",
101 |                                                       annotator_id='genericity-pred-annotator-88'):
102 |                 pass
103 | 
104 |         # verify that edge attribute-only generator works
105 |         for gid, (node_attrs, edge_attrs) in raw_edge_ann.items(annotator_id='protoroles-annotator-14'):
106 |             assert gid == 'tree1'
107 |             assert json.dumps({'%%'.join(e): attrs for e, attrs in edge_attrs.items()}) == '{"tree1-semantics-pred-11%%tree1-semantics-arg-9": {"protoroles": {"awareness": {"confidence": 1, "value": 4}, "change_of_location": {"confidence": 1, "value": 4}, "change_of_possession": {"confidence": 1, "value": 4}, "change_of_state": {"confidence": 1, "value": 4}, "change_of_state_continuous": {"confidence": 1, "value": 4}, "existed_after": {"confidence": 1, "value": 4}, "existed_before": {"confidence": 1, "value": 4}, "existed_during": {"confidence": 1, "value": 4}, "instigation": {"confidence": 1, "value": 4}, "partitive": {"confidence": 1, "value": 4}, "sentient": {"confidence": 1, "value": 4}, "volition": {"confidence": 1, "value": 4}, "was_for_benefit": {"confidence": 1, "value": 4}, "was_used": {"confidence": 1, "value": 4}}}}'
108 | 
109 |         # generator for node attributes for the edge attribute-only annotation
110 |         # should yield empty results for the graph
111 |         with pytest.raises(ValueError):
112 |             for gid, node_attrs in raw_edge_ann.items(annotation_type="node",
113 |                                                       annotator_id='protoroles-annotator-14'):
114 |                 pass
115 | 


--------------------------------------------------------------------------------
/tests/test_uds_corpus.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import logging
  4 | import pytest
  5 | 
  6 | from glob import glob
  7 | from pkg_resources import resource_filename
  8 | from decomp.semantics.uds import UDSCorpus
  9 | 
 10 | test_document_name = 'answers-20111105112131AA6gIX6_ans'
 11 | test_document_genre = 'answers'
 12 | test_document_timestamp = '20111105112131'
 13 | test_document_text = 'My dad just does n\'t understand ? Ugh my dad is so stupid ... he just does n\'t understand anything ! I have 5 sisters and so including my mom ... he is the only guy in a house of six females . Now I \'m the youngest and I just got my period so now we all have ours and he thinks it \'s a good thing ? He \'s always like " ohh you must be so happy to finally have yours , I wish I had mine ! " and he is n\'t even joking . I think just living in a house with so many girls is making him go crazy ? Yep , the females are just getting to him ... dads .. Do n\'t blame him please , he feels lonely and wants to show his attention to all of you to look after you , please forgive and sympathy if he miss something . I am sorry for him , he is a good dad'
 14 | test_document_sentence_ids = {'ewt-train-7189': 'answers-20111105112131AA6gIX6_ans-0001',
 15 |  'ewt-train-7190': 'answers-20111105112131AA6gIX6_ans-0002',
 16 |  'ewt-train-7191': 'answers-20111105112131AA6gIX6_ans-0003',
 17 |  'ewt-train-7192': 'answers-20111105112131AA6gIX6_ans-0004',
 18 |  'ewt-train-7193': 'answers-20111105112131AA6gIX6_ans-0005',
 19 |  'ewt-train-7194': 'answers-20111105112131AA6gIX6_ans-0006',
 20 |  'ewt-train-7195': 'answers-20111105112131AA6gIX6_ans-0007',
 21 |  'ewt-train-7196': 'answers-20111105112131AA6gIX6_ans-0008',
 22 |  'ewt-train-7197': 'answers-20111105112131AA6gIX6_ans-0009'}
 23 | test_document_node = 'ewt-train-7195-document-pred-7'
 24 | test_document_semantics_node_normalized = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics',
 25 |   'frompredpatt': True,
 26 |   'type': 'predicate',
 27 |   'factuality': {'factual': {'confidence': 1.0, 'value': 1.2225}},
 28 |   'time': {'dur-weeks': {'confidence': 0.3991, 'value': 0.7263},
 29 |    'dur-decades': {'confidence': 0.3991, 'value': -1.378},
 30 |    'dur-days': {'confidence': 0.3991, 'value': 0.7498},
 31 |    'dur-hours': {'confidence': 0.3991, 'value': -1.1733},
 32 |    'dur-seconds': {'confidence': 0.3991, 'value': -1.4243},
 33 |    'dur-forever': {'confidence': 0.3991, 'value': -1.2803},
 34 |    'dur-centuries': {'confidence': 0.3991, 'value': -1.1213},
 35 |    'dur-instant': {'confidence': 0.3991, 'value': -1.3219},
 36 |    'dur-years': {'confidence': 0.3991, 'value': -1.1953},
 37 |    'dur-minutes': {'confidence': 0.3991, 'value': 0.8558},
 38 |    'dur-months': {'confidence': 0.3991, 'value': 0.6852}},
 39 |   'genericity': {'pred-dynamic': {'confidence': 1.0, 'value': 1.1508},
 40 |    'pred-hypothetical': {'confidence': 1.0, 'value': -1.1583},
 41 |    'pred-particular': {'confidence': 1.0, 'value': 1.1508}}}}
 42 | test_document_semantics_node_raw = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics', 'frompredpatt': True, 'type': 'predicate', 'factuality': {'factual': {'value': {'factuality-annotator-26': 1, 'factuality-annotator-34': 1}, 'confidence': {'factuality-annotator-26': 4, 'factuality-annotator-34': 4}}}, 'time': {'duration': {'value': {'time-annotator-508': 4, 'time-annotator-619': 6, 'time-annotator-310': 5, 'time-annotator-172': 4, 'time-annotator-448': 5, 'time-annotator-548': 6}, 'confidence': {'time-annotator-508': 2, 'time-annotator-619': 4, 'time-annotator-310': 4, 'time-annotator-172': 4, 'time-annotator-448': 1, 'time-annotator-548': 2}}}, 'genericity': {'pred-dynamic': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-hypothetical': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-particular': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}}}}
 43 | 
 44 | 
 45 | total_graphs = 16622
 46 | total_documents = 1174
 47 | 
 48 | 
 49 | data_dir = resource_filename('decomp', 'data/')
 50 | 
 51 | 
 52 | def _load_corpus(base, version, annotation_format):
 53 |     UDSCorpus.CACHE_DIR = base
 54 | 
 55 |     try:
 56 |         os.makedirs(os.path.join(base,
 57 |                                  version,
 58 |                                  annotation_format,
 59 |                                  'sentence/'))
 60 |         os.makedirs(os.path.join(base,
 61 |                                  version,
 62 |                                  annotation_format,
 63 |                                  'document/'))
 64 | 
 65 |     except FileExistsError:
 66 |         pass
 67 | 
 68 |     return UDSCorpus(version=version,
 69 |                      annotation_format=annotation_format)
 70 | 
 71 | def _assert_correct_corpus_initialization(uds, raw):
 72 |     # Assert all graphs and documents initialized
 73 |     assert uds.ngraphs == total_graphs
 74 |     assert uds.ndocuments == total_documents
 75 | 
 76 |     n_sentence_graphs = 0
 77 | 
 78 |     for doc_id in uds.documentids:
 79 |         n_sentence_graphs += len(uds.documents[doc_id].sentence_graphs)
 80 | 
 81 |     assert n_sentence_graphs == total_graphs
 82 | 
 83 |     # Inspect a test document
 84 |     test_doc = uds.documents[test_document_name]
 85 |     assert test_doc.genre == test_document_genre
 86 |     assert test_doc.timestamp == test_document_timestamp
 87 |     assert test_doc.sentence_ids == test_document_sentence_ids
 88 |     assert test_doc.text == test_document_text
 89 |     assert test_doc.document_graph is not None
 90 | 
 91 |     print(test_doc.semantics_node(test_document_node))
 92 |     
 93 |     if raw:
 94 |         assert uds.annotation_format == 'raw'
 95 |         #assert test_doc.semantics_node(test_document_node) == test_document_semantics_node_raw
 96 |     else:
 97 |         assert uds.annotation_format == 'normalized'
 98 |         #assert test_doc.semantics_node(test_document_node) == test_document_semantics_node_normalized
 99 | 
100 | def _assert_document_annotation(uds, raw):
101 |     if raw:
102 |         node_ann, edge_ann = setup_raw_document_annotations()
103 |     else:
104 |         node_ann, edge_ann = setup_normalized_document_annotations()
105 | 
106 |     document = list(node_ann.node_attributes.keys())[0]
107 | 
108 |     # assert node annotations
109 |     node_ann_attrs = dict(list(node_ann.node_attributes.values())[0])
110 | 
111 |     for doc_node, node_annotation in node_ann_attrs.items():
112 |         for k, v in node_annotation.items():
113 |             assert uds.documents[document].document_graph.nodes[doc_node][k] == v
114 | 
115 |     # assert edge annotations
116 |     edge_ann_attrs = dict(list(edge_ann.edge_attributes.values())[0])
117 | 
118 |     for doc_edge, edge_annotation in edge_ann_attrs.items():
119 |         for k, v in edge_annotation.items():
120 |             assert uds.documents[document].document_graph.edges[doc_edge][k] == v
121 | 
122 | class TestUDSCorpus:
123 | 
124 |     # @pytest.mark.slow
125 |     # def test_load_v1_normalized(self, tmp_path, caplog):
126 |     #     caplog.set_level(logging.WARNING)
127 | 
128 |     #     uds = _load_corpus(tmp_path, '1.0', 'normalized')
129 | 
130 |     #     raw = False
131 | 
132 |     #     _assert_correct_corpus_initialization(uds, raw)
133 |     #     #_assert_document_annotation(uds, raw)
134 | 
135 |     #     # reload the UDSCorpus, which will initialize it from
136 |     #     # the now-serialized graphs
137 |     #     uds_cached = _load_corpus(tmp_path, '1.0', 'normalized')
138 | 
139 |     #     _assert_correct_corpus_initialization(uds_cached, raw)
140 |     #     #_assert_document_annotation(uds_cached, raw)
141 | 
142 | 
143 |     # @pytest.mark.slow        
144 |     # def test_load_v2_normalized(self, tmp_path, caplog):
145 |     #     caplog.set_level(logging.WARNING)
146 |         
147 |     #     uds = _load_corpus(tmp_path, '2.0', 'normalized')
148 | 
149 |     #     raw = False
150 | 
151 |     #     _assert_correct_corpus_initialization(uds, raw)
152 |     #     #_assert_document_annotation(uds, raw)
153 | 
154 |     #     # reload the UDSCorpus, which will initialize it from
155 |     #     # the now-serialized graphs
156 |     #     uds_cached = _load_corpus(tmp_path, '2.0', 'normalized')
157 | 
158 |     #     _assert_correct_corpus_initialization(uds_cached, raw)
159 |     #     #_assert_document_annotation(uds_cached, raw)
160 | 
161 |     # @pytest.mark.slow        
162 |     # def test_load_v1_raw(self, tmp_path, caplog):
163 |     #     caplog.set_level(logging.WARNING)
164 | 
165 |     #     uds = _load_corpus(tmp_path, '1.0', 'raw')
166 | 
167 |     #     raw = True
168 | 
169 |     #     _assert_correct_corpus_initialization(uds, raw)
170 |     #     #_assert_document_annotation(uds, raw)
171 | 
172 |     #     # reload the UDSCorpus, which will initialize it from
173 |     #     # the now-serialized graphs
174 |     #     uds_cached = _load_corpus(tmp_path, '1.0', 'raw')
175 | 
176 |     #     _assert_correct_corpus_initialization(uds_cached, raw)
177 |     #     #_assert_document_annotation(uds_cached, raw)
178 | 
179 |     @pytest.mark.slow        
180 |     def test_load_v2_raw(self, tmp_path, caplog):
181 |         caplog.set_level(logging.WARNING)
182 | 
183 |         uds = _load_corpus(tmp_path, '2.0', 'raw')
184 | 
185 |         raw = True
186 | 
187 |         #print(uds.metadata.to_dict())
188 | 
189 |         print(uds._sentences_paths)
190 |         print(uds._documents_paths)
191 |         _assert_correct_corpus_initialization(uds, raw)
192 |         #_assert_document_annotation(uds, raw)
193 | 
194 |         # reload the UDSCorpus, which will initialize it from
195 |         # the now-serialized graphs
196 |         uds_cached = _load_corpus(tmp_path, '2.0', 'raw')
197 | 
198 |         print()
199 |         #print(uds_cached.metadata.to_dict())
200 | 
201 |         raise Exception
202 |         
203 |         
204 |         _assert_correct_corpus_initialization(uds_cached, raw)
205 |         #_assert_document_annotation(uds_cached, raw)
206 | 
207 | # def _test_uds_corpus_load(version, raw, data_dir):
208 | #     # Remove cached graphs
209 | #     if raw:
210 | #         annotation_format = 'raw'
211 | #     else:
212 | #         annotation_format = 'normalized'
213 | 
214 | #     sentence_path = os.path.join(data_dir, version, annotation_format, 'sentence')
215 | #     doc_path = os.path.join(data_dir, version, annotation_format, 'document')
216 | 
217 | #     if glob(os.path.join(sentence_path, '*.json')):
218 | #         os.system('rm ' + sentence_path + '/*.json')
219 | 
220 | #     if glob(os.path.join(doc_path, '*.json')):
221 | #         os.system('rm ' + doc_path + '/*.json')
222 | 
223 | 
224 | #     annotations_dir = os.path.join(doc_path, 'annotations')
225 | #     if not glob(annotations_dir):
226 | #         os.system('mkdir ' + annotations_dir)
227 | #     if raw:
228 | #         # Dump the test anontations to JSON files
229 | #         raw_node_ann = json.loads(raw_node_document_annotation)
230 | #         raw_edge_ann = json.loads(raw_edge_document_annotation)
231 | #         raw_node_ann_path = os.path.join(annotations_dir, 'raw_node.json')
232 | #         raw_edge_ann_path = os.path.join(annotations_dir, 'raw_edge.json')
233 | #         annotations = [raw_node_ann, raw_edge_ann]
234 | #         paths = [raw_node_ann_path, raw_edge_ann_path]
235 | #     else:
236 | #         norm_node_ann = json.loads(normalized_node_document_annotation)
237 | #         norm_edge_ann = json.loads(normalized_edge_document_annotation)
238 | #         norm_node_ann_path = os.path.join(annotations_dir, 'norm_node.json')
239 | #         norm_edge_ann_path = os.path.join(annotations_dir, 'norm_edge.json')
240 | #         annotations = [norm_node_ann, norm_edge_ann]
241 | #         paths = [norm_node_ann_path, norm_edge_ann_path]
242 | 
243 | 
244 | #     for ann, path in zip(annotations, paths):
245 | #         os.system('touch ' + path)
246 | #         with open(path, 'w') as out:
247 | #             json.dump(ann, out)
248 | 
249 | #     # Load the UDSCorpus without any options
250 | #     uds = UDSCorpus(version=version, annotation_format=annotation_format)
251 | #     assert_correct_corpus_initialization(uds, raw)
252 | #     assert_document_annotation(uds, raw)
253 | 
254 | #     # Reload the UDSCorpus, which will initialize it from
255 | #     # the now-serialized graphs
256 | #     uds_cached = UDSCorpus(version=version, annotation_format=annotation_format)
257 | #     assert_correct_corpus_initialization(uds_cached, raw)
258 | #     assert_document_annotation(uds, raw)
259 | 
260 | #     # Remove the cached graphs and annotations
261 | #     os.system('rm ' + sentence_path + '/*.json')
262 | #     os.system('rm ' + doc_path + '/*.json')
263 | #     for path in paths:
264 | #         os.system('rm ' + path)
265 | 


--------------------------------------------------------------------------------
/tests/test_uds_document.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | test_document_name = 'answers-20111105112131AA6gIX6_ans'
 4 | test_document_genre = 'answers'
 5 | test_document_timestamp = '20111105112131'
 6 | 
 7 | 
 8 | test_document_text = 'My dad just does n\'t understand ? Ugh my dad is so stupid ... he just does n\'t understand anything ! I have 5 sisters and so including my mom ... he is the only guy in a house of six females . Now I \'m the youngest and I just got my period so now we all have ours and he thinks it \'s a good thing ? He \'s always like " ohh you must be so happy to finally have yours , I wish I had mine ! " and he is n\'t even joking . I think just living in a house with so many girls is making him go crazy ? Yep , the females are just getting to him ... dads .. Do n\'t blame him please , he feels lonely and wants to show his attention to all of you to look after you , please forgive and sympathy if he miss something . I am sorry for him , he is a good dad'
 9 | 
10 | test_document_sentence_ids = {'ewt-train-7189': 'answers-20111105112131AA6gIX6_ans-0001',
11 |  'ewt-train-7190': 'answers-20111105112131AA6gIX6_ans-0002',
12 |  'ewt-train-7191': 'answers-20111105112131AA6gIX6_ans-0003',
13 |  'ewt-train-7192': 'answers-20111105112131AA6gIX6_ans-0004',
14 |  'ewt-train-7193': 'answers-20111105112131AA6gIX6_ans-0005',
15 |  'ewt-train-7194': 'answers-20111105112131AA6gIX6_ans-0006',
16 |  'ewt-train-7195': 'answers-20111105112131AA6gIX6_ans-0007',
17 |  'ewt-train-7196': 'answers-20111105112131AA6gIX6_ans-0008',
18 |  'ewt-train-7197': 'answers-20111105112131AA6gIX6_ans-0009'}
19 | 
20 | test_document_node = 'ewt-train-7195-document-pred-7'
21 | 
22 | test_document_semantics_node_normalized = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics',
23 |   'frompredpatt': True,
24 |   'type': 'predicate',
25 |   'factuality': {'factual': {'confidence': 1.0, 'value': 1.2225}},
26 |   'time': {'dur-weeks': {'confidence': 0.3991, 'value': 0.7263},
27 |    'dur-decades': {'confidence': 0.3991, 'value': -1.378},
28 |    'dur-days': {'confidence': 0.3991, 'value': 0.7498},
29 |    'dur-hours': {'confidence': 0.3991, 'value': -1.1733},
30 |    'dur-seconds': {'confidence': 0.3991, 'value': -1.4243},
31 |    'dur-forever': {'confidence': 0.3991, 'value': -1.2803},
32 |    'dur-centuries': {'confidence': 0.3991, 'value': -1.1213},
33 |    'dur-instant': {'confidence': 0.3991, 'value': -1.3219},
34 |    'dur-years': {'confidence': 0.3991, 'value': -1.1953},
35 |    'dur-minutes': {'confidence': 0.3991, 'value': 0.8558},
36 |    'dur-months': {'confidence': 0.3991, 'value': 0.6852}},
37 |   'genericity': {'pred-dynamic': {'confidence': 1.0, 'value': 1.1508},
38 |    'pred-hypothetical': {'confidence': 1.0, 'value': -1.1583},
39 |    'pred-particular': {'confidence': 1.0, 'value': 1.1508}}}}
40 | 
41 | test_document_semantics_node_raw = {'ewt-train-7195-semantics-pred-7': {'domain': 'semantics', 'frompredpatt': True, 'type': 'predicate', 'factuality': {'factual': {'value': {'factuality-annotator-26': 1, 'factuality-annotator-34': 1}, 'confidence': {'factuality-annotator-26': 4, 'factuality-annotator-34': 4}}}, 'time': {'duration': {'value': {'time-annotator-508': 4, 'time-annotator-619': 6, 'time-annotator-310': 5, 'time-annotator-172': 4, 'time-annotator-448': 5, 'time-annotator-548': 6}, 'confidence': {'time-annotator-508': 2, 'time-annotator-619': 4, 'time-annotator-310': 4, 'time-annotator-172': 4, 'time-annotator-448': 1, 'time-annotator-548': 2}}}, 'genericity': {'pred-dynamic': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-hypothetical': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}, 'pred-particular': {'value': {'genericity-pred-annotator-277': 0}, 'confidence': {'genericity-pred-annotator-277': 2}}}}}
42 | 
43 | @pytest.fixture
44 | def normalized_node_document_annotation(test_data_dir):
45 |     fpath = os.path.join(test_data_dir,
46 |                          'normalized_node_document_annotation.json')
47 |     with open(fpath) as f:
48 |         return f.read()
49 | 
50 | 
51 | @pytest.fixture
52 | def normalized_edge_document_annotation(test_data_dir):
53 |     fpath = os.path.join(test_data_dir,
54 |                          'normalized_edge_document_annotation.json')
55 |     with open(fpath) as f:
56 |         return f.read()
57 | 
58 | 
59 | @pytest.fixture
60 | def normalized_document_annotations(normalized_node_document_annotation,
61 |                                     normalized_edge_document_annotation):
62 |     norm_node_ann = NormalizedUDSAnnotation.from_json(normalized_node_document_annotation)
63 |     norm_edge_ann = NormalizedUDSAnnotation.from_json(normalized_edge_document_annotation)
64 | 
65 |     return norm_node_ann, norm_edge_ann
66 | 
67 | 
68 | @pytest.fixture
69 | def raw_node_document_annotation():
70 |     return '{"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-25": {"subspace": {"property": {"confidence": {"annotator1": 0.12}, "value": {"annotator1": 0.0}}}}, "ewt-train-7192-document-pred-20": {"subspace": {"property": {"confidence": {"annotator2": 0.55, "annotator3": 0.07}, "value": {"annotator2": 0.0, "annotator3": 0.0}}}}, "ewt-train-7192-document-pred-20": {"subspace": {"property": {"confidence": {"annotator2": 0.55}, "value": {"annotator2": 0.0}}}}}}'
71 | 
72 | 
73 | @pytest.fixture
74 | def raw_edge_document_annotation():
75 |     return '{"answers-20111105112131AA6gIX6_ans": {"ewt-train-7192-document-pred-20%%ewt-train-7192-document-arg-2": {"subspace": {"property": {"confidence": {"annotator1": 0.12}, "value": {"annotator1": 0.0}}}}, "ewt-train-7192-document-pred-20%%ewt-train-7189-document-arg-2": {"subspace": {"property": {"confidence": {"annotator2": 0.55, "annotator3": 0.07}, "value": {"annotator2": 0.0, "annotator3": 0.0}}}}, "ewt-train-7192-document-pred-25%%ewt-train-7191-document-arg-18": {"subspace": {"property": {"confidence": {"annotator2": 0.55}, "value": {"annotator2": 0.0}}}}}}'
76 | 
77 | @pytest.fixture
78 | def raw_document_annotations(raw_node_document_annotation,
79 |                              raw_edge_document_annotation):
80 |     raw_node_ann = RawUDSAnnotation.from_json(raw_node_document_annotation)
81 |     raw_edge_ann = RawUDSAnnotation.from_json(raw_edge_document_annotation)
82 | 
83 |     return raw_node_ann, raw_edge_ann
84 | 


--------------------------------------------------------------------------------
/tests/test_uds_metadata.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from copy import deepcopy
  4 | from typing import List
  5 | 
  6 | from decomp.semantics.uds.metadata import _dtype
  7 | from decomp.semantics.uds.metadata import UDSDataType
  8 | from decomp.semantics.uds.metadata import UDSPropertyMetadata
  9 | from decomp.semantics.uds.metadata import UDSAnnotationMetadata
 10 | 
 11 | def test_dtype():
 12 |     assert _dtype('int') is int
 13 |     assert _dtype('str') is str
 14 |     assert _dtype('float') is float
 15 |     assert _dtype('bool') is bool
 16 | 
 17 | 
 18 | class TestUDSDataType:
 19 | 
 20 |     catdict = {'int': [1, 2, 3, 4, 5],
 21 |                'str': ['yes', 'maybe', 'no']}
 22 | 
 23 |     cases = [({'datatype': 'int',
 24 |                'categories': [1, 2, 3, 4, 5],
 25 |                'ordered': True},
 26 |               {'datatype': 'int',
 27 |                'categories': [1, 2, 3, 4, 5],
 28 |                'ordered': True,
 29 |                'lower_bound': 1,
 30 |                'upper_bound': 5}),
 31 |              ({'datatype': 'int'},
 32 |               {'datatype': 'int'}),
 33 |              ({'datatype': 'float',
 34 |                'lower_bound': 0.0,
 35 |                'upper_bound': 1.0},
 36 |               {'datatype': 'float',
 37 |                'ordered': True,
 38 |                'lower_bound': 0.0,
 39 |                'upper_bound': 1.0})]
 40 | 
 41 |     def test_init_simple(self):
 42 |         UDSDataType(datatype=str)
 43 |         UDSDataType(datatype=int)
 44 |         UDSDataType(datatype=bool)
 45 |         UDSDataType(datatype=float)
 46 | 
 47 |     def test_init_categorical(self):
 48 |         for t, c in self.catdict.items():
 49 |             for o in [True, False]:
 50 |                 t = int if t == 'int' else str
 51 |                 UDSDataType(datatype=t,
 52 |                             categories=c,
 53 |                             ordered=o)
 54 | 
 55 |     def test_from_dict_simple(self):
 56 |         UDSDataType.from_dict({'datatype': 'str'})        
 57 |         UDSDataType.from_dict({'datatype': 'int'})
 58 |         UDSDataType.from_dict({'datatype': 'bool'})
 59 |         UDSDataType.from_dict({'datatype': 'float'})
 60 | 
 61 |     def test_from_dict_categorical(self):
 62 |         # the name for the categories key is "categories"
 63 |         with pytest.raises(KeyError):
 64 |             UDSDataType.from_dict({'datatype': 'int',
 65 |                                    'category': [1, 2, 3, 4, 5],
 66 |                                    'ordered': True})
 67 | 
 68 |         # floats cannot be categorical
 69 |         with pytest.raises(ValueError):
 70 |             UDSDataType.from_dict({'datatype': 'float',
 71 |                                    'categories': [1, 2, 3, 4, 5],
 72 |                                    'ordered': True})
 73 | 
 74 |         # bounds can only be specified if ordered is not specified or
 75 |         # is True
 76 |         with pytest.raises(ValueError):
 77 |             UDSDataType.from_dict({'datatype': 'str',
 78 |                                    'categories': ["no", "maybe", "yes"],
 79 |                                    'ordered': False,
 80 |                                    'lower_bound': "no",
 81 |                                    'upper_bound': "yes"})
 82 | 
 83 |         # these are good
 84 |         for t, c in self.catdict.items():
 85 |             for o in [True, False]:
 86 |                 dt = UDSDataType.from_dict({'datatype': t,
 87 |                                             'categories': c,
 88 |                                             'ordered': o})
 89 | 
 90 |                 assert dt.is_categorical
 91 |                 assert dt.is_ordered_categorical == o
 92 | 
 93 |                 if o:
 94 |                     assert dt.categories == c
 95 |                 else:
 96 |                     assert dt.categories == set(c)
 97 | 
 98 |     def test_from_dict_bounded(self):
 99 |         # bounded datatypes should only be float or int
100 |         with pytest.raises(ValueError):
101 |             UDSDataType.from_dict({'datatype': 'str',
102 |                                    'categories': ['yes', 'maybe', 'no'],
103 |                                    'ordered': True,
104 |                                    'lower_bound': 'no',
105 |                                    'upper_bound': 'yes'})
106 | 
107 |         # the the datatype is categorical, the lower bound should
108 |         # match the category lower bound
109 |         with pytest.raises(ValueError):
110 |             UDSDataType.from_dict({'datatype': 'int',
111 |                                    'categories': [1, 2, 3, 4, 5],
112 |                                    'ordered': True,
113 |                                    'lower_bound': 2,
114 |                                    'upper_bound': 5})
115 | 
116 |         # these are good
117 |         for c, _ in self.cases:
118 |             UDSDataType.from_dict(c)
119 | 
120 |     def test_to_dict(self):
121 |         for c_in, c_out in self.cases:
122 |             loaded = UDSDataType.from_dict(c_in)
123 |             assert loaded.to_dict() == c_out
124 | 
125 |     def test_eq(self):
126 |         for c_in, c_out in self.cases:
127 |             loaded1 = UDSDataType.from_dict(c_in)
128 |             loaded2 = UDSDataType.from_dict(c_out)
129 | 
130 |             assert loaded1 == loaded2
131 | 
132 | sentence_metadata_example = {'protoroles': {'awareness': {'annotators': ['protoroles-annotator-8',
133 |                                                                          'protoroles-annotator-9'],
134 |                                                           'confidence': {'categories': [0, 1],
135 |                                                                          'datatype': 'int',
136 |                                                                          'ordered': False},
137 |                                                           'value': {'categories': [1, 2, 3, 4, 5],
138 |                                                                     'datatype': 'int',
139 |                                                                     'ordered': True}},
140 |                                             'change_of_location': {'annotators': ['protoroles-annotator-0',
141 |                                                                                   'protoroles-annotator-1'],
142 |                                                                    'confidence': {'categories': [0, 1],
143 |                                                                                   'datatype': 'int',
144 |                                                                                   'ordered': False},
145 |                                                                    'value': {'categories': [1, 2, 3, 4, 5],
146 |                                                                              'datatype': 'int',
147 |                                                                              'ordered': True}}}}
148 | 
149 | sentence_metadata_example_full = {'protoroles': {'awareness': {'annotators': ['protoroles-annotator-8',
150 |                                                                               'protoroles-annotator-9'],
151 |                                                                'confidence': {'categories': [0, 1],
152 |                                                                               'datatype': 'int',
153 |                                                                               'ordered': False},
154 |                                                           'value': {'categories': [1, 2, 3, 4, 5],
155 |                                                                     'datatype': 'int',
156 |                                                                     'ordered': True,
157 |                                                                     'lower_bound': 1,
158 |                                                                     'upper_bound': 5}},
159 |                                             'change_of_location': {'annotators': ['protoroles-annotator-0',
160 |                                                                                   'protoroles-annotator-1'],
161 |                                                                    'confidence': {'categories': [0, 1],
162 |                                                                                   'datatype': 'int',
163 |                                                                                   'ordered': False},
164 |                                                                    'value': {'categories': [1, 2, 3, 4, 5],
165 |                                                                              'datatype': 'int',
166 |                                                                              'ordered': True,
167 |                                                                              'lower_bound': 1,
168 |                                                                              'upper_bound': 5}}}}
169 | 
170 | 
171 | sentence_metadata_example_noann = deepcopy(sentence_metadata_example)
172 | 
173 | for subspace, propdict in sentence_metadata_example_noann.items():
174 |     for prop, md in propdict.items():
175 |         del md['annotators']
176 | 
177 | 
178 | class TestUDSPropertyMetadata:
179 | 
180 |     def test_init(self):
181 |         pass
182 | 
183 |     def test_from_dict(self):
184 |         metadatadict = sentence_metadata_example['protoroles']['awareness']
185 |         metadata = UDSPropertyMetadata.from_dict(metadatadict)
186 | 
187 |         assert isinstance(metadata.value, UDSDataType)
188 |         assert isinstance(metadata.confidence, UDSDataType)
189 | 
190 |         assert metadata.value.datatype is int
191 |         assert metadata.confidence.datatype is int
192 | 
193 |         assert metadata.value.categories == [1, 2, 3, 4, 5]
194 |         assert metadata.confidence.categories == {0, 1}
195 | 
196 |         assert metadata.annotators == {'protoroles-annotator-8',
197 |                                        'protoroles-annotator-9'}
198 | 
199 |     def test_to_dict(self):
200 |         metadatadict = sentence_metadata_example['protoroles']['awareness']
201 |         metadata = UDSPropertyMetadata.from_dict(metadatadict)
202 | 
203 |         out_in_out = UDSPropertyMetadata.from_dict(metadata.to_dict()).to_dict()
204 | 
205 |         # have to check that the set of annotators is equal, because
206 |         # they could be put out of order when loaded in
207 |         assert set(sentence_metadata_example_full['protoroles']['awareness']['annotators']) ==\
208 |             set(out_in_out['annotators'])
209 | 
210 |         assert sentence_metadata_example_full['protoroles']['awareness']['value'] ==\
211 |             out_in_out['value']
212 | 
213 |         assert sentence_metadata_example_full['protoroles']['awareness']['confidence'] ==\
214 |             out_in_out['confidence']
215 | 
216 | class TestUDSAnnotationMetadata:
217 | 
218 |     metadata = UDSAnnotationMetadata.from_dict(sentence_metadata_example)
219 |     metadata_noann = UDSAnnotationMetadata.from_dict(sentence_metadata_example_noann)
220 | 
221 |     def test_getitem(self):
222 |         self.metadata['protoroles']
223 |         self.metadata['protoroles', 'awareness']
224 |         self.metadata['protoroles']['awareness']
225 |         self.metadata['protoroles', 'awareness'].value
226 | 
227 |         with pytest.raises(TypeError):
228 |             self.metadata['protoroles', 'awareness', 'value']
229 | 
230 |     def test_add(self):
231 |         assert self.metadata == self.metadata + self.metadata
232 | 
233 |         metadatadict1 = {'protoroles': {'awareness': sentence_metadata_example['protoroles']['awareness']}}
234 |         metadatadict2 = {'protoroles': {'change_of_location': sentence_metadata_example['protoroles']['change_of_location']}}
235 | 
236 |         metadata1 = UDSAnnotationMetadata.from_dict(metadatadict1)
237 |         metadata2 = UDSAnnotationMetadata.from_dict(metadatadict2)
238 | 
239 |         metadata = metadata1 + metadata2
240 | 
241 |     def test_subspaces(self):
242 |         assert self.metadata.subspaces == {'protoroles'}
243 | 
244 |     def test_properties(self):
245 |         assert self.metadata.properties() == {'awareness',
246 |                                               'change_of_location'}
247 | 
248 |         assert self.metadata.properties('protoroles') == {'awareness',
249 |                                                           'change_of_location'}
250 | 
251 |     def test_annotators(self):
252 |         assert self.metadata.annotators() == {'protoroles-annotator-0',
253 |                                               'protoroles-annotator-1',
254 |                                               'protoroles-annotator-8',
255 |                                               'protoroles-annotator-9'}
256 | 
257 |         assert self.metadata.annotators('protoroles') == {'protoroles-annotator-0',
258 |                                                           'protoroles-annotator-1',
259 |                                                           'protoroles-annotator-8',
260 |                                                           'protoroles-annotator-9'}
261 | 
262 |         assert self.metadata.annotators('protoroles', 'awareness') == {'protoroles-annotator-8',
263 |                                                                        'protoroles-annotator-9'}
264 | 
265 | 
266 |         with pytest.raises(ValueError):
267 |             self.metadata.annotators(prop='awareness')
268 | 
269 |         assert self.metadata_noann.annotators() is None
270 | 
271 |     def test_has_annotators(self):
272 |         assert self.metadata.has_annotators()
273 |         assert self.metadata.has_annotators('protoroles')
274 |         assert self.metadata.has_annotators('protoroles', 'awareness')
275 |         assert not self.metadata_noann.has_annotators()
276 | 
277 | 
278 | class TestUDSCorpusMetadata:
279 | 
280 |     metadata = UDSAnnotationMetadata.from_dict(sentence_metadata_example)
281 | 


--------------------------------------------------------------------------------
/tests/test_vis.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os 
 3 | from predpatt import PredPatt, PredPattOpts, load_conllu
 4 | from decomp.syntax.dependency import DependencyGraphBuilder
 5 | from decomp.semantics.predpatt import PredPattGraphBuilder
 6 | from decomp.semantics.uds import UDSSentenceGraph, UDSCorpus
 7 | from decomp.vis.uds_vis import UDSVisualization
 8 | from decomp import NormalizedUDSAnnotation
 9 | import pdb 
10 | 
11 | from test_uds_graph import raw_sentence_graph, rawtree, listtree
12 | import pytest
13 | import dash 
14 | from dash.testing.application_runners import import_app
15 | 
16 | 
17 | @pytest.fixture
18 | def basic_sentence_graph(test_data_dir):
19 |     graph_data = json.load(open(os.path.join(test_data_dir, "vis_data.json")))
20 |     graph = UDSSentenceGraph.from_dict(graph_data)
21 |     return graph
22 | 
23 | def test_vis_basic(basic_sentence_graph, dash_duo):
24 |     vis = UDSVisualization(basic_sentence_graph, add_syntax_edges=True)
25 |     app = vis.serve(do_return = True)
26 |     dash_duo.start_server(app)
27 |     assert(dash_duo.find_element("title") is not None)
28 | 
29 | def test_vis_raw(raw_sentence_graph):
30 |     with pytest.raises(AttributeError):
31 |         vis = UDSVisualization(raw_sentence_graph, add_syntax_edges=True)
32 |         vis.serve()
33 | 


--------------------------------------------------------------------------------
/uds-graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decompositional-semantics-initiative/decomp/efd26396118c577989ab86f5d8ffe018f5c594e1/uds-graph.png


--------------------------------------------------------------------------------