├── .gitignore
├── LICENSE
├── README.md
├── examples
    ├── examples_dmrs.py
    ├── examples_exact_matching.py
    ├── examples_mapping.py
    ├── examples_query.py
    └── examples_toy_robot.py
├── pydmrs
    ├── __config__
    │   ├── default_interface.conf
    │   └── default_simplification.conf
    ├── __init__.py
    ├── _exceptions.py
    ├── components.py
    ├── core.py
    ├── graphlang
    │   ├── __init__.py
    │   └── graphlang.py
    ├── mapping
    │   ├── __init__.py
    │   ├── mapping.py
    │   └── paraphrase.py
    ├── matching
    │   ├── __init__.py
    │   ├── aligned_matching.py
    │   ├── common.py
    │   ├── exact_matching.py
    │   ├── general_matching.py
    │   ├── match_evaluation.py
    │   └── query.py
    ├── pydelphin_interface.py
    ├── rooted.py
    ├── serial.py
    ├── simplification
    │   ├── __init__.py
    │   └── gpred_filtering.py
    ├── utils.py
    └── visualization
    │   ├── index.html
    │   └── static
    │       ├── bootstrap.min.css
    │       ├── bootstrap.min.js
    │       ├── d3.min.js
    │       ├── d3.min.js-LICENSE
    │       ├── dmrs.css
    │       ├── dmrs.js
    │       ├── jquery-1.12.3.min.js
    │       └── visualization.js
├── setup.cfg
├── setup.py
└── tests
    ├── matching
        ├── test_aligned_matching.py
        └── test_general_matching.py
    ├── test_components.py
    └── test_core.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 
59 | # Eclipse
60 | .project
61 | .pydevproject
62 | 
63 | # PyCharm
64 | .idea


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 DELPH-IN
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pydmrs
2 | 
3 | A library for manipulating Dependency Minimal Recursion Semantics (DMRS) structures.
4 | 
5 | ### References
6 | 
7 | - [Copestake (2009)](http://www.aclweb.org/anthology/E/E09/E09-1001.pdf)
8 | - [Copestake et al. (2016)](http://www.lrec-conf.org/proceedings/lrec2016/pdf/634_Paper.pdf)
9 | 


--------------------------------------------------------------------------------
/examples/examples_dmrs.py:
--------------------------------------------------------------------------------
  1 | from pydmrs.components import Pred, GPred, RealPred, Sortinfo, EventSortinfo, InstanceSortinfo
  2 | from pydmrs.core import Node, Link, DictDmrs
  3 | 
  4 | 
  5 | def the():
  6 |     dmrs = DictDmrs()
  7 |     dmrs.add_node(Node(pred=RealPred('the', 'q')))  # node id set automatically
  8 |     return dmrs
  9 | 
 10 | 
 11 | def the_cat():
 12 |     dmrs = DictDmrs(surface='the cat')
 13 |     dmrs.add_node(Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3))
 14 |     dmrs.add_node(Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
 15 |                        sortinfo=InstanceSortinfo(pers='3', num='sg',
 16 |                                                  ind='+')))  # underspecified sortinfo
 17 |     dmrs.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
 18 |     return dmrs
 19 | 
 20 | 
 21 | def the_mouse():
 22 |     dmrs = DictDmrs(surface='the mouse')
 23 |     dmrs.add_node(Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3))
 24 |     dmrs.add_node(Node(nodeid=2, pred=RealPred('mouse', 'n', '1'), cfrom=4, cto=9,
 25 |                        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')))
 26 |     dmrs.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
 27 |     return dmrs
 28 | 
 29 | 
 30 | def dog_cat():
 31 |     dmrs = DictDmrs(surface='dog cat')
 32 |     dmrs.add_node(Node(pred=RealPred('dog', 'n', '1'), cfrom=0, cto=3,
 33 |                        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')))
 34 |     dmrs.add_node(Node(pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
 35 |                        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')))
 36 |     return dmrs
 37 | 
 38 | 
 39 | def the_dog_chases_the_cat():
 40 |     return DictDmrs(
 41 |         surface='the dog chases the cat',
 42 |         nodes=[Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3),
 43 |                Node(nodeid=2, pred=RealPred('dog', 'n', '1'), cfrom=4, cto=7,
 44 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
 45 |                Node(nodeid=3, pred=RealPred('chase', 'v', '1'), cfrom=8, cto=14,
 46 |                     sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
 47 |                Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18),
 48 |                Node(nodeid=5, pred=RealPred('cat', 'n', '1'), cfrom=19, cto=22,
 49 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))],
 50 |         links=[Link(start=1, end=2, rargname='RSTR', post='H'),
 51 |                Link(start=3, end=2, rargname='ARG1', post='NEQ'),
 52 |                Link(start=3, end=5, rargname='ARG2', post='NEQ'),
 53 |                Link(start=4, end=5, rargname='RSTR', post='H')],
 54 |         index=3,
 55 |         top=3)
 56 | 
 57 | 
 58 | def the_cat_chases_the_dog():
 59 |     return DictDmrs(
 60 |         surface='the cat chases the dog',
 61 |         nodes=[Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3),
 62 |                Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
 63 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
 64 |                Node(nodeid=3, pred=RealPred('chase', 'v', '1'), cfrom=8, cto=14,
 65 |                     sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
 66 |                Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18),
 67 |                Node(nodeid=5, pred=RealPred('dog', 'n', '1'), cfrom=19, cto=22,
 68 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))],
 69 |         links=[Link(start=1, end=2, rargname='RSTR', post='H'),
 70 |                Link(start=3, end=2, rargname='ARG1', post='NEQ'),
 71 |                Link(start=3, end=5, rargname='ARG2', post='NEQ'),
 72 |                Link(start=4, end=5, rargname='RSTR', post='H')],
 73 |         index=3,
 74 |         top=3)
 75 | 
 76 | 
 77 | def the_dog_chases_the_mouse():
 78 |     return DictDmrs(
 79 |         nodes=[Node(nodeid=1, pred=RealPred('the', 'q')),
 80 |                Node(nodeid=2, pred=RealPred('dog', 'n', '1'),
 81 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
 82 |                Node(nodeid=3, pred=RealPred('chase', 'v', '1'),
 83 |                     sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
 84 |                Node(nodeid=4, pred=RealPred('the', 'q')),
 85 |                Node(nodeid=5, pred=RealPred('mouse', 'n', '1'),
 86 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))],
 87 |         links=[Link(start=1, end=2, rargname='RSTR', post='H'),
 88 |                Link(start=3, end=2, rargname='ARG1', post='NEQ'),
 89 |                Link(start=3, end=5, rargname='ARG2', post='NEQ'),
 90 |                Link(start=4, end=5, rargname='RSTR', post='H')],
 91 |         index=3,
 92 |         top=3)
 93 | 
 94 | 
 95 | def the_dog_chases_the_cat_and_the_mouse():
 96 |     return DictDmrs(
 97 |         nodes=[Node(nodeid=1, pred=RealPred('the', 'q')),
 98 |                Node(nodeid=2, pred=RealPred('dog', 'n', '1'),
 99 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
100 |                Node(nodeid=3, pred=RealPred('chase', 'v', '1'),
101 |                     sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
102 |                Node(nodeid=4, pred=RealPred('the', 'q')),
103 |                Node(nodeid=5, pred=RealPred('cat', 'n', '1'),
104 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
105 |                Node(nodeid=6, pred=GPred('udef_q')),
106 |                Node(nodeid=7, pred=RealPred('and', 'c'),
107 |                     sortinfo=InstanceSortinfo(pers='3', num='pl')),
108 |                Node(nodeid=8, pred=RealPred('the', 'q')),
109 |                Node(nodeid=9, pred=RealPred('mouse', 'n', '1'),
110 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))],
111 |         links=[Link(start=1, end=2, rargname='RSTR', post='H'),
112 |                Link(start=3, end=2, rargname='ARG1', post='NEQ'),
113 |                Link(start=3, end=7, rargname='ARG2', post='NEQ'),
114 |                Link(start=4, end=5, rargname='RSTR', post='H'),
115 |                Link(start=6, end=7, rargname='RSTR', post='H'),
116 |                Link(start=7, end=5, rargname='L-INDEX', post='NEQ'),
117 |                Link(start=7, end=9, rargname='R-INDEX', post='NEQ'),
118 |                Link(start=8, end=9, rargname='RSTR', post='H')],
119 |         index=3,
120 |         top=3)
121 | 
122 | 
123 | def the_dog_chases_the_cat_and_the_cat_chases_the_mouse():
124 |     return DictDmrs(
125 |         nodes=[Node(nodeid=1, pred=RealPred('the', 'q')),
126 |                Node(nodeid=2, pred=RealPred('dog', 'n', '1'),
127 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
128 |                Node(nodeid=3, pred=RealPred('chase', 'v', '1'),
129 |                     sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
130 |                Node(nodeid=4, pred=RealPred('the', 'q')),
131 |                Node(nodeid=5, pred=RealPred('cat', 'n', '1'),
132 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
133 |                Node(nodeid=6, pred=RealPred('and', 'c'),
134 |                     sortinfo=InstanceSortinfo(pers='3', num='pl')),
135 |                Node(nodeid=7, pred=RealPred('the', 'q')),
136 |                Node(nodeid=8, pred=RealPred('cat', 'n', '1'),
137 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
138 |                Node(nodeid=9, pred=RealPred('chase', 'v', '1'),
139 |                     sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
140 |                Node(nodeid=10, pred=RealPred('the', 'q')),
141 |                Node(nodeid=11, pred=RealPred('mouse', 'n', '1'),
142 |                     sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))],
143 |         links=[Link(start=1, end=2, rargname='RSTR', post='H'),
144 |                Link(start=3, end=2, rargname='ARG1', post='NEQ'),
145 |                Link(start=3, end=5, rargname='ARG2', post='NEQ'),
146 |                Link(start=4, end=5, rargname='RSTR', post='H'),
147 |                Link(start=6, end=3, rargname='L-INDEX', post='NEQ'),
148 |                Link(start=6, end=3, rargname='L-HNDL', post='H'),
149 |                Link(start=6, end=9, rargname='R-INDEX', post='NEQ'),
150 |                Link(start=6, end=9, rargname='R-HNDL', post='H'),
151 |                Link(start=7, end=8, rargname='RSTR', post='H'),
152 |                Link(start=9, end=8, rargname='ARG1', post='NEQ'),
153 |                Link(start=9, end=11, rargname='ARG2', post='NEQ'),
154 |                Link(start=10, end=11, rargname='RSTR', post='H')],
155 |         index=6,
156 |         top=6)
157 | 
158 | 
159 | def predsort():
160 |     dmrs = DictDmrs()
161 |     dmrs.add_node(Node(pred=Pred(), sortinfo=Sortinfo()))  # underspecified predicate and sortinfo
162 |     return dmrs
163 | 
164 | 
165 | def noun():
166 |     dmrs = DictDmrs()
167 |     dmrs.add_node(
168 |         Node(pred=RealPred('?', 'n', 'unknown'), sortinfo=Sortinfo()))  # underspecified noun and sortinfo
169 |     return dmrs
170 | 


--------------------------------------------------------------------------------
/examples/examples_exact_matching.py:
--------------------------------------------------------------------------------
 1 | from pydmrs.matching.exact_matching import dmrs_exact_matching
 2 | import examples.examples_dmrs as examples
 3 | 
 4 | 
 5 | if __name__ == '__main__':
 6 | 
 7 |     # "the" - "the dog chases the cat"
 8 |     assert len(list(dmrs_exact_matching(examples.the(), examples.the_dog_chases_the_cat()))) == 2
 9 | 
10 |     # "the cat" - "the dog chases the cat"
11 |     assert len(list(dmrs_exact_matching(examples.the_cat(), examples.the_dog_chases_the_cat()))) == 1
12 | 
13 |     # "dog cat" - "the dog chases the cat"
14 |     assert len(list(dmrs_exact_matching(examples.dog_cat(), examples.the_dog_chases_the_cat()))) == 1
15 | 
16 |     # "the dog chases the cat" - "the dog chases the cat"
17 |     assert len(list(dmrs_exact_matching(examples.the_dog_chases_the_cat(), examples.the_dog_chases_the_cat()))) == 1
18 | 
19 |     # "the cat chases the dog" - "the dog chases the cat"
20 |     assert not len(list(dmrs_exact_matching(examples.the_cat_chases_the_dog(), examples.the_dog_chases_the_cat())))
21 | 
22 |     # "the dog chases the cat" - "the dog chases the cat and the mouse"
23 |     assert not len(list(dmrs_exact_matching(examples.the_dog_chases_the_cat(), examples.the_dog_chases_the_cat_and_the_mouse())))
24 | 
25 |     # "the dog chases the cat" - "the dog chases the cat and the cat chases the mouse"
26 |     assert len(list(dmrs_exact_matching(examples.the_dog_chases_the_cat(), examples.the_dog_chases_the_cat_and_the_cat_chases_the_mouse()))) == 1
27 | 
28 |     # "the cat" - "the dog chases the cat and the cat chases the mouse"
29 |     assert len(list(dmrs_exact_matching(examples.the_cat(), examples.the_dog_chases_the_cat_and_the_cat_chases_the_mouse()))) == 2
30 | 
31 |     # "dog cat" - "the dog chases the cat and the cat chases the mouse"
32 |     assert len(list(dmrs_exact_matching(examples.dog_cat(), examples.the_dog_chases_the_cat_and_the_cat_chases_the_mouse()))) == 2
33 | 
34 |     # predsort - "the dog chases the cat"
35 |     assert len(list(dmrs_exact_matching(examples.predsort(), examples.the_dog_chases_the_cat()))) == 5
36 | 
37 |     # noun - "the dog chases the cat"
38 |     assert len(list(dmrs_exact_matching(examples.noun(), examples.the_dog_chases_the_cat()))) == 2
39 | 


--------------------------------------------------------------------------------
/examples/examples_mapping.py:
--------------------------------------------------------------------------------
  1 | from pydmrs.pydelphin_interface import parse, generate
  2 | from pydmrs.mapping.mapping import dmrs_mapping
  3 | from pydmrs.graphlang.graphlang import parse_graphlang
  4 | import examples.examples_dmrs as examples
  5 | 
  6 | 
  7 | if __name__ == '__main__':
  8 | 
  9 |     # basic functionality
 10 |     dmrs = examples.the_dog_chases_the_cat()
 11 |     search_dmrs = parse_graphlang('[1]:_the_q')
 12 |     replace_dmrs = parse_graphlang('[1]:_a_q')
 13 | 
 14 |     # iterative, all
 15 |     assert 'A dog chases a cat.' in generate(dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=True, all_matches=True))
 16 |     # not iterative, all
 17 |     assert all(sent in sents for sent, sents in zip(['A dog chases the cat.', 'The dog chases a cat.'], [generate(dmrs) for dmrs in dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=False, all_matches=True)]))
 18 |     # iterative, not all
 19 |     assert 'A dog chases the cat.' in generate(dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=True, all_matches=False))
 20 |     # not iterative, not all
 21 |     assert 'A dog chases the cat.' in generate(dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=False, all_matches=False))
 22 |     # original dmrs did not change so far
 23 |     assert 'The dog chases the cat.' in generate(dmrs)
 24 |     # iterative, not all
 25 |     dmrs = examples.the_dog_chases_the_cat()
 26 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False, iterative=True, all_matches=False)
 27 |     assert 'A dog chases the cat.' in generate(dmrs)
 28 |     # iterative, all
 29 |     dmrs = examples.the_dog_chases_the_cat()
 30 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False, iterative=True, all_matches=True)
 31 |     assert 'A dog chases a cat.' in generate(dmrs)
 32 | 
 33 | 
 34 | 
 35 |     dmrs = parse('Kim eats and Kim sleeps.')[0]
 36 |     search_dmrs = parse_graphlang('[4]:node=1 <-1- [2]:node <-l- [1]:_and_c e? -r-> [3]:node -1-> node=1 <-- proper_q; :2 <-lh- :1 -rh-> :3')
 37 |     replace_dmrs = parse_graphlang('[4]:node <-1- [2]:node <-l- [1]:_and_c e? -r-> [3]:node -1-> :4; :2 <=lh= :1 =rh=> :3')
 38 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 39 |     assert 'Kim eats and sleeps.' in generate(dmrs)
 40 | 
 41 | 
 42 |     # some examples inspired by examples from the AMR specification
 43 | 
 44 |     dmrs = parse('He described the mission as a failure.')[0]
 45 |     search_dmrs = parse_graphlang('[2]:node <-2- *[1]:_describe_v_as e? -3-> [3]:node')
 46 |     replace_dmrs = parse_graphlang('pronoun_q --> pron x[3sn_s] <-2- [1]:_describe_v_to e? <-2h- *_as_x_subord e[pui--] -1h-> _be_v_id e[ppi--] -1-> [2]:node; :_be_v_id -2-> [3]:node')
 47 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 48 |     assert 'As he described it, the mission is a failure.' in generate(dmrs)
 49 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
 50 |     assert 'He described the mission as a failure.' in generate(dmrs)
 51 | 
 52 |     dmrs = parse('The boy can go.')[0]
 53 |     search_dmrs = parse_graphlang('[1]:_can_v_modal e[p????] -1h-> [2]:_v e[pui--]')
 54 |     replace_dmrs = parse_graphlang('[1]:_possible_a_for e[o????] -1h-> [2]:_v e[ppi--]')
 55 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 56 |     assert 'It is possible that the boy goes.' in generate(dmrs)
 57 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
 58 |     assert 'The boy can go.' in generate(dmrs)
 59 | 
 60 |     dmrs = parse('The boy can\'t go.')[0]
 61 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 62 |     assert 'It is not possible that the boy goes.' in generate(dmrs)
 63 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
 64 |     assert 'The boy can\'t go.' in generate(dmrs)
 65 | 
 66 |     dmrs = parse('The boy must go.')[0]
 67 |     search_dmrs = parse_graphlang('[1]:_must_v_modal e? -1h-> [2]:_v e[pui--]')
 68 |     replace_dmrs = parse_graphlang('[1]:_necessary_a_for e? -1h-> [2]:_v e[ppi--]')
 69 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 70 |     assert 'It is necessary that the boy goes.' in generate(dmrs)
 71 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
 72 |     assert 'The boy must go.' in generate(dmrs)
 73 | 
 74 |     dmrs = parse('The boy should go.')[0]
 75 |     search_dmrs = parse_graphlang('[1]:_should_v_modal e? -1h-> [2]:_v e[pui--]')
 76 |     replace_dmrs = parse_graphlang('[1]:_recommend_v_to e? -2h-> [2]:_v e[ppi--]')
 77 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 78 |     assert 'That the boy goes, is recommended.' in generate(dmrs)
 79 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
 80 |     assert 'The boy should go.' in generate(dmrs)
 81 | 
 82 |     dmrs = parse('The boy is likely to go.')[0]
 83 |     search_dmrs = parse_graphlang('[1]:_likely_a_1 e? -1h-> [2]:_v e[oui--]')
 84 |     replace_dmrs = parse_graphlang('[1]:_likely_a_1 e? -1h-> [2]:_v e[ppi--]')
 85 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 86 |     assert 'It is likely that the boy goes.' in generate(dmrs)
 87 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
 88 |     assert 'The boy is likely to go.' in generate(dmrs)
 89 | 
 90 |     dmrs = parse('The boy would rather go.')[0]
 91 |     search_dmrs = parse_graphlang('[1]:_would_v_modal e? -1h-> [2]:_v e? <=1= _rather_a_1 i; :2 -1-> [3]:node')
 92 |     replace_dmrs = parse_graphlang('[1]:_prefer_v_to e? -2h-> [2]:_v e? -1-> [3]:node <-1- :1')
 93 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
 94 |     assert 'The boy prefers to go.' in generate(dmrs)
 95 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
 96 |     assert 'The boy would rather go.' in generate(dmrs)
 97 | 
 98 |     dmrs = parse('I don\'t have any money.')[0]
 99 |     search_dmrs = parse_graphlang('neg e[pui--] -1h-> [1]:_v e? -2-> [2]:node <-- _any_q')
100 |     replace_dmrs = parse_graphlang('[1]:_v e? -2-> [2]:node <-- _no_q')
101 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
102 |     assert 'I have no money.' in generate(dmrs)
103 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
104 |     assert 'I don\'t have any money.' in generate(dmrs)
105 | 
106 |     dmrs = parse('Kim doesn\'t like any cake.')[0]
107 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
108 |     assert 'Kim likes no cake.' in generate(dmrs)
109 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
110 |     assert 'Kim doesn\'t like any cake.' in generate(dmrs)
111 | 
112 |     dmrs = parse('The boy doesn\'t think his team will win.')[0]
113 |     search_dmrs = parse_graphlang('neg e[pui--] -1h-> [1]:_v e? -2h-> [2]:_v e?')
114 |     replace_dmrs = parse_graphlang('[1]:_v e? -2h-> neg e[pui--] -1h-> [2]:_v e?')
115 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
116 |     assert 'The boy thinks his team won\'t win.' in generate(dmrs)
117 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
118 |     assert 'The boy doesn\'t think his team will win.' in generate(dmrs)
119 | 
120 |     dmrs = parse('I don\'t believe that Kim likes cake.')[0]
121 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
122 |     assert 'I believe that Kim doesn\'t like cake.' in generate(dmrs)
123 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
124 |     assert 'I don\'t believe that Kim likes cake.' in generate(dmrs)
125 | 
126 |     dmrs = parse('I don\'t think that Kim doesn\'t like cake.')[0]
127 |     search_dmrs = parse_graphlang('neg e[pui--] -1h-> [1]:_v e? -2h-> neg e[pui--] -1h-> [2]:_v e?')
128 |     replace_dmrs = parse_graphlang('[1]:_v e? -2h-> [2]:_v e?')
129 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
130 |     assert 'I think that Kim likes cake.' in generate(dmrs)
131 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
132 |     assert 'I don\'t think that Kim doesn\'t like cake.' in generate(dmrs)
133 | 
134 | 
135 |     # Verb particle examples
136 | 
137 |     dmrs = parse('I look you up.')[0]
138 |     search_dmrs = parse_graphlang('[1]:_look_v_up e?')
139 |     replace_dmrs = parse_graphlang('[1]:_find_v_1 e?')
140 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
141 |     assert 'I find you.' in generate(dmrs)
142 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
143 |     assert 'I look you up.' in generate(dmrs)
144 | 
145 |     dmrs = parse('Kim carries on eating cake.')[0]
146 |     search_dmrs = parse_graphlang('[1]:_carry_v_on e?')
147 |     replace_dmrs = parse_graphlang('[1]:_continue_v_2 e?')
148 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
149 |     assert 'Kim continues eating cake.' in generate(dmrs)
150 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
151 |     assert 'Kim carries on eating cake.' in generate(dmrs)
152 | 
153 |     dmrs = parse('Alice passed a message on to Bob.')[0]
154 |     search_dmrs = parse_graphlang('[1]:_pass_v_on e?')
155 |     replace_dmrs = parse_graphlang('[1]:_give_v_1 e?')
156 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
157 |     assert 'Alice gave a message to Bob.' in generate(dmrs)
158 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
159 |     assert 'Alice passed a message on to Bob.' in generate(dmrs)
160 | 
161 |     dmrs = parse('Bob then gave Alice back the message.')[0]
162 |     search_dmrs = parse_graphlang('[1]:node <-2- [2]:_give_v_back e? -3-> [3]:node')
163 |     replace_dmrs = parse_graphlang('[3]:node <-2- [2]:_return_v_to e? -3-> [1]:node')
164 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
165 |     assert 'Bob then returned the message to Alice.' in generate(dmrs)
166 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
167 |     assert 'Bob then gave Alice back the message.' in generate(dmrs)
168 | 
169 |     dmrs = parse('He keeps on complaining.')[0]
170 |     search_dmrs = parse_graphlang('[2]:node <-1- [1]:_keep_v_on e? -2h-> [3]:_v e[pui-+] -1-> :2')
171 |     replace_dmrs = parse_graphlang('[1]:_continue_v_2 e? -1h-> [3]:_v e[oui--] -1-> [2]:node')
172 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
173 |     assert 'He continues to complain.' in generate(dmrs)
174 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
175 |     assert 'He keeps on complaining.' in generate(dmrs)
176 | 
177 |     dmrs = parse('He takes on great responsibility.')[0]
178 |     search_dmrs = parse_graphlang('[1]:_take_v_on e?')
179 |     replace_dmrs = parse_graphlang('[1]:_accept_v_1 e?')
180 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
181 |     assert 'He accepts great responsibility.' in generate(dmrs)
182 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
183 |     assert 'He takes on great responsibility.' in generate(dmrs)
184 | 
185 | 
186 |     # determinerless PPs
187 | 
188 |     dmrs = parse('I found you at last.')[0]
189 |     search_dmrs = parse_graphlang('[1]:_at_p e[pui--] -2-> _last_n_1 x[3s_+_] <-- idiom_q_i')
190 |     replace_dmrs = parse_graphlang('[1]:_final_a_1 e[pui--]')
191 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
192 |     assert 'I found you finally.' in generate(dmrs)
193 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
194 |     assert 'I found you at last.' in generate(dmrs)
195 | 
196 |     dmrs = parse('I am on edge.')[0]
197 |     search_dmrs = parse_graphlang('[1]:_on_p e? -2-> _edge_n_of x[3s_+_] <-- idiom_q_i')
198 |     replace_dmrs = parse_graphlang('[1]:_nervous_a_about e?')
199 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
200 |     assert 'I am nervous.' in generate(dmrs)
201 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
202 |     assert 'I am on edge.' in generate(dmrs)
203 | 
204 |     dmrs = parse('You can see the insects at close range.')[0]
205 |     search_dmrs = parse_graphlang('[1]:_at_p e[pui--] -2-> _range_n_of x[3s___] <-- udef_q; :_range_n_of <=1= _close_a_to e[p____]')
206 |     replace_dmrs = parse_graphlang('[1]:_from_p_state e[pui--] -2-> _distance_n_1 x[3s_+_] <-- _a_q; :_distance_n_1 <=1= _small_a_1 e[p____]')
207 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
208 |     assert 'You can see the insects from a small distance.' in generate(dmrs)
209 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
210 |     assert 'You can see the insects at close range.' in generate(dmrs)
211 | 
212 | 
213 |     # idioms
214 | 
215 |     dmrs = parse('Kim often took advantage of Sandy.')[0]
216 |     search_dmrs = parse_graphlang('[2]:node <-3- [1]:_take_v_of-i e? -2-> _advantage_n_i x[3s_+_] <-- idiom_q_i')
217 |     replace_dmrs = parse_graphlang('[1]:_benefit_v_from e? -2-> [2]:node')
218 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
219 |     assert 'Kim often benefitted from Sandy.' in generate(dmrs)
220 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
221 |     assert 'Kim often took advantage of Sandy.' in generate(dmrs)
222 | 
223 |     dmrs = parse('The government keeps tabs on everyone.')[0]
224 |     search_dmrs = parse_graphlang('[2]:node <-3- [1]:_keep_v_on-i e? -2-> _tabs_n_i x[3p_+_] <-- udef_q')
225 |     replace_dmrs = parse_graphlang('[1]:_watch_v_1 e? -2-> [2]:node')
226 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
227 |     assert 'The government watches everyone.' in generate(dmrs)
228 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
229 |     assert 'The government keeps tabs on everyone.' in generate(dmrs)
230 | 
231 |     dmrs = parse('I can give you a hand with your work.')[0]
232 |     search_dmrs = parse_graphlang('[2]:node <-3- [1]:_give_v_1 e? -2-> _hand_n_1 x[3s_+_] <-- _a_q')
233 |     replace_dmrs = parse_graphlang('[1]:_help_v_1 e? -2-> [2]:node')
234 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
235 |     assert 'I can help you with your work.' in generate(dmrs)
236 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
237 |     assert 'I can give you a hand with your work.' in generate(dmrs)
238 | 
239 |     dmrs = parse('The old senator kicked the bucket.')[0]
240 |     search_dmrs = parse_graphlang('[1]:_kick_v_i e? -2-> _bucket_n_1 x[3s_+_] <-- _the_q')
241 |     replace_dmrs = parse_graphlang('[1]:_die_v_1 e?')
242 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
243 |     assert 'The old senator died.' in generate(dmrs)
244 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
245 |     assert 'The old senator kicked the bucket.' in generate(dmrs)
246 | 
247 | 
248 |     # light verbs
249 | 
250 |     dmrs = parse('I give a talk on linguistics.')[0]
251 |     search_dmrs = parse_graphlang('[1]:_give_v_1 e? -2-> _talk_n_of-on x[3s_+_] <-- _a_q; :_talk_n_of-on -1-> [2]:node')
252 |     replace_dmrs = parse_graphlang('[1]:_talk_v_about e? <=1= _about_p e -2-> [2]:node')
253 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
254 |     assert 'I talk about linguistics.' in generate(dmrs)
255 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
256 |     assert 'I give a talk on linguistics.' in generate(dmrs)
257 | 
258 | 
259 |     # synonyms
260 | 
261 |     dmrs = parse('Kim loves cake.')[0]
262 |     search_dmrs = parse_graphlang('[1]:_love_v_1 e?')
263 |     replace_dmrs = parse_graphlang('[1]:_adore_v_1 e?')
264 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
265 |     assert 'Kim adores cake.' in generate(dmrs)
266 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
267 |     assert 'Kim loves cake.' in generate(dmrs)
268 | 
269 |     dmrs = parse('I like to play tennis.')[0]
270 |     search_dmrs = parse_graphlang('[1]:_like_v_1 e? -2h-> [2]:_v e[pui--]')
271 |     replace_dmrs = parse_graphlang('[1]:_enjoy_v_1 e? -2h-> [2]:_v e[pui-+]')
272 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
273 |     assert 'I enjoy playing tennis.' in generate(dmrs)
274 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
275 |     assert 'I like to play tennis.' in generate(dmrs)
276 | 
277 | 
278 |     # synonyms with re-ordering
279 | 
280 |     dmrs = parse('Kim gave a book to Sandy.')[0]
281 |     search_dmrs = parse_graphlang('[2]:node <-1- [1]:_give_v_1 e? -3-> [3]:node')
282 |     replace_dmrs = parse_graphlang('[3]:node <-1- [1]:_get_v_1 e? <=1= _from_p e -2-> [2]:node')
283 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
284 |     assert 'Sandy got a book from Kim.' in generate(dmrs)
285 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
286 |     assert 'Kim gave a book to Sandy.' in generate(dmrs)
287 | 
288 |     dmrs = parse('Kim hates spinach.')[0]
289 |     search_dmrs = parse_graphlang('[2]:node <-1- [1]:_hate_v_1 e? -2-> [3]:node')
290 |     replace_dmrs = parse_graphlang('[3]:node <-1- [1]:_disgust_v_1 e? -2-> [2]:node')
291 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
292 |     assert 'Spinach disgusts Kim.' in generate(dmrs)
293 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
294 |     assert 'Kim hates spinach.' in generate(dmrs)
295 | 
296 |     dmrs = parse('I like to play tennis.')[0]
297 |     search_dmrs = parse_graphlang('[1]:node <-1- [2]:_like_v_1 e? -2h-> [3]:_v e[pui--] -1-> :1')
298 |     replace_dmrs = parse_graphlang('udef_q --> nominalization x <-1- [2]:_make_v_cause e? -2h-> _happy_a_with e[pui__] -1-> [1]:node; :nominalization =1h=> [3]:_v e[pui-+]')
299 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
300 |     assert 'Playing tennis makes me happy.' in generate(dmrs)
301 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
302 |     assert 'I like to play tennis.' in generate(dmrs)
303 | 
304 | 
305 |     # think + subclause examples
306 | 
307 |     dmrs = parse('I think I will go.')[0]
308 |     search_dmrs = parse_graphlang('[1]:_think_v_1 e[????-] -2h-> [2]:_v e[pfi--]')
309 |     replace_dmrs = parse_graphlang('[1]:_think_v_of e[????+] -2-> nominalization x <-- udef_q; :nominalization =1h=> [2]:_v e[pui-+]')
310 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
311 |     assert 'I am thinking of me going.' in generate(dmrs)
312 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
313 |     assert 'I think I will go.' in generate(dmrs)
314 | 
315 |     dmrs = parse('I think he will go.')[0]
316 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
317 |     assert 'I am thinking of him going.' in generate(dmrs)
318 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
319 |     assert 'I think he will go.' in generate(dmrs)
320 | 
321 | 
322 |     # determinerless PP (with optional node)
323 | 
324 |     dmrs = parse('I found you at last.')[0]
325 |     search_dmrs = parse_graphlang('[1]:_at_p e[pui--] -2-> _last_n_1 x[3s_+_] <-- idiom_q_i; (2):_long_a_1 e[pui__] =1=> :_last_n_1')
326 |     replace_dmrs = parse_graphlang('[1]:_final_a_1 e[pui--]')
327 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
328 |     assert 'I found you finally.' in generate(dmrs)
329 |     dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False)
330 |     assert 'I found you at last.' in generate(dmrs)
331 | 
332 | 
333 |     # question generation (with subgraph nodes)
334 | 
335 |     dmrs = parse('Kim gave Sandy a book.')[0]
336 |     search_dmrs = parse_graphlang('*[1]:_v e[p????] -1-> {2}:node')
337 |     replace_dmrs = parse_graphlang('*[1]:_v e[q????] -1-> [2]:person x[3s___] <-- which_q')
338 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
339 |     assert 'Who gave Sandy a book?' in generate(dmrs)
340 | 
341 |     dmrs = parse('Kim gave Sandy a book.')[0]
342 |     search_dmrs = parse_graphlang('*[1]:_v e[p????] -2-> {2}:node')
343 |     replace_dmrs = parse_graphlang('*[1]:_v e[q????] -2-> [2]:thing x <-- which_q')
344 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
345 |     assert 'What did Kim give Sandy?' in generate(dmrs)
346 | 
347 |     dmrs = parse('Kim gave Sandy a book.')[0]
348 |     search_dmrs = parse_graphlang('*[1]:_v e[p????] -3-> {2}:node')
349 |     replace_dmrs = parse_graphlang('*[1]:_v e[q????] -3-> [2]:person x[3s___] <-- which_q')
350 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False)
351 |     assert 'Who did Kim give a book?' in generate(dmrs)
352 | 
353 | 
354 |     # think example (with equal constraints)
355 | 
356 |     dmrs = parse('I think I will go.')[0]
357 |     equalities = {}
358 |     search_dmrs = parse_graphlang('[1]:node=1 <-1- [2]:_think_v_1 e[????-] -2h-> [3]:_v e[pfi--] -1-> node=1', equalities=equalities)
359 |     replace_dmrs = parse_graphlang('[1]:node <-1- [2]:_think_v_of e[????+] -2-> nominalization x <-- udef_q; :nominalization =1h=> [3]:_v e[pui-+]')
360 |     dmrs_mapping(dmrs, search_dmrs, replace_dmrs, equalities=equalities, copy_dmrs=False)
361 |     assert 'I am thinking of going.' in generate(dmrs)
362 | 


--------------------------------------------------------------------------------
/examples/examples_query.py:
--------------------------------------------------------------------------------
 1 | from pydmrs.pydelphin_interface import parse
 2 | from pydmrs.matching.query import dmrs_query
 3 | 
 4 | 
 5 | if __name__ == '__main__':
 6 | 
 7 |     # basic functionality
 8 |     dmrs_list = [parse('A mouse ate the whole cheese.')[0],
 9 |                  parse('Lions eat around 15 zebras per year.')[0],
10 |                  parse('Their children eat so many sweets.')[0],
11 |                  parse('Potatoes are mostly eaten by humans.')[0]]
12 |     search_dmrs = '_?1_?_?_rel i <-1- _eat_v_1_rel e? -2-> _?2_?_?_rel i'
13 | 
14 |     # not dict, not per dmrs
15 |     results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=False, results_per_dmrs=False))
16 |     assert len(results) == 4
17 |     assert ('mouse', 'cheese') in results
18 |     assert ('lion', 'zebra') in results
19 |     assert ('child', 'sweet') in results
20 |     assert ('human', 'potato') in results
21 |     # dict, not per dmrs
22 |     results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=True, results_per_dmrs=False))
23 |     assert len(results) == 4
24 |     assert {'1': 'mouse', '2': 'cheese'} in results
25 |     assert {'1': 'lion', '2': 'zebra'} in results
26 |     assert {'1': 'child', '2': 'sweet'} in results
27 |     assert {'1': 'human', '2': 'potato'} in results
28 |     # not dict, per dmrs
29 |     results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=False, results_per_dmrs=True))
30 |     assert len(results) == 4 and all(isinstance(result, list) for result in results)
31 |     assert ('mouse', 'cheese') in results[0]
32 |     assert ('lion', 'zebra') in results[1]
33 |     assert ('child', 'sweet') in results[2]
34 |     assert ('human', 'potato') in results[3]
35 |     # dict, per dmrs
36 |     results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=True, results_per_dmrs=True))
37 |     assert len(results) == 4 and all(isinstance(result, list) for result in results)
38 |     assert {'1': 'mouse', '2': 'cheese'} in results[0]
39 |     assert {'1': 'lion', '2': 'zebra'} in results[1]
40 |     assert {'1': 'child', '2': 'sweet'} in results[2]
41 |     assert {'1': 'human', '2': 'potato'} in results[3]
42 | 


--------------------------------------------------------------------------------
/examples/examples_toy_robot.py:
--------------------------------------------------------------------------------
 1 | from copy import copy
 2 | 
 3 | from pydmrs.core import Link, LinkLabel
 4 | from pydmrs.components import Pred, RealPred, GPred
 5 | from pydmrs.simplification.gpred_filtering import gpred_filtering, DEFAULT_FILTER
 6 | #from pydmrs.mapping.mapping import dmrs_mapping
 7 | from pydmrs.graphlang.graphlang import parse_graphlang
 8 | 
 9 | # Also remove pronouns
10 | extended_filter = DEFAULT_FILTER | {GPred('pron')}
11 | 
12 | # Replace the first pred with the second:
13 | rename = [(RealPred('forwards','p'), RealPred('forward','p','dir'))]
14 | 
15 | # Replace a pair of nodes with a single node
16 | # (the first pred linked to the second pred, is replaced by the third pred)
17 | shrink = [('_left_a_1', 'ARG1/EQ', 'place_n', '_left_n_1'),
18 |           ('_right_a_1', 'ARG1/EQ', 'place_n', '_right_n_1'),
19 |           ('loc_nonsp', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
20 |           ('loc_nonsp', 'ARG2/NEQ', '_right_n_1', '_right_p_dir'),
21 |           ('_to_p', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
22 |           ('_to_p', 'ARG2/NEQ', '_right_n_1', '_right_p_dir')]
23 | 
24 | shrink = [(Pred.from_string(a),
25 |            LinkLabel.from_string(b),
26 |            Pred.from_string(c),
27 |            Pred.from_string(d)) for a,b,c,d in shrink]
28 | 
def simplify(dmrs):
    """
    Simplify an input DMRS to a form that can be converted to robot commands.

    The steps are, in this order: general predicate filtering with the extended
    filter, removal of all quantifier nodes, application of the `rename` rules
    and application of the `shrink` rules. Nodes and links are removed/added on
    the passed graph itself, which is also returned.
    """
    # Remove unnecessary GPreds (defaults, plus pronouns)
    gpred_filtering(dmrs, extended_filter)

    # Remove quantifiers (iterate over a copy, since nodes are removed on the way)
    for candidate in copy(dmrs.nodes):
        if dmrs.is_quantifier(candidate.nodeid):
            dmrs.remove_node(candidate.nodeid)

    # Apply the renaming rules
    for old_pred, new_pred in rename:
        for node in dmrs.iter_nodes():
            if node.pred == old_pred:
                node.pred = new_pred

    # Apply the shrinking rules
    for head_pred, label, dependent_pred, merged_pred in shrink:
        for node in copy(dmrs.nodes):
            if not node.pred == head_pred:
                continue
            head_id = node.nodeid
            for link in dmrs.get_out(head_id, rargname=label.rargname, post=label.post):
                dependent_id = link.end
                if not dmrs[dependent_id].pred == dependent_pred:
                    continue
                # We've found a match: detach the pair before re-wiring
                dmrs.remove_link(link)
                # Copy the remaining links of the second node over to the first
                for outgoing in dmrs.get_out(dependent_id):
                    dmrs.add_link(Link(head_id, outgoing.end, outgoing.rargname, outgoing.post))
                for incoming in dmrs.get_in(dependent_id):
                    dmrs.add_link(Link(incoming.start, head_id, incoming.rargname, incoming.post))
                # Remove the second node and update the pred of the first
                dmrs.remove_node(dependent_id)
                dmrs[head_id].pred = merged_pred

    return dmrs
66 | 
67 | 
# GraphLang description of the demo sentence
dmrsstring = '''
_then_c -L-HNDL/H-> _drive_v_1 <-L-INDEX/NEQ- :_then_c -R-HNDL/H-> _turn_v_1 <-R-INDEX/NEQ- :_then_c;
pronoun_q -RSTR/H-> pron <-1- :_drive_v_1 <=1= _forwards_p;
pronoun_q -RSTR/H-> pron <-1- :_turn_v_1 <=1= loc_nonsp -2-> place_n <-RSTR/H- def_implicit_q;
_left_a_1 =1=> :place_n
'''
dmrs = parse_graphlang(dmrsstring)
dmrs.surface = 'Drive forwards then turn left'


def _print_graph(graph):
    """Print the (nodeid, pred) pairs of all nodes, followed by the links of the graph."""
    print([(node.nodeid, node.pred) for node in graph.nodes])
    print(graph.links)


# Graph before simplification
_print_graph(dmrs)

simplify(dmrs)

# Graph after simplification, separated by an empty line
print()
_print_graph(dmrs)

# Further example sentences (bare string statements without any effect)
'Go forward and then turn to the left'
'Turn left at a yellow line'
'On a yellow line, turn to the left'
88 | 'On a yellow line, turn to the left'


--------------------------------------------------------------------------------
/pydmrs/__config__/default_interface.conf:
--------------------------------------------------------------------------------
1 | [Grammar]
2 | ERG: /opt/erg/erg-1214.dat
3 | 


--------------------------------------------------------------------------------
/pydmrs/__config__/default_simplification.conf:
--------------------------------------------------------------------------------
  1 | [General Predicate Filtering]
  2 | allow_disconnected_dmrs: False
  3 | filter:
  4 |     ## Uncommented lines indicate filtered gpreds
  5 | 
  6 |     ## Quantifier-like things (*_q_* indicates a quantifier gpred)
  7 |     ## When you don't have an explicit quantifier, it's added as a grammar quantifier
  8 | 
  9 |     ## Occurs with 'he' etc., always occurs with 'pron_rel'. It doesn't show anything interesting.
 10 |     pronoun_q_rel
 11 | 
 12 |     ## These don't occur in newer versions of the grammar anymore
 13 |     focus_d_rel,parg_d_rel
 14 | 
 15 |     ## Shows up with e.g. 'Three bark' (dogs). 'three' is being treated as a noun
 16 |     number_q_rel
 17 | 
 18 |     ## Part of an idiom (indicated by '_i_') when there is no other explicit quantifier for idiom (?)
 19 |     idiom_q_i_rel
 20 | 
 21 |     ## Quantifier for proper names. Shows distinction between explicitly quantified proper names
 22 |     ## e.g. 'The Kim I saw yesterday' vs. 'Kim jumped up a tree' (last one has proper_q_rel)
 23 |     ## Almost always present, so it can be removed to reduce complexity.
 24 |     proper_q_rel
 25 | 
 26 |     ## If it doesn't have any other quantifier, it's this.
 27 |     ## Mostly harmless, without a strong signal. Can be removed to decrease complexity.
 28 |     udef_q_rel
 29 | 
 30 |     ## Used for things like possessive 'her', 'whose'.
 31 |     ## (investigate further! - found in dates too)
 32 |     def_explicit_q_rel,def_implicit_q_rel
 33 | 
 34 |     ## // end quantifier-like things
 35 | 
 36 |     ## Signifies when copulas can't be treated implicitly.
 37 |     ## e.g. 'Kim is president' as opposed to 'Kim is tall'
 38 |     cop_id_rel
 39 | 
 40 |     ## Signifying a gap in a sentence, e.g. 'Kim doesn't know when' [...]
 41 |     ## They are supposed to happen here. They often happen due to misparse,
 42 |     ## which is why we filter them by default.
 43 |     ellipsis_rel,ellipsis_expl_rel,elliptical_n_rel,ellipsis_ref_rel
 44 | 
 45 |     approx_grad_rel
 46 | 
 47 |     eventuality_rel
 48 |     generic_nom_rel,generic_verb_rel
 49 | 
 50 |     id_rel
 51 |     interval_rel,interval_p_end_rel,interval_p_start_rel,hour_prep_rel
 52 |     property_rel
 53 |     prpstn_to_prop_rel
 54 |     string
 55 |     timezone_p_rel
 56 |     unknown_rel
 57 |     unspec_adj_rel
 58 |     v_event_rel
 59 | 
 60 |     ## UNFILTERED THINGS:
 61 | 
 62 |     ## Analysis of things like everybody (every body)
 63 |     # every_q_rel,some_q_rel
 64 | 
 65 |     ## Question-like things
 66 | 
 67 |     ## Corresponds to any 'what', 'why'.
 68 |     ## Signals what a question is about, clauses ('Kim wondered why this is so hard')
 69 |     # which_q_rel
 70 | 
 71 |     ## The following examples are distinguished by using these two gpreds
 72 |     ## 'Kim fell where Sandy fell' vs. 'Kim fell wherever Sandy fell'
 73 |     # free_relative_q_rel,free_relative_ever_q_rel
 74 | 
 75 |     ## // end question-like things
 76 | 
 77 |     ## Signifies a person being addressed in discourse
 78 |     ## e.g. 'No, Mr. Bond, I expect you to die'
 79 |     # addressee_rel
 80 | 
 81 |     ## Signify discourse expressions such as 'Hello', 'Please'
 82 |     # greet_rel,polite_rel
 83 | 
 84 |     ## Signifies when something that's not normally used as a noun, is used as a noun
 85 |     ## e.g. 'Playing is fun'
 86 |     # nominalization_rel
 87 | 
 88 |     ## Preposition-like gpreds
 89 | 
 90 |     ## Corresponds to 'in' in what manner
 91 |     ## e.g. 'How did Kim fall' (in what manner did Kim fall?)
 92 |     # unspec_manner_rel
 93 | 
 94 |     ## Indicates noun-noun compounds, preposition-like
 95 |     ## e.g. 'Kim Smith'
 96 |     ## compound_name_rel is obsolete in the latest version (everything is compound_rel)
 97 |     # compound_rel,compound_name_rel
 98 | 
 99 |     ##
100 |     # temp_loc_x_rel,temp_rel,loc_nonsp_rel
101 | 
102 |     ## Noun-like gpreds
103 | 
104 |     ## Question-like things
105 |     # manner_rel,person_rel,reason_rel
106 |     # place_n_rel,time_n_rel
107 | 
108 |     ## Temporal gpreds, occurring with year, month, days, hours ...
109 |     ## They all (?) have cargs
110 |     # minute_rel,numbered_hour_rel,dofw_rel,dofm_rel,mofy_rel,holiday_rel,season_rel,year_range_rel,yofc_rel
111 | 
112 |     ## Signifying numbers
113 |     ## They all have cargs
114 |     # basic_card_rel,card_rel,ord_rel
115 | 
116 |     ## Signifying proper names, e.g. 'Kim'
117 |     ## They all have cargs
118 |     # named_rel,named_n_rel
119 | 
120 |     ## Signifies relations between composed numbers
121 |     ## e.g. 'two hundred and twenty-three' (two-times-hundred-plus-three-plus-twenty)
122 |     # fraction_rel,plus_rel,times_rel,num_seq_rel
123 | 
124 |     ## Signifies multiple coordination or sentence coordination without an explicit conjunction word
125 |     ## e.g. 'Kim, Sandy and Lee are smart' (between Kim and 'Sandy')
126 |     # implicit_conj_rel
127 | 
128 |     ## Signifies additional explanation/elaboration of something
129 |     ## e.g. 'Kim (Smith) is visiting'
130 |     # parenthetical_rel,appos_rel
131 | 
132 |     # measure_rel
133 |     # comp_equal_rel,comp_enough_rel,comp_less_rel,comp_not+so_rel,comp_not+too_rel,comp_rel,comp_so_rel,comp_too_rel,superl_rel
134 | 
135 |     # little-few_a_rel,much-many_a_rel
136 | 
137 |     # generic_entity_rel
138 | 
139 |     # neg_rel,poss_rel,pron_rel,subord_rel,thing_rel
140 | 
141 |     # meas_np_rel
142 |     # abstr_deg_rel
143 |     # all+too_rel
144 |     # discourse_rel
145 |     # excl_rel
146 |     # fw_seq_rel
147 |     # ne_x_rel
148 |     # part_of_rel
149 |     # of_p_rel
150 |     # recip_pro_rel
151 |     # refl_mod_rel
152 |     # with_p_rel
153 |     # relative_mod_rel
154 |     # prednom_state_rel
155 | 
156 | [Rooted Conversion]
157 | reverse_arg1:
158 |     appos_rel
159 |     parenthetical_rel
160 |     compound_name_rel
161 |     compound_rel
162 |     of_p_rel
163 |     but_p_except_rel
164 |     poss_rel
165 |     


--------------------------------------------------------------------------------
/pydmrs/__init__.py:
--------------------------------------------------------------------------------
1 | # Control what is imported using `from pydmrs import *`
2 | __all__ = ['components', 'core', 'serial', 'simplification']
3 | 


--------------------------------------------------------------------------------
/pydmrs/_exceptions.py:
--------------------------------------------------------------------------------
 1 | class PydmrsError(Exception):
 2 |     pass
 3 | 
 4 | class PydmrsTypeError(PydmrsError, TypeError):
 5 |     pass
 6 | 
 7 | class PydmrsValueError(PydmrsError, ValueError):
 8 |     pass
 9 | 
10 | class PydmrsKeyError(PydmrsError, KeyError):
11 |     pass
12 | 
13 | class PydmrsWarning(PydmrsError, Warning):
14 |     pass
15 | 
16 | class PydmrsDeprecationWarning(PydmrsWarning, DeprecationWarning):
17 |     pass


--------------------------------------------------------------------------------
/pydmrs/graphlang/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delph-in/pydmrs/795b35dba4986fa9084eaa81fb16206cb131a752/pydmrs/graphlang/__init__.py


--------------------------------------------------------------------------------
/pydmrs/graphlang/graphlang.py:
--------------------------------------------------------------------------------
  1 | from pydmrs.components import Pred, RealPred, GPred, Sortinfo, EventSortinfo, InstanceSortinfo
  2 | from pydmrs.core import Link, Node, ListDmrs
  3 | from pydmrs.mapping.mapping import AnchorNode, OptionalNode, SubgraphNode
  4 | 
  5 | 
  6 | default_sortinfo_classes = dict(
  7 |     e=EventSortinfo,
  8 |     x=InstanceSortinfo
  9 | )
 10 | 
 11 | default_sortinfo_shortforms = dict(
 12 |     e=dict(
 13 |         sf={'p': 'prop', 'q': 'ques', 'o': 'prop-or-ques', 'c': 'comm'},
 14 |         tense={'u': 'untensed', 't': 'tensed', 'p': 'pres', 'a': 'past', 'f': 'fut'},
 15 |         mood={'i': 'indicative', 's': 'subjunctive'},
 16 |         perf={'+': '+', '-': '-'},
 17 |         prog={'+': '+', '-': '-', 'b': 'bool'}
 18 |     ),
 19 |     x=dict(
 20 |         pers={'1': '1', '2': '2', '3': '3', 'o': '1-or-3'},
 21 |         num={'s': 'sg', 'p': 'pl'},
 22 |         gend={'f': 'f', 'm': 'm', 'n': 'n', 'o': 'm-or-f'},
 23 |         ind={'+': '+', '-': '-'},
 24 |         pt={'s': 'std', 'z': 'zero', 'r': 'refl'}
 25 |     )
 26 | )
 27 | 
 28 | 
 29 | def parse_graphlang(
 30 |     string,
 31 |     cls=ListDmrs,
 32 |     queries=None,
 33 |     equalities=None,
 34 |     anchors=None,
 35 |     sortinfo_classes=None,
 36 |     sortinfo_shortforms=None
 37 | ):
 38 |     if queries is None:
 39 |         queries = {}
 40 |     if equalities is None:
 41 |         equalities = {}
 42 |     if anchors is None:
 43 |         anchors = {}
 44 |     if sortinfo_classes is None:
 45 |         sortinfo_classes = default_sortinfo_classes
 46 |         assert sortinfo_shortforms is None
 47 |         sortinfo_shortforms = default_sortinfo_shortforms
 48 |     else:
 49 |         if sortinfo_shortforms is None:
 50 |             sortinfo_shortforms = dict()
 51 |         else:
 52 |             assert all(cvarsort in sortinfo_classes for cvarsort in sortinfo_shortforms)
 53 |         assert 'i' not in sortinfo_classes
 54 |         sortinfo_classes['i'] = Sortinfo
 55 |     nodeid = 1
 56 |     nodes = []
 57 |     links = []
 58 |     index = None
 59 |     top = None
 60 |     refs = {}
 61 |     lines = (item for line in string.split('\n') for item in line.split(';') if item)
 62 |     for line in lines:
 63 |         last_id = -1
 64 |         r = 0
 65 |         start = True
 66 |         while r < len(line):
 67 |             l = r  # position of link
 68 |             while l < len(line) and line[l] == ' ':
 69 |                 l += 1
 70 |             if l >= len(line):
 71 |                 break
 72 |             if start:
 73 |                 m = l
 74 |             else:
 75 |                 m = line.index(' ', l) + 1  # position of node (+ sortinfo)
 76 |                 while line[m] == ' ':
 77 |                     m += 1
 78 |             r1 = line.find('<', m)  # position of next link
 79 |             r2 = line.find('>', m)
 80 |             if r1 < m and r2 < m:
 81 |                 r = len(line) - 1
 82 |             else:
 83 |                 if r1 < m:
 84 |                     r = r2
 85 |                 elif r1 < r2 or r2 < m:
 86 |                     r = r1
 87 |                 else:
 88 |                     r = r2
 89 |                 r = line.rindex(' ', 0, r)
 90 |             while line[r] == ' ':
 91 |                 r -= 1
 92 |             r += 1
 93 |             if line[m] == ':':
 94 |                 ref = line[m+1:r]
 95 |                 assert ref in refs, 'Invalid reference id.'
 96 |                 current_id = refs[ref]
 97 |             else:
 98 |                 # TODO: index node?
 99 |                 if line[m] == '*' and line[m+1] == '*':  # index node
100 |                     assert index is None
101 |                     index = nodeid
102 |                     m += 2
103 |                 if line[m] == '*':  # top node
104 |                     assert top is None
105 |                     top = nodeid
106 |                     m += 1
107 |                 node, ref_ids, ref_name = _parse_node(line[m:r], nodeid, queries, equalities, anchors, sortinfo_classes, sortinfo_shortforms)
108 |                 nodes.append(node)
109 |                 current_id = nodeid
110 |                 nodeid += 1
111 |                 if ref_ids is not None:
112 |                     for ref_id in ref_ids:
113 |                         refs[ref_id] = current_id
114 |                 refs[ref_name] = current_id
115 |             if not start:
116 |                 m = line.index(' ', l, m)
117 |                 link = _parse_link(line[l:m], last_id, current_id, queries, equalities)
118 |                 links.append(link)
119 |             last_id = current_id
120 |             start = False
121 |     return cls(nodes=nodes, links=links, index=index, top=top)
122 | 
123 | 
# Leading characters with a special meaning: '?' introduces a query, '=' an equality constraint
special_values = ('?', '=')


def _parse_value(string, underspecified, queries, equalities, retriever):
    """
    Interpret a single value of a GraphLang description.

    - An empty value or a value without special first character is returned as is.
    - A lone '?' or '=' yields `underspecified`.
    - A doubled special character escapes it: the value without its first
      character is returned.
    - '?name' registers `retriever` as query `name` (which must be new),
      '=name' appends `retriever` to the equality constraint `name`;
      both yield `underspecified`.
    """
    if not string:
        return string
    marker = string[0]
    if marker not in special_values:
        return string
    if len(string) == 1:
        return underspecified
    if string[1] == marker:
        return string[1:]
    name = string[1:]
    if marker == '?':
        assert name not in queries
        queries[name] = retriever
    elif marker == '=':
        equalities.setdefault(name, []).append(retriever)
    return underspecified
143 | 
144 | 
def _parse_node(string, nodeid, queries, equalities, anchors, sortinfo_classes, sortinfo_shortforms):
    """
    Parse a node description of the form "[ref_ids:]pred[(carg)][ sortinfo]".

    The reference ids are a comma-separated list, whose brackets decide about
    the node class: "[...]" gives an AnchorNode, "(...)" an OptionalNode,
    "{...}" a SubgraphNode and no brackets (or no reference ids at all) a
    plain Node. The keyword "node" (optionally followed by a query/equality
    marker) stands for a node with empty pred, carg '?' and empty sortinfo.

    :param string: node description without surrounding spaces
    :param nodeid: id of the new node
    :param queries: dict collecting query retrievers
    :param equalities: dict collecting equality retrievers
    :param anchors: dict in which the node is registered under each of its
        reference ids; the ids must not be registered yet
    :param sortinfo_classes: passed on to the sortinfo parser
    :param sortinfo_shortforms: passed on to the sortinfo parser
    :return: tuple (node, list of reference ids or None, reference name)
    """
    # The reference ids of an optional node are wrapped in parentheses themselves
    # ("(1):pred"). The search for the carg parenthesis / the first space has to
    # start behind this group, otherwise the ':' separator in front of the pred
    # would never be found and the description could not be parsed.
    offset = 0
    if string[:1] == '(':
        closing = string.find('):')
        if closing > 0:
            offset = closing + 1
    m = string.find('(', offset)  # end of pred: start of carg ...
    if m < 0:
        m = string.find(' ', offset)  # ... or space in front of the sortinfo
    # the ':' separator only counts if it is located in front of carg / sortinfo
    if m < 0:
        l = string.find(':')
    else:
        l = string.find(':', 0, m)
    if l < 0:
        ref_ids = None
        l = 0
    else:
        ref_ids = string[:l]
        l += 1
        while string[l] == ' ':
            l += 1
    if string[l:l+4] == 'node' and (len(string) - l == 4 or string[l+4] in special_values):
        # fully underspecified node
        value = _parse_value(string[l+4:], None, queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]]))
        assert not value
        pred = Pred()
        carg = '?'
        sortinfo = Sortinfo()
        ref_name = 'node'
    elif m < 0:
        # pred only
        pred, ref_name = _parse_pred(string[l:], nodeid, queries, equalities)
        carg = None
        sortinfo = None
    else:
        pred, ref_name = _parse_pred(string[l:m], nodeid, queries, equalities)
        if string[m] == '(':
            r = string.index(')', m)
            # the carg may be enclosed in double quotes
            if string[m+1] == '"' and string[r-1] == '"':
                carg = string[m+2:r-1]
            else:
                carg = string[m+1:r]
            assert '"' not in carg
            carg = _parse_value(carg, '?', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].carg))
            m = r + 1
        else:
            carg = None
        if m < len(string) and string[m] == ' ':
            while string[m] == ' ':
                m += 1
            sortinfo = _parse_sortinfo(string[m:], nodeid, queries, equalities, sortinfo_classes, sortinfo_shortforms)
        else:
            sortinfo = None
    if not ref_ids:
        ref_ids = None
        node = Node(nodeid, pred, sortinfo=sortinfo, carg=carg)
    else:
        if ref_ids[0] == '[' and ref_ids[-1] == ']':
            ref_ids = ref_ids[1:-1].split(',')
            node = AnchorNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        elif ref_ids[0] == '(' and ref_ids[-1] == ')':
            ref_ids = ref_ids[1:-1].split(',')
            node = OptionalNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        elif ref_ids[0] == '{' and ref_ids[-1] == '}':
            ref_ids = ref_ids[1:-1].split(',')
            node = SubgraphNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        else:
            ref_ids = ref_ids.split(',')
            node = Node(nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        for ref_id in ref_ids:
            assert ref_id not in anchors, 'Reference ids have to be unique.'
            anchors[ref_id] = node
    return node, ref_ids, ref_name
211 | 
212 | 
def _parse_pred(string, nodeid, queries, equalities):
    """
    Parse the predicate part of a node description.

    Three forms are distinguished:
    - "pred" / "predsort" (optionally followed by a query/equality marker):
      an empty Pred
    - a name without leading underscore: a GPred
    - "_lemma_pos_sense", "_lemma_pos" or "_pos": a RealPred, where missing
      or underspecified parts are filled with '?', 'u' and 'unknown'

    A trailing "_rel" is not part of the predicate itself, but is kept in the
    reference name.

    :param string: predicate description, lower-case and without spaces
    :param nodeid: id of the node the predicate belongs to
    :param queries: dict collecting query retrievers
    :param equalities: dict collecting equality retrievers
    :return: tuple (pred, reference name)
    """
    assert string.islower(), 'Predicates must be lower-case.'
    assert ' ' not in string, 'Predicates must not contain spaces.'
    if string[0] == '"' and string[-1] == '"':
        string = string[1:-1]
    assert '"' not in string, 'Predicates must not contain quotes.'
    assert string[0] != '\'', 'Predicates with opening single-quote have been deprecated.'

    # Underspecified predicate: keyword, optionally followed by a query/equality marker
    keyword = 'predsort' if string[:8] == 'predsort' else 'pred'
    width = len(keyword)
    if string[:width] == keyword and (len(string) == width or string[width] in special_values):
        value = _parse_value(string[width:], None, queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred))
        assert not value
        return Pred(), keyword

    # Split off the optional "_rel" suffix
    if string[-4:] == '_rel':
        string = string[:-4]
        rel_suffix = '_rel'
    else:
        rel_suffix = ''

    # General predicate
    if string[0] != '_':
        name = _parse_value(string, '?', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.name))
        return GPred(name), name + rel_suffix

    # Real predicate: up to three parts, taken from the right
    parts = string[1:].rsplit('_', 2)
    count = len(parts)
    assert count > 0, 'Invalid number of arguments for RealPred.'
    if count == 1:
        # only the part-of-speech is given
        parts = ['?', parts[0], 'unknown']
    elif count == 2:
        # no sense given
        parts = parts + [None]
    raw_lemma, raw_pos, raw_sense = parts
    lemma = _parse_value(raw_lemma, '?', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.lemma))
    pos = _parse_value(raw_pos, 'u', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.pos))  # u ???
    sense = _parse_value(raw_sense, 'unknown', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.sense))  # unknown ???

    # The reference name mirrors the number of parts which were given
    if count == 1:
        ref_name = '_{}{}'.format(pos, rel_suffix)
    elif count == 2:
        ref_name = '_{}_{}{}'.format(lemma, pos, rel_suffix)
    else:
        ref_name = '_{}_{}_{}{}'.format(lemma, pos, sense, rel_suffix)
    return RealPred(lemma, pos, sense), ref_name
250 | 
251 | 
252 | def _parse_sortinfo(string, nodeid, queries, equalities, sortinfo_classes, sortinfo_shortforms):
253 |     assert string.islower(), 'Sortinfos must be lower-case.'
254 |     assert ' ' not in string, 'Sortinfos must not contain spaces.'
255 |     if string[0] == 'i':
256 |         assert len(string) == 1, 'Sortinfo type i cannot be specified.'
257 |         return Sortinfo()
258 |     assert string[0] in sortinfo_classes
259 |     sortinfo = sortinfo_classes[string[0]]()
260 |     if len(string) == 1:
261 |         return sortinfo
262 |     shortform = sortinfo_shortforms.get(string[0], dict())
263 |     index = 1
264 |     if string[1] in special_values:
265 |         index = string.find('[')
266 |         if index > 0:
267 |             value = _parse_value(string[1:index], None, queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].sortinfo))
268 |             assert not value
269 |         else:
270 |             value = _parse_value(string[1:], None, queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].sortinfo))
271 |             assert not value
272 |         for feature in sortinfo_classes[string[0]].features:
273 |             sortinfo[feature] = 'u'
274 |         if index < 0:
275 |             return sortinfo
276 |     assert string[index] == '[' and string[-1] == ']', 'Square brackets missing.'
277 |     if '=' in string:  # explicit key-value specification
278 |         for kv in string[index + 1: -1].split(','):
279 |             key, value = kv.split('=')
280 |             if value == '_':
281 |                 value = None
282 |             elif value == '?':
283 |                 value = 'u'
284 |             elif key in shortform and value in shortform[key]:
285 |                 value = shortform[key][value]
286 |             sortinfo[key] = value
287 |         return sortinfo
288 |     else:  # implicit specification
289 |         assert index == 1  # general underspecification makes no sense
290 |         assert len(string) == len(sortinfo.features) + 3
291 |         for n, feature in enumerate(sortinfo.features, 2):
292 |             value = string[n]
293 |             if value == '_':
294 |                 value = None
295 |             elif value == '?':
296 |                 value = 'u'
297 |             elif feature in shortform and string[n] in shortform[feature]:
298 |                 value = shortform[feature][value]
299 |             sortinfo[feature] = value
300 |         return sortinfo
301 | 
302 | 
def _parse_link(string, left_nodeid, right_nodeid, queries, equalities):
    """
    Parse a graphlang link specification between two nodes into a Link.
    The string consists of a direction marker ('<' at the start or '>' at the end), a line made of
    either '-' or '=' characters, and an optional specification embedded in the line.
    :param string Link specification without spaces.
    :param left_nodeid Id of the node written to the left of the link.
    :param right_nodeid Id of the node written to the right of the link.
    :param queries Passed through to _parse_value.
    :param equalities Passed through to _parse_value.
    :return A Link from the start node to the end node with the parsed rargname and post.
    """
    assert ' ' not in string, 'Links must not contain spaces.'
    # l and r are cursors which move inwards from both ends of the string
    l = 0
    r = len(string) - 1
    if string[l] == '<':  # pointing left
        start = right_nodeid
        end = left_nodeid
        l += 1
    elif string[r] == '>':  # pointing right
        start = left_nodeid
        end = right_nodeid
        r -= 1
    else:  # invalid link
        assert False, 'Link must have a direction.'
    assert string[l] in '-=' and string[r] in '-=', 'Link line must consist of either "-" or "=".'
    # the line character found on the left is also the one skipped on the right
    link_char = string[l]
    while l < len(string) and string[l] == link_char:  # arbitrary left length
        l += 1
    while r >= 0 and string[r] == link_char:  # arbitrary right length
        r -= 1
    # now l and r are the indices of the first and last specification character (l > r if there is none)
    if l + 1 < r:  # explicit specification
        r += 1  # turn r into an exclusive slice end
        # NOTE(review): this branch requires at least three specification characters, so the two-character
        # 'eq' comparison and the lone '/' case (l == m and m + 1 == r) below look unreachable -- confirm
        if string[l:r] == 'rstr':  # rargname RSTR uniquely determines post H
            rargname = 'rstr'
            post = 'h'
        elif string[l:r] == 'eq':  # post EQ uniquely determines rargname None
            rargname = None
            post = 'eq'
        else:
            # a '/' separates rargname (before) from post (after); either side may be empty
            m = string.find('/', l, r)
            if m >= 0:
                if l == m and m + 1 == r:
                    rargname = None
                    post = None
                elif l == m:
                    rargname = None
                    post = _parse_value(string[m+1:r], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.post for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                elif m + 1 == r:
                    rargname = _parse_value(string[l:m], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.rargname for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                    post = None
                else:
                    # problem: doesn't combine rargname and post
                    rargname = _parse_value(string[l:m], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.rargname for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                    post = _parse_value(string[m+1:r], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.post for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
            else:
                # no '/': the whole specification is parsed as the rargname and post is left unset
                rargname = _parse_value(string[l:r], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.labelstring for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                post = None
        return Link(start, end, rargname, post)
    if l > r:  # no specification symbol
        if link_char == '=':
            rargname = None
            post = 'eq'
        else:
            rargname = 'rstr'
            post = 'h'
    else:
        # short specification of one or two symbols: the line character and the symbol count
        # determine post, the first symbol determines rargname
        if string[l] == '?':  # no equal constraint
            rargname = '?'
            post = '?'
            value = _parse_value(string[l:r+1], None, queries, equalities, (lambda matching, dmrs: ','.join(link.labelstring for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
            assert not value
        elif l == r:  # one specification symbol, i.e. variable link
            if link_char == '=':
                post = 'eq'
            else:
                post = 'neq'
        elif l + 1 == r:  # two specification symbol, i.e. handle link
            assert string[r] == 'h', 'Second link specification symbol must be "h".'
            if link_char == '=':
                post = 'heq'
            else:
                post = 'h'
        else:
            assert False  # never reached
        if string[l] == 'n':  # ARG/ARGN (underspecified ARG)
            rargname = 'arg'
        elif string[l] in '1234':  # ARG{1,2,3,4}
            rargname = 'arg' + str(string[l])
        elif string[l] in 'lr':  # {L,R}-{INDEX,HNDL}
            if l == r:
                rargname = str(string[l]).upper() + '-index'
            else:
                rargname = str(string[l]).upper() + '-hndl'
        elif string[l] != '?':
            assert False, 'Invalid link specification symbol.'
    return Link(start, end, rargname, post)
389 | 
390 | 
if __name__ == '__main__':
    # Command line use: the graphlang string is either the single argument (interactive terminal)
    # or read line by line from a piped stdin; each parsed graph is written to stdout as XML.
    import sys
    argument_count = len(sys.argv)
    assert argument_count <= 2 and sys.stdin.isatty() == (argument_count == 2), 'Invalid arguments.'
    sources = [sys.argv[1]] if sys.stdin.isatty() else sys.stdin
    for source in sources:
        sys.stdout.write(parse_graphlang(source).dumps_xml(encoding='utf-8') + '\n')
399 | 


--------------------------------------------------------------------------------
/pydmrs/mapping/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delph-in/pydmrs/795b35dba4986fa9084eaa81fb16206cb131a752/pydmrs/mapping/__init__.py


--------------------------------------------------------------------------------
/pydmrs/mapping/mapping.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | from pydmrs._exceptions import PydmrsError
  3 | from pydmrs.components import Pred, RealPred, GPred, Sortinfo, EventSortinfo, InstanceSortinfo
  4 | from pydmrs.core import Link, Node
  5 | from pydmrs.matching.exact_matching import dmrs_exact_matching
  6 | 
  7 | 
  8 | class AnchorNode(Node):
  9 |     """
 10 |     A DMRS graph node with an additional anchor id to identify anchor nodes for DMRS mapping.
 11 |     """
 12 | 
 13 |     def __init__(self, anchors, *args, **kwargs):
 14 |         """
 15 |         Create a new anchor node instance.
 16 |         """
 17 |         super().__init__(*args, **kwargs)
 18 |         self.anchors = anchors
 19 |         self.required = True
 20 |         self.requires_target = True
 21 | 
 22 |     def before_map(self, dmrs, nodeid):
 23 |         """
 24 |         Is applied before the target node is mapped.
 25 |         :param dmrs Target DMRS graph.
 26 |         :param nodeid Target node id.
 27 |         """
 28 |         pass
 29 | 
 30 |     def after_map(self, dmrs, nodeid):
 31 |         """
 32 |         Is applied after the target node is mapped.
 33 |         :param dmrs Target DMRS graph.
 34 |         :param nodeid Target node id.
 35 |         """
 36 |         pass
 37 | 
 38 |     def map(self, dmrs, nodeid, hierarchy=None):
 39 |         """
 40 |         Overrides the values of the target node if they are not underspecified in this anchor node.
 41 |         :param dmrs Target DMRS graph.
 42 |         :param nodeid Target node id.
 43 |         :param hierarchy: An optional predicate hierarchy.
 44 |         """
 45 |         node = dmrs[nodeid]
 46 |         if self == node or self.is_less_specific(node, hierarchy=hierarchy):
 47 |             return
 48 |         if isinstance(self.pred, RealPred):
 49 |             if isinstance(node.pred, RealPred):
 50 |                 node.pred = RealPred(node.pred.lemma if self.pred.lemma == '?' else self.pred.lemma, node.pred.pos if self.pred.pos in ('u', '?') else self.pred.pos, node.pred.sense if self.pred.sense in ('unknown', '?') else self.pred.sense)
 51 |             else:
 52 |                 node.pred = copy.deepcopy(self.pred)
 53 |         elif isinstance(self.pred, GPred):
 54 |             if isinstance(node.pred, GPred):
 55 |                 node.pred = GPred(node.pred.name if self.pred.name == '?' else self.pred.name)
 56 |             else:
 57 |                 node.pred = copy.deepcopy(self.pred)
 58 |         elif not isinstance(self.pred, Pred):
 59 |             node.pred = None
 60 |         if isinstance(self.sortinfo, EventSortinfo):
 61 |             if isinstance(node.sortinfo, EventSortinfo):
 62 |                 node.sortinfo = EventSortinfo(node.sortinfo.sf if self.sortinfo.sf in ('u', '?') else self.sortinfo.sf, node.sortinfo.tense if self.sortinfo.tense in ('u', '?') else self.sortinfo.tense, node.sortinfo.mood if self.sortinfo.mood in ('u', '?') else self.sortinfo.mood, node.sortinfo.perf if self.sortinfo.perf in ('u', '?') else self.sortinfo.perf, node.sortinfo.prog if self.sortinfo.prog in ('u', '?') else self.sortinfo.prog)
 63 |             else:
 64 |                 node.sortinfo = copy.deepcopy(self.sortinfo)
 65 |         elif isinstance(self.sortinfo, InstanceSortinfo):
 66 |             if isinstance(node.sortinfo, InstanceSortinfo):
 67 |                 node.sortinfo = InstanceSortinfo(node.sortinfo.pers if self.sortinfo.pers in ('u', '?') else self.sortinfo.pers, node.sortinfo.num if self.sortinfo.num in ('u', '?') else self.sortinfo.num, node.sortinfo.gend if self.sortinfo.gend in ('u', '?') else self.sortinfo.gend, node.sortinfo.ind if self.sortinfo.ind in ('u', '?') else self.sortinfo.ind, node.sortinfo.pt if self.sortinfo.pt in ('u', '?') else self.sortinfo.pt)
 68 |             else:
 69 |                 node.sortinfo = copy.deepcopy(self.sortinfo)
 70 |         elif not isinstance(self.sortinfo, Sortinfo):
 71 |             node.sortinfo = None
 72 |         if self.carg != '?':
 73 |             node.carg = self.carg
 74 | 
 75 |     def unify(self, other, hierarchy=None):
 76 |         """
 77 |         Unify nodes.
 78 |         :param other: The node to unify with.
 79 |         :param hierarchy: An optional predicate hierarchy.
 80 |         """
 81 |         hierarchy = hierarchy or dict()
 82 |         if (
 83 |             type(self.pred) is RealPred and
 84 |             type(other.pred) is RealPred and
 85 |             (self.pred.lemma == other.pred.lemma or self.pred.lemma == '?' or other.pred.lemma == '?') and
 86 |             (self.pred.pos == other.pred.pos or self.pred.pos in ('u', '?') or other.pred.pos in ('u', '?')) and
 87 |             (self.pred.sense == other.pred.sense or self.pred.sense in ('unknown', '?') or other.pred.sense in ('unknown', '?'))
 88 |         ):
 89 |             # RealPred and predicate values are either equal or underspecified
 90 |             lemma = other.pred.lemma if self.pred.lemma == '?' else self.pred.lemma
 91 |             pos = other.pred.pos if self.pred.pos in ('u', '?') else self.pred.pos
 92 |             sense = other.pred.sense if self.pred.sense in ('unknown', '?') else self.pred.sense
 93 |             self.pred = RealPred(lemma, pos, sense)
 94 |         elif (
 95 |             type(self.pred) is GPred and
 96 |             type(other.pred) is GPred and
 97 |             (self.pred.name == other.pred.name or self.pred.name == '?' or other.pred.name == '?')
 98 |         ):
 99 |             # GPred and predicate values are either equal or underspecified
100 |             name = other.pred.name if self.pred.name == '?' else self.pred.name
101 |             self.pred = GPred(name)
102 |         elif type(self.pred) is Pred or str(other.pred) in hierarchy.get(str(self.pred), ()):
103 |             # predicate is underspecified, or predicate is more general according to the hierarchy
104 |             self.pred = other.pred
105 |         elif type(other.pred) is Pred or str(self.pred) in hierarchy.get(str(other.pred), ()):
106 |             # other is underspecified, or predicate is more specific according to the hierarchy
107 |             pass
108 |         else:
109 |             raise PydmrsError("Node predicates cannot be unified: {}, {}".format(self.pred, other.pred))
110 | 
111 |         if type(self.sortinfo) is not Sortinfo and isinstance(other.sortinfo, type(self.sortinfo)) and all((self.sortinfo[key] == other.sortinfo[key]) or (self.sortinfo[key] in ('u', '?')) or (other.sortinfo[key] in ('u', '?')) for key in self.sortinfo.features):
112 |             # same sortinfo type and values are either equal or underspecified
113 |             self.sortinfo = type(self.sortinfo)(*(other.sortinfo[key] if self.sortinfo[key] in ('u', '?') else self.sortinfo[key] for key in self.sortinfo.features))
114 |         elif type(self.sortinfo) is Sortinfo and isinstance(other.sortinfo, Sortinfo):
115 |             # sortinfo is underspecified
116 |             self.sortinfo = other.sortinfo
117 |         elif type(other.sortinfo) is Sortinfo and isinstance(self.sortinfo, Sortinfo):
118 |             # other is underspecified
119 |             pass
120 |         elif self.sortinfo is None and other.sortinfo is None:
121 |             pass
122 |         else:
123 |             raise PydmrsError("Node sortinfos cannot be unified: {}, {}".format(self.sortinfo, other.sortinfo))
124 | 
125 |         if self.carg == other.carg or other.carg == '?':
126 |             # same carg, or other is underspecified
127 |             pass
128 |         elif self.carg == '?':
129 |             # carg is underspecified
130 |             self.carg = other.carg
131 |         else:
132 |             raise PydmrsError("Node cargs cannot be unified: {}, {}".format(self.carg, other.carg))
133 | 
134 | 
class SubgraphNode(AnchorNode):
    """
    An anchor node standing for itself together with the subgraph hanging off it.
    That subgraph is made up of the nodes whose only connection to the top node of the graph runs through this node, i.e. the nodes which would be cut off if this node was removed.
    """

    def __init__(self, *args, **kwargs):
        """
        Create a new subgraph node instance (a counterpart in the replace graph is not required).
        """
        super().__init__(*args, **kwargs)
        self.requires_target = False

    def before_map(self, dmrs, nodeid):
        """
        Drops the subgraph attached to the target node from the target graph.
        :param dmrs Target DMRS graph (requires the top node specified).
        :param nodeid Target node id.
        """
        assert dmrs.top is not None, 'Top node has to be specified for subgraph node to map.'
        anchor = dmrs[nodeid]
        # take the node out, drop whatever became disconnected, then put the node back
        dmrs.remove_node(nodeid)
        detached_nodeids = dmrs.disconnected_nodeids()
        dmrs.remove_nodes(detached_nodeids)
        dmrs.add_node(anchor)
159 | 
160 | 
class OptionalNode(AnchorNode):
    """
    An anchor node which may be absent: it neither has to be matched nor needs a counterpart in the replace graph.
    """

    def __init__(self, *args, **kwargs):
        """
        Create a new optional node instance.
        """
        super().__init__(*args, **kwargs)
        self.required = self.requires_target = False
173 | 
174 | 
def dmrs_mapping(dmrs, search_dmrs, replace_dmrs, equalities=(), hierarchy=None, copy_dmrs=True, iterative=True, all_matches=True, require_connected=True, max_matches=100):
    """
    Maps a DMRS graph by replacing the matches of a search subgraph with a replace subgraph.
    Anchor nodes of the search graph are kept and overridden by the replace anchor node sharing an anchor id, all other matched nodes are removed and the remaining replace nodes are added as copies.
    :param dmrs DMRS graph to map.
    :param search_dmrs DMRS subgraph to replace.
    :param replace_dmrs DMRS subgraph to replace with.
    :param equalities Passed on to the exact matching.
    :param hierarchy An optional predicate hierarchy.
    :param copy_dmrs True if DMRS graph argument should be copied before being mapped.
    :param iterative True if all possible mappings should be performed iteratively to the same DMRS graph, instead of a separate copy per mapping (iterative=False requires copy_dmrs=True).
    :param all_matches True if all possible matches should be returned, instead of only the first (or None).
    :param require_connected True if mappings resulting in a disconnected DMRS graph should be ignored.
    :param max_matches: Maximum number of matches; an Exception is raised if there are more.
    :return Mapped DMRS graph, or None if there is no valid mapping (resp. a list of graphs in case of iterative=False and all_matches=True, resp. True/False for whether a mapping was performed in case of copy_dmrs=False)
    """
    assert copy_dmrs or iterative, 'Invalid argument combination.'

    # extract anchor node mapping between search_dmrs and replace_dmrs
    sub_mapping = {}
    optional_nodeids = []
    for search_node in search_dmrs.iter_nodes():
        if not isinstance(search_node, AnchorNode):
            continue
        if not search_node.required:
            optional_nodeids.append(search_node.nodeid)
        for replace_node in replace_dmrs.iter_nodes():
            if not isinstance(replace_node, AnchorNode) or all(anchor not in replace_node.anchors for anchor in search_node.anchors):
                continue
            assert search_node.nodeid not in sub_mapping, 'Node matches multiple nodes.' + str(search_node)
            sub_mapping[search_node.nodeid] = replace_node.nodeid
        if search_node.nodeid not in sub_mapping:
            assert not search_node.requires_target, 'Un-matched anchor node.'

    # set up variables according to settings
    if iterative:
        result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs
    elif all_matches:
        result = []
    # the matchings are always computed against the original graph argument
    matchings = dmrs_exact_matching(search_dmrs, dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True)

    # continue while there is a match for search_dmrs
    count = 0
    for search_matching in matchings:
        # only complain once a match beyond the limit actually exists
        if count >= max_matches:
            raise Exception('More than {} matches!'.format(max_matches))
        count += 1
        if not iterative:
            # every mapping is applied to its own fresh graph
            result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs

        # remove nodes in the matched search_dmrs if they are no anchor nodes, otherwise perform mapping()
        # mapping() performs the mapping process (with whatever it involves) specific to this node type (e.g. fill underspecified values)
        for nodeid in search_dmrs:
            search_node = search_dmrs[nodeid]
            if isinstance(search_node, AnchorNode):
                search_node.before_map(result_dmrs, search_matching[nodeid])
        replace_matching = {}
        for nodeid in search_matching:
            if nodeid in sub_mapping:
                replace_node = replace_dmrs[sub_mapping[nodeid]]
                replace_node.map(result_dmrs, search_matching[nodeid], hierarchy=hierarchy)
                replace_node.after_map(result_dmrs, search_matching[nodeid])
                replace_matching[sub_mapping[nodeid]] = search_matching[nodeid]
            elif search_matching[nodeid] is not None:
                result_dmrs.remove_node(search_matching[nodeid])

        # add copies of the non-anchor nodes for the matched replace_dmrs
        for nodeid in replace_dmrs:
            if nodeid in replace_matching:
                continue
            node = copy.deepcopy(replace_dmrs[nodeid])
            node.nodeid = result_dmrs.free_nodeid()
            result_dmrs.add_node(node)
            replace_matching[nodeid] = node.nodeid

        # set top/index if specified in replace_dmrs
        if replace_dmrs.top is not None:
            result_dmrs.top = result_dmrs[replace_matching[replace_dmrs.top.nodeid]]
        if replace_dmrs.index is not None:
            result_dmrs.index = result_dmrs[replace_matching[replace_dmrs.index.nodeid]]

        # remove all links in the matched search_dmrs
        matching_values = set(search_matching.values())
        links = [link for link in result_dmrs.iter_links() if link.start in matching_values and link.end in matching_values]
        result_dmrs.remove_links(links)

        # add all links for the matched replace_dmrs
        for link in replace_dmrs.iter_links():
            link = Link(replace_matching[link.start], replace_matching[link.end], link.rargname, link.post)
            result_dmrs.add_link(link)

        # add/return result
        if not require_connected or result_dmrs.is_connected():
            if all_matches and not iterative:
                result.append(result_dmrs)
            elif not all_matches:
                return result_dmrs if copy_dmrs else True

    # return mapping(s) since there are no more matches left
    if not all_matches:
        # no (valid) first mapping was found
        return None if copy_dmrs else False
    if not iterative:
        return result
    if not require_connected or result_dmrs.is_connected():
        return result_dmrs if copy_dmrs else count > 0
    return None if copy_dmrs else False
304 | 


--------------------------------------------------------------------------------
/pydmrs/mapping/paraphrase.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from pydmrs.core import Dmrs, ListDmrs
 3 | from pydmrs.mapping.mapping import dmrs_mapping
 4 | from pydmrs.graphlang.graphlang import parse_graphlang
 5 | 
 6 | 
 7 | def read_paraphrases_file(filename):
 8 |     """
 9 |     """
10 |     paraphrases = []
11 |     file = open(filename, 'r')
12 |     lines = iter(file)
13 |     for line in lines:
14 |         try:
15 |             # equalities etc
16 |             paraphrases.append((parse_graphlang(line), parse_graphlang(next(lines))))
17 |         except StopIteration:
18 |             assert False, 'Invalid paraphrases file format.'
19 |         try:
20 |             assert not next(lines)
21 |         except StopIteration:
22 |             break
23 |     return paraphrases
24 | 
25 | 
def paraphrase(dmrs, paraphrases, hierarchy=None):
    """
    Apply a sequence of paraphrase rules to a DMRS graph, one after the other.
    :param dmrs DMRS graph to paraphrase.
    :param paraphrases Iterable of (search_dmrs, replace_dmrs) pairs.
    :param hierarchy An optional predicate hierarchy.
    :return The graph after the last successful mapping; application stops at the first rule for which dmrs_mapping returns None.
    """
    assert isinstance(dmrs, Dmrs), 'Object in dmrs_iter is not a Dmrs.'
    current = dmrs
    for search_dmrs, replace_dmrs in paraphrases:
        mapped = dmrs_mapping(current, search_dmrs, replace_dmrs, hierarchy=hierarchy)
        if mapped is None:
            return current
        current = mapped
    return current
37 | 
38 | 
if __name__ == '__main__':
    # Command line use: the paraphrases file is the single argument, DMRS graphs are read as
    # XML from a piped stdin (one per line) and the paraphrased graphs are written to stdout.
    assert len(sys.argv) == 2 and not sys.stdin.isatty(), 'Invalid arguments'
    paraphrases = read_paraphrases_file(sys.argv[1])
    for line in sys.stdin:
        # strip only the line terminator: cutting off the last character unconditionally
        # would truncate a final line which does not end with a newline
        dmrs = ListDmrs.loads_xml(line.rstrip('\n'))
        sys.stdout.write(str(paraphrase(dmrs, paraphrases)) + '\n')
45 | 


--------------------------------------------------------------------------------
/pydmrs/matching/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delph-in/pydmrs/795b35dba4986fa9084eaa81fb16206cb131a752/pydmrs/matching/__init__.py


--------------------------------------------------------------------------------
/pydmrs/matching/aligned_matching.py:
--------------------------------------------------------------------------------
  1 | from pydmrs.core import SortDictDmrs, span_pred_key, abstractSortDictDmrs
  2 | from pydmrs.matching.common import are_equal_links
  3 | 
  4 | 
  5 | # ------------------------------------------------------------------------
  6 | def match_nodes(nodes1, nodes2, excluded=[]):
  7 |     """
  8 |     :param nodes1: A list of Nodes from the DMRS to be matched, sorted by span_pred_key.
  9 |     :param nodes2: A list of Nodes from the DMRS against which we match, sorted by span_pred_key.
 10 |     :param excluded: A list of nodeids which should not be used for matching.
 11 | 
 12 |     :return: A list of lists of nodeid pairs. The first element in the pair is from small DMRS, the second from the
 13 |     larger one. The pairs are listed in reverse span_pred_key order of the corresponding nodes.  Returns [] if no
 14 |     match found.
 15 |     """
 16 |     if not nodes1 or not nodes2:
 17 |         return []
 18 |     matches = []
 19 |     earliest = len(nodes1)
 20 |     longest = 0
 21 |     for i, node2 in enumerate(nodes2):
 22 |         if len(nodes2) - i < longest:  # Not enough nodes left to beat the current longest match.
 23 |             break
 24 |         if excluded and node2.nodeid in excluded:
 25 |             continue
 26 |         for j, node1 in enumerate(nodes1):
 27 |             if j > earliest:  # To avoid repetition.
 28 |                 break
 29 |             if node1 == node2:
 30 |                 best_matches = match_nodes(nodes1[j + 1:], nodes2[i + 1:], excluded=excluded)
 31 |                 if best_matches:
 32 |                     for match in best_matches:
 33 |                         match.append((node1.nodeid, node2.nodeid))
 34 |                 else:
 35 |                     best_matches = [[(node1.nodeid, node2.nodeid)]]
 36 |                 earliest = j
 37 |                 longest = max(longest, len(best_matches[0]))
 38 |                 matches.extend(best_matches)
 39 |     if matches:
 40 |         max_len = len(max(matches, key=len))
 41 |         return [m for m in matches if len(m) == max_len]
 42 |     else:
 43 |         return []
 44 | 
 45 | 
 46 | def add_quantifier_matches(dmrs1, dmrs2, longest_matches):
 47 |     for m in longest_matches:
 48 |         q_pairs = []
 49 |         for nodeid1, nodeid2 in m:
 50 |             try:
 51 |                 q_link1 = dmrs1.get_in(nodeid1, rargname='RSTR', post='H').pop()
 52 |                 q_link2 = dmrs2.get_in(nodeid2, rargname='RSTR', post='H').pop()
 53 |             except KeyError:
 54 |                 continue
 55 |             if dmrs1[q_link1.start] == dmrs2[q_link2.start]:
 56 |                 q_pairs.append((q_link1.start, q_link2.start))
 57 |         m.extend(q_pairs)
 58 | 
 59 | 
 60 | def get_compounds(dmrs, compound_preds):
 61 |     compounds = []
 62 |     for node in dmrs.iter_nodes():
 63 |         if str(node.pred) in compound_preds:
 64 |             arg1 = dmrs.get_out_nodes(node.nodeid, rargname='ARG1').pop().nodeid
 65 |             arg2 = dmrs.get_out_nodes(node.nodeid, rargname='ARG2').pop().nodeid
 66 |             compounds.append({"node": node, "args": (arg1, arg2)})
 67 |     return compounds
 68 | 
 69 | 
 70 | def add_compound_matches(small_dmrs, large_dmrs, longest_matches, compound_preds):
 71 |     small_compounds = get_compounds(small_dmrs, compound_preds)
 72 |     large_compounds = get_compounds(large_dmrs, compound_preds)
 73 | 
 74 |     for m in longest_matches:
 75 |         cmpd_pairs = []
 76 |         for small_cmpd in small_compounds:
 77 |             query_arg1 = None
 78 |             query_arg2 = None
 79 |             for small, large in m:
 80 |                 if small == small_cmpd['args'][0]:
 81 |                     query_arg1 = large
 82 |                 elif small == small_cmpd['args'][1]:
 83 |                     query_arg2 = large
 84 |                 if query_arg1 and query_arg2:
 85 |                     break
 86 |             else:
 87 |                 continue
 88 |             for large_cmpd in large_compounds:
 89 |                 if (query_arg1, query_arg2) == large_cmpd['args']:
 90 |                     if small_cmpd['node'] == large_cmpd['node']:
 91 |                         cmpd_pairs.append((small_cmpd['node'].nodeid, large_cmpd['node'].nodeid))
 92 |         m.extend(cmpd_pairs)
 93 | 
 94 | 
 95 | def find_extra_surface_nodeids(nodeids, large_dmrs):
 96 |     """ Finds nodeids present in the aligned matched region of the large DMRS,
 97 |         but which have no equivalents in the small DMRS.
 98 | 
 99 |         :param nodeids Nodeids from the large DMRS which have equivalents in the small one, sorted by span_pred_key of
100 |         their nodes.
101 |         :param large_dmrs The large DMRS.
102 | 
103 |         :return A list of additional nodeids sharing the span with nodeids but without equivalents in the small DMRS.
104 |     """
105 |     max_cto = large_dmrs[nodeids[-1]].cto
106 |     extra_nodeids = []
107 |     reached_start = False
108 |     reached_end = False
109 |     for i, node in enumerate(large_dmrs.nodes):
110 |         if node.nodeid == nodeids[0]:
111 |             first_overlap_orderid = i
112 |             min_cfrom = node.cfrom
113 |             max_cto = max(max_cto, node.cto)
114 |             while True and first_overlap_orderid > 0:
115 |                 prev_node = large_dmrs.nodes[first_overlap_orderid - 1]
116 |                 prev_cfrom = prev_node.cfrom
117 |                 if prev_cfrom == min_cfrom and prev_node.cto <= max_cto:
118 |                     first_overlap_orderid -= 1
119 |                     extra_nodeids.append(prev_node.nodeid)
120 |                     max_cto = max(max_cto, prev_node.cto)
121 |                 else:
122 |                     break
123 |             reached_start = True
124 |         elif not reached_start:
125 |             continue
126 |         elif reached_end and node.cfrom >= max_cto:
127 |             break
128 |         else:
129 |             max_cto = max(max_cto, node.cto)
130 |             if node.nodeid not in nodeids and node.nodeid not in extra_nodeids:
131 |                 extra_nodeids.append(node.nodeid)
132 |         if node.nodeid == nodeids[-1]:
133 |             reached_end = True
134 | 
135 |     return extra_nodeids
136 | 
137 | 
def get_links(dmrs, nodeids):
    """
    Collect the links of dmrs whose two endpoints both belong to nodeids.

    :param dmrs: A Dmrs object.
    :param nodeids: A list of nodeids.
    :return: A list of all links starting and ending on a node from nodeids.
             Links obtained from get_out come first (in nodeids order), followed
             by the EQ links obtained from get_eq, each included once.
    """
    wanted = set(nodeids)  # O(1) membership tests
    links = []
    eq_links = set()
    for nodeid in nodeids:
        links.extend(link for link in dmrs.get_out(nodeid) if link.end in wanted)
        # An EQ link is reported by both of its end nodes, hence the set.
        # Only keep it when the node on the other side is selected as well,
        # otherwise the result would contain links leaving the node selection.
        eq_links.update(link for link in dmrs.get_eq(nodeid)
                        if link.start in wanted and link.end in wanted)
    links.extend(eq_links)
    return links
157 | 
158 | 
def get_subgraph(dmrs, subgraph_nodeids):
    """ Builds the subgraph of dmrs induced by subgraph_nodeids.
    :param dmrs: A Dmrs object.
    :param subgraph_nodeids: A list of nodeids.
    :return A SortDictDmrs (ordered by span_pred_key) holding the nodes with subgraph_nodeids and the links
            get_links reports for them.
    """
    subgraph_nodes = []
    for nodeid in subgraph_nodeids:
        subgraph_nodes.append(dmrs[nodeid])
    subgraph_links = get_links(dmrs, subgraph_nodeids)
    return SortDictDmrs(subgraph_nodes, links=subgraph_links, node_key=span_pred_key)
167 | 
168 | 
169 | # -------------------------------------------------------------------------------
170 | 
def get_link_diff(small_dmrs, matched_subgraph, matching_nodeids):
    """
    Splits the links of small_dmrs and matched_subgraph into missing, extra and shared ones.

    :param small_dmrs A Dmrs which we're matching.
    :param matched_subgraph A Dmrs. A subgraph of the larger DMRS returned as a match for small_dmrs.
    :param matching_nodeids A list of pairs of nodeids. The first nodeid in each pair comes from small_dmrs
    (or is None for a subgraph node without a small equivalent), the second comes from the large dmrs.
    :return three list of links:
        1) links present only in the small dmrs
        2) links present only in the matched subgraph
        3) common links (reported as the small_dmrs link).
    """
    both = []
    small_only = []
    subgraph_only = []
    # EQ links are returned by get_eq for both of their end nodes; remember the
    # ones already classified so that each is counted exactly once overall.
    checked_eq_links = set()

    for small_nodeid, subgraph_nodeid in matching_nodeids:
        if small_nodeid is None:
            # Extra subgraph node: all its outgoing links are extra too.
            subgraph_only.extend(matched_subgraph.get_out(subgraph_nodeid))
            continue

        small_links = small_dmrs.get_out(small_nodeid) | small_dmrs.get_eq(small_nodeid)
        subgraph_links = list(matched_subgraph.get_out(subgraph_nodeid))
        used = [False] * len(subgraph_links)
        for link1 in small_links:
            # Check if the EQ has been counted already.
            if not link1.rargname:
                if link1 in checked_eq_links:
                    continue
                checked_eq_links.add(link1)
            for position, link2 in enumerate(subgraph_links):
                if are_equal_links(link1, link2, small_dmrs, matched_subgraph):
                    both.append(link1)
                    used[position] = True
                    break
            else:
                small_only.append(link1)
        subgraph_only.extend(link for link, is_used in zip(subgraph_links, used) if not is_used)

    # Links of the small nodes which were not matched at all are missing.
    matched_small_nodeids = {pair[0] for pair in matching_nodeids}
    for nodeid in small_dmrs:
        if nodeid in matched_small_nodeids:
            continue
        small_only.extend(small_dmrs.get_out(nodeid))
        for link in small_dmrs.get_eq(nodeid):
            if link not in checked_eq_links:
                checked_eq_links.add(link)
                small_only.append(link)

    return small_only, subgraph_only, both
221 | 
222 | 
223 | # ------------------------------------------------------------------------------
224 | ## IMPORTANT ##
def get_matching_nodeids(small_dmrs, large_dmrs, all_surface=False, large_excluded=None):
    """ Finds the best pairings of nodeids between small_dmrs and large_dmrs. Quantifier and
        compound nodes are left out of the initial node matching and are paired up afterwards
        by add_quantifier_matches and add_compound_matches.
        :param small_dmrs A DMRS object used as a match query,
        :param large_dmrs A DMRS object to be searched for a match.
        :param all_surface If true, include all nodes from the aligned surface region.
                           If false, find only the nodes with equivalents in small_dmrs.
        :param large_excluded The nodeids from the large DMRS to be ignored during matching.

        :return A list of lists of matched nodeid pairs (small_dmrs nodeid, large_dmrs nodeid).
                A list of lists, in case more than one best match found. With all_surface, extra
                large nodes are appended as (None, large_dmrs nodeid).
    """
    # Both graphs have to be SortDictDmrs objects ordered by span_pred_key; convert otherwise.
    if not (isinstance(small_dmrs, SortDictDmrs) and small_dmrs.node_key == span_pred_key):
        small_dmrs = small_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))
    if not (isinstance(large_dmrs, SortDictDmrs) and large_dmrs.node_key == span_pred_key):
        large_dmrs = large_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    # Leave out quantifiers as well as compound_name and compound predicates.
    filtered_pred = ['compound', 'compound_name']
    filtered_small = [node for node in small_dmrs.nodes
                      if not small_dmrs.is_quantifier(node.nodeid)
                      and str(node.pred) not in filtered_pred]
    filtered_large = [node for node in large_dmrs.nodes
                      if not large_dmrs.is_quantifier(node.nodeid)
                      and str(node.pred) not in filtered_pred]

    # list of lists of nodeid pairs
    candidates = match_nodes(filtered_small, filtered_large, excluded=large_excluded)
    add_quantifier_matches(small_dmrs, large_dmrs, candidates)
    add_compound_matches(small_dmrs, large_dmrs, candidates, filtered_pred)

    # Keep only the candidates with the largest number of pairs.
    top_size = max((len(candidate) for candidate in candidates), default=0)
    best_candidates = [candidate for candidate in candidates if len(candidate) == top_size]

    # Matches are returned in reverse span_pred_key order.
    all_matched_nodeids = []
    for match in best_candidates:
        large_column = list(zip(*match))[1]
        matched_large_nodeids = list(large_column[::-1])  # span_pred_key order

        if all_surface:
            for nodeid in find_extra_surface_nodeids(matched_large_nodeids, large_dmrs):
                match.append((None, nodeid))
        all_matched_nodeids.append(match)

    return all_matched_nodeids
269 | 
270 | 
def get_matched_subgraph(matching_nodeids, large_dmrs):
    """
    Extracts the part of large_dmrs covered by a match.

    :param matching_nodeids: A list of pairs of matched nodeids from the small and large dmrs.
    :param large_dmrs: A Dmrs.
    :return: A Dmrs. A subgraph of large_dmrs containing only nodes with nodeids in matching_nodeids.
    """
    # Transpose the pairs; the second column holds the large-dmrs nodeids.
    columns = tuple(zip(*matching_nodeids))
    return get_subgraph(large_dmrs, columns[1])
279 | 
280 | 
def get_best_subgraph(nodeid_matches, small_dmrs, large_dmrs):
    """
    Picks the matched subgraphs with the highest fscore.

    :param nodeid_matches: A list of matches, each a list of (small nodeid, large nodeid) pairs.
    :param small_dmrs: The Dmrs used as the query.
    :param large_dmrs: The Dmrs the matches were found in.
    :return: A pair (subgraphs, score): all subgraphs sharing the best fscore and the
             (num_correct, num_matched, num_expected) score of the first subgraph that reached it;
             (0, 0, 0) if no match scores above zero.
    """
    top_fscore = 0
    top_score = (0, 0, 0)
    winners = []
    for candidate in nodeid_matches:
        candidate_graph = get_matched_subgraph(candidate, large_dmrs)
        candidate_score = get_score(small_dmrs, candidate_graph, candidate)
        candidate_fscore = get_fscore(*candidate_score)
        if candidate_fscore < top_fscore:
            continue
        if candidate_fscore == top_fscore:
            # A tie: keep it alongside the current winners.
            winners.append(candidate_graph)
        else:
            winners = [candidate_graph]
            top_fscore = candidate_fscore
            top_score = candidate_score
    return winners, top_score
296 | 
297 | 
def get_score(small_dmrs, matched_subgraph, matching_nodeids):
    """
    Counts correct, matched and expected elements (nodes and links) of a match.

    :param small_dmrs: The Dmrs used as the query.
    :param matched_subgraph: The subgraph of the large Dmrs returned as the match.
    :param matching_nodeids: A list of (small nodeid, large nodeid) pairs; the small nodeid is None
                             for subgraph nodes without an equivalent in small_dmrs.
    :return: A tuple (num_correct, num_matched, num_expected) suitable for get_fscore.
    """
    # Collect the first column once instead of re-zipping it for every node.
    small_side = {pair[0] for pair in matching_nodeids}
    num_extra_nodes = sum(1 for pair in matching_nodeids if pair[0] is None)
    num_matched_nodes = len(matching_nodeids) - num_extra_nodes
    num_missing_nodes = sum(1 for nodeid in small_dmrs if nodeid not in small_side)

    only_small_links, only_subgraph_links, shared_links = get_link_diff(small_dmrs,
                                                                        matched_subgraph,
                                                                        matching_nodeids)
    num_extra_links = len(only_subgraph_links)
    num_missing_links = len(only_small_links)
    num_shared_links = len(shared_links)

    num_correct = num_matched_nodes + num_shared_links
    num_matched = num_correct + num_extra_links + num_extra_nodes
    num_expected = num_correct + num_missing_links + num_missing_nodes

    return num_correct, num_matched, num_expected
316 | 
317 | 
def get_fscore(num_correct, num_matched, num_expected):
    """
    Computes the F-score (harmonic mean of precision and recall) of a match.

    :param num_correct: Number of correctly matched elements.
    :param num_matched: Number of elements in the match (precision denominator).
    :param num_expected: Number of elements in the query (recall denominator).
    :return: The fscore; 0 whenever a denominator would be zero.
    """
    precision = 0
    if num_matched > 0:
        precision = num_correct / num_matched
    recall = 0
    if num_expected > 0:
        recall = num_correct / num_expected
    total = precision + recall
    if total > 0:
        return 2 * precision * recall / total
    return 0
322 | 


--------------------------------------------------------------------------------
/pydmrs/matching/common.py:
--------------------------------------------------------------------------------
 1 | def are_equal_nodes(n1, n2, underspecified=True):
 2 |     """Returns True if nodes n1 and n2 have the same predicate and sortinfo. If underspecified,
 3 |     allow underspecification."""
 4 |     if underspecified:
 5 |         if n1.is_less_specific(n2) or n2.is_less_specific(n1):
 6 |             return True
 7 |     return n1.pred == n2.pred and n1.sortinfo == n2.sortinfo and n1.carg == n2.carg
 8 | 
 9 | 
def are_equal_links(l1, l2, dmrs1, dmrs2, underspecified=True):
    """Returns True if links l1 and l2 have the same link label and their
       starting and ending nodes respectively satisfy are_equal_nodes; False otherwise.

       Links without rargname (EQ links) are treated as undirected: they also match
       when the start of one corresponds to the end of the other and vice versa.

       :param l1 A link from dmrs1.
       :param l2 A link from dmrs2.
       :param dmrs1 The DMRS containing l1; indexed by nodeid.
       :param dmrs2 The DMRS containing l2; indexed by nodeid.
       :param underspecified Passed on to are_equal_nodes.
    """
    if l1.label != l2.label:
        return False

    def same_node(nodeid1, nodeid2):
        return are_equal_nodes(dmrs1[nodeid1], dmrs2[nodeid2], underspecified)

    if same_node(l1.start, l2.start) and same_node(l1.end, l2.end):
        return True
    if l1.rargname is None:
        # Undirected link: try the reversed orientation as well.
        return bool(same_node(l1.start, l2.end) and same_node(l1.end, l2.start))
    return False
27 | 


--------------------------------------------------------------------------------
/pydmrs/matching/exact_matching.py:
--------------------------------------------------------------------------------
  1 | from pydmrs.core import Dmrs
  2 | 
  3 | 
  4 | def dmrs_exact_matching(sub_dmrs, dmrs, optional_nodeids=(), equalities=(), hierarchy=None, match_top_index=True):
  5 |     """
  6 |     Performs an exact DMRS (sub)graph matching of a (sub)graph against a containing graph.
  7 |     :param sub_dmrs DMRS (sub)graph to match.
  8 |     :param dmrs DMRS graph to match against.
  9 |     :param optional_nodeids
 10 |     :param equalities
 11 |     :param hierarchy An optional predicate hierarchy.
 12 |     :param match_top_index
 13 |     :return Iterator of dictionaries, mapping node ids of the matched (sub)graph to the corresponding matching node id in the containing graph.
 14 |     """
 15 |     hierarchy = hierarchy or dict()
 16 | 
 17 |     if not isinstance(sub_dmrs, Dmrs) or not isinstance(dmrs, Dmrs):
 18 |         return
 19 |     matching = {}
 20 |     matching_values = set()
 21 |     matches = {}
 22 | 
 23 |     # find matchable nodes and add unambiguous matchings
 24 |     for sub_node in sub_dmrs.iter_nodes():
 25 |         match = [node.nodeid for node in dmrs.iter_nodes() if sub_node == node or sub_node.is_less_specific(node, hierarchy=hierarchy)]
 26 |         if match:
 27 |             if sub_node.nodeid in optional_nodeids:
 28 |                 match.append(None)
 29 |             if len(match) == 1:
 30 |                 matching[sub_node.nodeid] = match[0]
 31 |                 matching_values.add(match[0])
 32 |                 continue
 33 |             matches[sub_node.nodeid] = match
 34 |         elif sub_node.nodeid not in optional_nodeids:
 35 |             return
 36 | 
 37 |     # match index and top
 38 |     if match_top_index:
 39 |         # if sub_dmrs.top is None:
 40 |         #     if dmrs.top is not None:
 41 |         #         top = dmrs.top.nodeid
 42 |         #         if top in matching.values():
 43 |         #             return
 44 |         #         for sub_nodeid, match in matches.items():
 45 |         #             if top in match:
 46 |         #                 match.remove(top)
 47 |         #             if not match and sub_nodeid not in optional_nodeids:
 48 |         #                 return
 49 |         # else:
 50 |         if sub_dmrs.top is not None:
 51 |             if dmrs.top is None:
 52 |                 return
 53 |             sub_top = sub_dmrs.top.nodeid
 54 |             top = dmrs.top.nodeid
 55 |             if sub_top in matching:
 56 |                 if matching[sub_top] != top:
 57 |                     return
 58 |             else:
 59 |                 if top in matches[sub_top]:
 60 |                     matching[sub_top] = top
 61 |                     matching_values.add(top)
 62 |                     del matches[sub_top]
 63 |                 else:
 64 |                     return
 65 |         # if sub_dmrs.index is None:
 66 |         #     if dmrs.index is not None:
 67 |         #         index = dmrs.index.nodeid
 68 |         #         if index in matching.values():
 69 |         #             return
 70 |         #         for sub_nodeid, match in matches.items():
 71 |         #             if index in match:
 72 |         #                 match.remove(index)
 73 |         #             if not match and sub_nodeid not in optional_nodeids:
 74 |         #                 return
 75 |         # else:
 76 |         if sub_dmrs.index is not None:
 77 |             if dmrs.index is None:
 78 |                 return
 79 |             sub_index = sub_dmrs.index.nodeid
 80 |             index = dmrs.index.nodeid
 81 |             if sub_index in matching:
 82 |                 if matching[sub_index] != index:
 83 |                     return
 84 |             else:
 85 |                 if index in matches[sub_index]:
 86 |                     matching[sub_index] = index
 87 |                     matching_values.add(index)
 88 |                     del matches[sub_index]
 89 |                 else:
 90 |                     return
 91 | 
 92 |     change = True
 93 |     while change:
 94 |         change = False
 95 |         for sub_nodeid, match in list(matches.items()):
 96 |             for k, m in enumerate(match):
 97 |                 if m in matching_values:
 98 |                     del match[k]
 99 |             if len(match) == 1:
100 |                 m = matches.pop(sub_nodeid)[0]
101 |                 matching[sub_nodeid] = m
102 |                 matching_values.add(m)
103 |                 change = True
104 | 
105 |     # optimisation for nodes with uniquely matching neighbour nodes
106 |     for sub_nodeid, match in list(matches.items()):
107 |         neighbours = []
108 |         for n in sub_dmrs.get_neighbours(sub_nodeid, nodeids=True):
109 |             if n not in matching:
110 |                 break
111 |             neighbours.append(matching[n])
112 |         else:  # all neighbours in sub_dmrs match uniquely
113 |             candidate = None
114 |             for nodeid in match:
115 |                 if nodeid is None:  # not possible if an optional node is present
116 |                     candidate = None
117 |                     break
118 |                 if nodeid in matching_values or any(n not in dmrs.get_neighbours(nodeid, nodeids=True) for n in neighbours):  # node is already assigned or has invalid neighbourhood
119 |                     continue
120 |                 if candidate is not None:  # can't optimise in case of more than one candidate
121 |                     break
122 |                 candidate = nodeid
123 |             else:  # loop finished (no break), i.e. candidate is unique or non-existent
124 |                 if candidate is not None:
125 |                     matching[sub_nodeid] = candidate
126 |                     matching_values.add(candidate)
127 |                     del matches[sub_nodeid]
128 | 
129 |     matches_items = list(matches.items())
130 | 
131 |     # does an exhaustive search over all the left-over matches in matches_items
132 |     def _exhaustive_search(n):
133 |         if not n:
134 |             if _check_links():
135 |                 yield matching.copy()
136 |             return
137 |         n -= 1
138 |         sub_nodeid, match = matches_items[n]
139 |         for nodeid in match:  # assign and recursively continue for every possible match
140 |             if nodeid is None or nodeid in matching_values:
141 |                 continue
142 |             matching[sub_nodeid] = nodeid
143 |             matching_values.add(nodeid)
144 |             for result in _exhaustive_search(n):
145 |                 yield result
146 |             matching_values.remove(nodeid)
147 |         matching.pop(sub_nodeid, None)
148 |         if match[-1] is None:  # without assigning if optional node is present
149 |             for result in _exhaustive_search(n):
150 |                 yield result
151 | 
152 |     # checks whether the links match within the current node matching
153 |     def _check_links():
154 |         count = 0
155 |         for l1 in dmrs.iter_links():
156 |             if l1.start not in matching_values or l1.end not in matching_values:
157 |                 continue
158 |             for l2 in sub_dmrs.iter_links():
159 |                 if (l2.rargname == '?' or l2.rargname == l1.rargname or (l1.rargname and l2.rargname == l1.rargname[:3] == 'ARG')) and (l2.post == '?' or l2.post == l1.post) and matching[l2.start] == l1.start and matching[l2.end] == l1.end:
160 |                     count += 1
161 |                     break
162 |                 # reversed directionality for None/EQ links which (so far) are undirected
163 |                 if l1.rargname is l2.rargname is None and l1.post == l2.post == 'EQ' and matching[l2.start] == l1.end and matching[l2.end] == l1.start:
164 |                     count += 1
165 |                     break
166 |             else:
167 |                 return False
168 |         return count == sub_dmrs.count_links()
169 | 
170 |     if isinstance(equalities, dict):
171 |         equalities = tuple(equalities.values())
172 |     for result in _exhaustive_search(len(matches_items)):
173 |         if all(retriever(result, dmrs) == equality[0](result, dmrs) for equality in equalities for retriever in equality):
174 |             yield result
175 | 


--------------------------------------------------------------------------------
/pydmrs/matching/general_matching.py:
--------------------------------------------------------------------------------
  1 | from itertools import product, chain
  2 | 
  3 | from pydmrs.components import RealPred
  4 | from pydmrs.core import DictDmrs
  5 | from pydmrs.matching.common import are_equal_nodes, are_equal_links
  6 | from pydmrs.matching.match_evaluation import get_fscore
  7 | 
  8 | 
  9 | class Match(object):
 10 |     """ A mapping between two DMRS objects.
 11 |         The nodeid_pairs is a list of nodeid tuples (nodeid1, nodeid2), where
 12 |         nodeid1 and nodeid2 come from different DMRS.
 13 |         The link_pairs is the link equivalent of the nodeid_pairs.
 14 |     """
 15 | 
 16 |     def __init__(self, nodeid_pairs=None, link_pairs=None):
 17 |         self.nodeid_pairs = nodeid_pairs
 18 |         self.link_pairs = link_pairs
 19 | 
 20 |     def __str__(self):
 21 |         return "Nodes:{}; Links:{}".format(self.nodeid_pairs, self.link_pairs)
 22 | 
 23 |     def __len__(self):
 24 |         return len(self.nodeid_pairs) + len(self.link_pairs)
 25 | 
 26 |     def add(self, match):
 27 |         """Combines self with match, resolving any conflicts in favour of self."""
 28 |         if self.is_compatible(match):
 29 |             self.nodeid_pairs.extend(match.nodeid_pairs)
 30 |             self.link_pairs.extend(match.link_pairs)
 31 |         else:
 32 |             nodesA, nodesB = map(list, zip(*self.nodeid_pairs))
 33 |             for node_pair in match.nodeid_pairs:
 34 |                 if node_pair[0] not in nodesA and node_pair[1] not in nodesB:
 35 |                     self.nodeid_pairs.append(node_pair)
 36 |                     nodesA.append(node_pair[0])
 37 |                     nodesB.append(node_pair[1])
 38 | 
 39 |             linksA, linksB = map(set, zip(*self.link_pairs))
 40 |             for link1, link2 in match.link_pairs:
 41 |                 if link1 not in linksA and link2 not in linksB:
 42 |                     if link1.start in nodesA and link1.end in nodesA:
 43 |                         if link2.start in nodesB and link2.end in nodesB:
 44 |                             self.link_pairs.append((link1, link2))
 45 | 
 46 |     def is_compatible(self, match2):
 47 |         """ Checks if two matches are possible simultaneously. Two matches are conflicting
 48 |             if they pair nodes differently, e.g. (10001, 10003) in self and
 49 |             (10001, 10005) in match2.
 50 |             :param match2 Another Match object.
 51 |             :return True/False
 52 |         """
 53 |         if len(self) == 0 or len(match2) == 0:
 54 |             return True
 55 |         nodeA_set1, nodeA_set2 = map(set, zip(*self.nodeid_pairs))
 56 |         nodeB_set1, nodeB_set2 = map(set, zip(*match2.nodeid_pairs))
 57 |         if nodeA_set1.isdisjoint(nodeB_set1) and nodeA_set2.isdisjoint(nodeB_set2):
 58 |             return True
 59 |         else:
 60 |             return False
 61 | 
 62 |     def get_first(self, nodeid):
 63 |         for nodeid1, nodeid2 in self.nodeid_pairs:
 64 |             if nodeid == nodeid2:
 65 |                 return nodeid1
 66 |         return None
 67 | 
 68 |     def get_second(self, nodeid):
 69 |         for nodeid1, nodeid2 in self.nodeid_pairs:
 70 |             if nodeid == nodeid1:
 71 |                 return nodeid2
 72 |         return None
 73 | 
 74 | 
 75 | # ------------------------------------------------------------------------------
 76 | def group_same_nodes(nodes):
 77 |     """ Groups nodeids of equivalent nodes into sublists, using are_equal_nodes
 78 |         as the equivalency criterion.
 79 | 
 80 |         :param nodes A list of nodes.
 81 |         :return A list of tuples (pred, id list) sorted by pred. The pred is
 82 |                 the shared predicate of the group; the id_list is a list of
 83 |                 nodeids of equivalent nodes.
 84 |     """
 85 |     grouped_nodes = []
 86 |     group_node_type = None
 87 |     current_group = []
 88 |     sorted_nodes = sorted(nodes, key=lambda n: str(n.pred))
 89 |     for node in sorted_nodes:
 90 |         if not group_node_type:
 91 |             group_node_type = node
 92 |             current_group.append(node.nodeid)
 93 |         elif are_equal_nodes(node, group_node_type, underspecified=False):
 94 |             current_group.append(node.nodeid)
 95 |         else:
 96 |             grouped_nodes.append((group_node_type.pred, current_group))
 97 |             current_group = [node.nodeid]
 98 |             group_node_type = node
 99 |     grouped_nodes.append((group_node_type.pred, current_group))
100 |     return grouped_nodes
101 | 
102 | 
def pair_same_node_groups(dmrs1, dmrs2, underspecified):
    """ Determines which groups of equivalent nodes in dmrs1 correspond to which nodes in dmrs2. Nodes in dmrs1
    may be underspecified with respect to dmrs2, but not the other way round.
        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :param underspecified: If True, the underspecified nodes in dmrs1 will be matched to more specific ones in
                            dmrs2.

        :return A list of tuples (pred, nodes from dmrs1, nodes from dmrs2), one per group of
                equivalent dmrs1 nodes and in the order given by group_same_nodes. The pred is the
                predicate of the dmrs1 group; the last element holds the nodeids of the corresponding
                dmrs2 nodes (empty if there are none).
    """
    groups1 = group_same_nodes(dmrs1.nodes)
    groups2 = group_same_nodes(dmrs2.nodes)
    pairings = []

    for pred1, ids1 in groups1:
        # Groups are compared through their first members.
        representative1 = dmrs1[ids1[0]]
        counterpart_ids = []
        for _pred2, ids2 in groups2:
            representative2 = dmrs2[ids2[0]]
            if representative1 == representative2:
                # An equal group replaces anything collected so far and ends the search.
                counterpart_ids = ids2
                break
            if underspecified and representative1.is_less_specific(representative2):
                counterpart_ids.extend(ids2)
        pairings.append((pred1, ids1, counterpart_ids))
    return pairings
130 | 
131 | 
def extend_match(match, start_nodeids, dmrs1, dmrs2, underspecified=True):
    """ Grows a match between dmrs1 and dmrs2 outwards from a pair of nodes.
        :param match: A Match object to be extended (modified in place).
        :param start_nodeids: A tuple of matching nodeids with which to start to match extension.
        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :param underspecified: If True (default), treat underspecified nodes as equal when deciding
                               whether to continue from a pair of neighbouring nodes.

        The two start nodes should be equivalent by are_equal_nodes criterion.

        The start pair is added to the match. Then every not yet matched link of the first
        start node is paired with the first not yet matched link of the second start node
        that is equivalent according to are_equal_links and does not contradict the node
        pairs already in the match. The nodes at the other ends of the paired links are
        queued, and the function calls itself for every queued pair that is not in the
        match yet and satisfies are_equal_nodes.

        :return None. The match is updated in place.
    """
    match.nodeid_pairs.append(start_nodeids)
    paired_first_ids = {pair[0] for pair in match.nodeid_pairs}
    if match.link_pairs:
        used_links1, used_links2 = (set(side) for side in zip(*match.link_pairs))
    else:
        used_links1, used_links2 = set(), set()

    start_id1, start_id2 = start_nodeids
    candidate_links1 = dmrs1.get_out(start_id1)
    candidate_links1.update(dmrs1.get_in(start_id1))
    candidate_links1.update(dmrs1.get_eq(start_id1))
    candidate_links2 = dmrs2.get_out(start_id2)
    candidate_links2.update(dmrs2.get_in(start_id2))
    candidate_links2.update(dmrs2.get_eq(start_id2))

    pending_pairs = []
    for link1 in candidate_links1:
        if link1 in used_links1:
            continue
        for link2 in candidate_links2:
            if link2 in used_links2:
                continue
            if not are_equal_links(link1, link2, dmrs1, dmrs2):
                continue
            # Skip pairings contradicting the node pairs recorded so far.
            if link1.start in paired_first_ids and match.get_second(link1.start) != link2.start:
                continue
            if link1.end in paired_first_ids and match.get_second(link1.end) != link2.end:
                continue
            match.link_pairs.append((link1, link2))
            used_links1.add(link1)
            used_links2.add(link2)
            # Queue the nodes on the far side of the two links.
            far_id1 = link1.start if link1.end == start_id1 else link1.end
            far_id2 = link2.start if link2.end == start_id2 else link2.end
            pending_pairs.append((far_id1, far_id2))
            break

    for nodeid1, nodeid2 in pending_pairs:
        if (nodeid1, nodeid2) in match.nodeid_pairs:
            continue
        if are_equal_nodes(dmrs1[nodeid1], dmrs2[nodeid2], underspecified):
            extend_match(match, (nodeid1, nodeid2), dmrs1, dmrs2, underspecified)
185 | 
186 | 
def find_all_matches(dmrs1, dmrs2, underspecified=False):
    """ Finds all regions with potential matches between two DMRS graphs.
        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :param underspecified: If True, the underspecified nodes in dmrs1 will be matched to more specific ones in
                            dmrs2.

        The function starts an extend_match call from every pair of equivalent nodes
        which has not been covered by an earlier match. To narrow down the search
        space, only nodes with a RealPred whose lemma is not 'a' or 'the' are used as
        start nodes; if no such node groups exist, all node groups are used.

        :return A list of Match objects where pairs come from (dmrs1, dmrs2).
        """
    node_pairings = pair_same_node_groups(dmrs1, dmrs2, underspecified)

    def is_start_candidate(pairing):
        # Exclude GPreds and some quantifiers from the pool of start nodes.
        pred = pairing[0]
        return isinstance(pred, RealPred) and pred.lemma not in ['a', 'the']

    def combination_count(pairing):
        return len(pairing[1]) * len(pairing[2])

    # Consider the pairings with fewer matching combinations first.
    start_pairings = sorted((pairing for pairing in node_pairings if is_start_candidate(pairing)),
                            key=combination_count)
    if not start_pairings:
        start_pairings = node_pairings

    found_matches = []
    covered_pairs = set()
    for _pred, ids1, ids2 in start_pairings:
        for id1, id2 in product(ids1, ids2):
            if (id1, id2) in covered_pairs:
                continue
            if not are_equal_nodes(dmrs1[id1], dmrs2[id2], underspecified=underspecified):
                continue
            match = Match([], [])
            extend_match(match, (id1, id2), dmrs1, dmrs2, underspecified)
            covered_pairs.update(match.nodeid_pairs)
            found_matches.append(match)
    return found_matches  # Match objects holding matched nodes and matched links
225 | 
226 | 
def group_compatible_matches(matches):
    """ Groups matches into compatible sets of indices of non-conflicting matches.
        Indices are given by the positions in the matches list.
        :param matches A list of Matches. Each element only needs an
                       is_compatible(other) method.

        :return A list of sets of integers. Each set contains indices of Matches
                that were found to be mutually compatible. If no two matches are
                compatible (including the empty and single-match cases), every
                index is returned in its own singleton set.
    """
    total = len(matches)

    # Record clashes symmetrically so that lookups do not depend on index order.
    clash_pairs = set()
    found_compatible_pair = False
    for i in range(total):
        for j in range(i + 1, total):
            if matches[i].is_compatible(matches[j]):
                found_compatible_pair = True
            else:
                clash_pairs.add((i, j))
                clash_pairs.add((j, i))

    combinations = [{i} for i in range(total)]
    if not found_compatible_pair:
        return combinations

    # Greedily grow each combination with every index that clashes with none of
    # its members. The statement order below is significant: the list is
    # mutated while it is scanned, and the scan for index i stops as soon as a
    # combination made redundant by an existing one has been removed.
    for i in range(total):
        for comb in combinations:
            if i in comb:
                continue
            if comb | {i} in combinations:
                combinations.remove(comb)
                break
            if not any((i, match_id) in clash_pairs for match_id in comb):
                comb.add(i)
    return combinations  # list of sets
264 | 
265 | 
def find_biggest_disjoint_matches(matches):
    """ Finds collections of compatible matches which maximize the number of
        matched elements. Returns a list in case more than one combination scores
        the highest.
        :param matches A list of Matches.
        :return A list of tuples (group, Match), where group is a set of match
                indices (see group_compatible_matches) and the Match combines
                all the Matches in the group. The list is empty when there are
                no groups to choose from.
    """
    compatible_groups = group_compatible_matches(matches)
    best_score = 0
    # Start from an empty list (not None) so that groups scoring 0, or an empty
    # input, do not crash on append/iteration below.
    best_groups = []
    for group in compatible_groups:
        group_score = sum(len(matches[i]) for i in group)
        if group_score > best_score:
            best_score = group_score
            best_groups = [group]
        elif group_score == best_score:
            best_groups.append(group)

    full_matches = []
    for group in best_groups:
        nodes = list(chain.from_iterable(matches[i].nodeid_pairs for i in group))
        links = list(chain.from_iterable(matches[i].link_pairs for i in group))
        full_matches.append((group, Match(nodes, links)))
    return full_matches
292 | 
293 | 
294 | # -------------------------------------------------------------------------------\
295 | # IMPORTANT
296 | 
def find_best_matches(small_dmrs, large_dmrs, exact=False, underspecified=False):
    """ Finds the best matches between two DMRS (there can be several if more
        than one reaches the same score). When several disconnected matches are
        found, their best-scoring compatible combinations are built.
        :param small_dmrs A DMRS object.
        :param large_dmrs A DMRS object.
        :param exact: If True, only matches whose f-score against small_dmrs equals 1 are returned.
        :param underspecified: If True, the underspecified nodes in small_dmrs will be matched to more specific ones in
                            large_dmrs.
        :return None if no match was found at all, otherwise a list of Matches.
    """
    matches = find_all_matches(small_dmrs, large_dmrs, underspecified)
    if not matches:
        return None
    if exact:
        return [candidate for candidate in matches if get_fscore(candidate, small_dmrs) == 1]
    if len(matches) == 1:
        return matches

    best_combinations = []
    for group, combined in find_biggest_disjoint_matches(matches):
        # Offer every match outside the winning group to the combined match.
        # NOTE(review): presumably Match.add only takes over the elements that
        # do not conflict - confirm against Match.add.
        for position, extra_match in enumerate(matches):
            if position not in group:
                combined.add(extra_match)
        best_combinations.append(combined)
    return best_combinations
323 | 
324 | 
def get_matched_subgraph(large_dmrs, match):
    """ Returns the subgraph of large_dmrs described by match.
        :param large_dmrs A DMRS object in which the match was found.
        :param match A Match object. The second element of each of its
                     nodeid_pairs / link_pairs is taken as belonging to large_dmrs.

        :return A DMRS object containing only the matched elements from large_dmrs.
                The graph can be disconnected.
    """
    # Pick the second member of each pair directly instead of transposing with
    # zip(*...), which raised IndexError when a pair list was empty (for
    # instance a match that contains nodes but no links).
    links = [pair[1] for pair in match.link_pairs]
    nodes = [large_dmrs[pair[1]] for pair in match.nodeid_pairs]
    return DictDmrs(nodes, links)
337 | 


--------------------------------------------------------------------------------
/pydmrs/matching/match_evaluation.py:
--------------------------------------------------------------------------------
 1 | from pydmrs._exceptions import PydmrsTypeError
 2 | 
 3 | 
 4 | def get_recall(match, dmrs):
 5 |     from pydmrs.matching.general_matching import Match
 6 |     if isinstance(match, list) and isinstance(match[0], Match):
 7 |         raise PydmrsTypeError("More than one match passed in an argument.")
 8 |     return len(match) / (len(dmrs.nodes) + len(dmrs.links))
 9 | 
10 | 
def get_fscore(match, dmrs):
    """ Returns the F-score of match with respect to dmrs.
        Precision is taken to be 1.0 for this algorithm, so the score reduces
        to 2 * recall / (1 + recall).
        :param match A Match object.
        :param dmrs A DMRS object against which recall is measured.
    """
    recall = get_recall(match, dmrs)
    numerator = 2 * recall
    denominator = 1.0 + recall
    return numerator / denominator
15 | 
16 | 
def get_missing_elements(match, dmrs):
    """ Returns a list of elements of dmrs for which no match was found.
        :param match A Match object.
        :param dmrs A DMRS object for which the match was searched. Its
                    elements are compared with the second member of each pair
                    in match.
                    NOTE(review): pairs are built as (dmrs1, dmrs2) by the
                    matcher, so this compares against the dmrs2 side - confirm
                    this is the intended graph.
        :return A list of nodeids followed by links.
    """
    # Collect the second member of each pair directly; transposing with
    # zip(*...) raised IndexError when the match had no node or link pairs.
    matched_nodeids = {pair[1] for pair in match.nodeid_pairs}
    matched_links = {pair[1] for pair in match.link_pairs}

    not_matched = [nodeid for nodeid in dmrs if nodeid not in matched_nodeids]
    not_matched.extend(link for link in dmrs.iter_links() if link not in matched_links)
    return not_matched
33 | 


--------------------------------------------------------------------------------
/pydmrs/matching/query.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from pydmrs.core import Dmrs, ListDmrs
 3 | from pydmrs.matching.exact_matching import dmrs_exact_matching
 4 | from pydmrs.graphlang.graphlang import parse_graphlang
 5 | 
 6 | 
 7 | # not all_matches then None if no match
 8 | def dmrs_query(dmrs_iter, search_dmrs_graphlang, results_as_dict=False, results_per_dmrs=False):
 9 |     """
10 |     Queries DMRS graphs for an underspecified (sub)graph pattern and returns the values of named wildcards (of the form "?[Identifier]") as they are specified in the queried graph.
11 |     :param dmrs_iter An iterator of DMRS graphs to query.
12 |     :param search_dmrs_graphlang The query DMRS (sub)graph, given as a GraphLang string.
13 |     :param results_as_dict True if a query result should be a dictionary, mapping identifiers to values.
14 |     :param results_per_dmrs True if a (possibly empty) list per DMRS should be returned.
15 |     :return Iterator of dicts containing the matching node ids.
16 |     """
17 |     queries = {}
18 |     search_dmrs = parse_graphlang(search_dmrs_graphlang, queries=queries)
19 |     queries = [(key, queries[key]) for key in sorted(queries)]
20 |     for dmrs in dmrs_iter:
21 |         assert isinstance(dmrs, Dmrs), 'Object in dmrs_iter is not a Dmrs.'
22 |         # perform an exact matching of search_dmrs against dmrs
23 |         matchings = dmrs_exact_matching(search_dmrs, dmrs)
24 |         if results_per_dmrs:
25 |             results = []
26 |         for matching in matchings:
27 |             # extract matched values
28 |             if results_as_dict:
29 |                 result = {key: query(matching, dmrs) for key, query in queries}
30 |             else:
31 |                 result = tuple(query(matching, dmrs) for _, query in queries)
32 |             if results_per_dmrs:
33 |                 results.append(result)
34 |             else:
35 |                 yield result
36 |         if results_per_dmrs:
37 |             yield results
38 | 
39 | 
if __name__ == '__main__':
    # Usage: the GraphLang query is the single command line argument and the
    # DMRS graphs are piped in on stdin, one XML document per line.
    assert len(sys.argv) == 2 and not sys.stdin.isatty(), 'Invalid arguments'
    search_dmrs = sys.argv[1]
    # Strip only the line terminator: slicing off the last character would
    # truncate a final line that does not end with a newline.
    dmrs_iter = (ListDmrs.loads_xml(line.rstrip('\n')) for line in sys.stdin)
    # Only the first query result is printed.
    sys.stdout.write(str(next(dmrs_query(dmrs_iter, search_dmrs, results_as_dict=True))) + '\n')
45 | 


--------------------------------------------------------------------------------
/pydmrs/pydelphin_interface.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from delphin import ace
 3 | from delphin.mrs import from_dmrs
 4 | from delphin.dmrs import from_mrs
 5 | from delphin.codecs import simplemrs, dmrx
 6 | 
 7 | from pydmrs.core import ListDmrs
 8 | from pydmrs.utils import load_config, get_config_option
 9 | 
# Name of the configuration file that supplies the defaults for this interface.
DEFAULT_CONFIG_FILE = 'default_interface.conf'

# Loaded once at import time.
config = load_config(DEFAULT_CONFIG_FILE)
# Value of option 'ERG' in section 'Grammar'; used as the default grammar file
# of parse() and generate() below.
DEFAULT_ERG_FILE = get_config_option(config, 'Grammar', 'ERG')
14 | 
15 | 
def parse(sentence, cls=ListDmrs, erg_file=DEFAULT_ERG_FILE):
    """
    Parse a sentence with ACE and return one DMRS per parse result.
    :param sentence: The sentence to parse
    :param cls: Dmrs class used to load each result (via cls.loads_xml)
    :param erg_file: Grammar file handed to ace.parse
    :return: List of cls instances, in the order ACE returned the results
    """
    # Optional ACE arguments could be passed here, e.g. cmdargs=['-r', 'root_informal']
    response = ace.parse(erg_file, sentence)
    # Each result goes MRS -> pydelphin DMRS -> DMRX string -> pydmrs object.
    return [cls.loads_xml(dmrx.encode(from_mrs(result.mrs())))
            for result in response.results()]
25 | 
26 | 
def generate(dmrs, erg_file=DEFAULT_ERG_FILE):
    """
    Generate surface sentences from a DMRS with ACE.
    :param dmrs: Dmrs object providing dumps_xml
    :param erg_file: Grammar file handed to ace.generate
    :return: List with the 'surface' entry of every generation result
    """
    # pydmrs object -> DMRX string -> pydelphin DMRS -> MRS -> SimpleMRS string
    decoded = dmrx.decode(dmrs.dumps_xml(encoding='utf-8'))
    mrs_string = simplemrs.encode(from_dmrs(decoded))
    response = ace.generate(erg_file, mrs_string)
    return [result['surface'] for result in response.results()]
37 | 


--------------------------------------------------------------------------------
/pydmrs/rooted.py:
--------------------------------------------------------------------------------
  1 | from copy import copy
  2 | from operator import attrgetter
  3 | from collections import deque
  4 | 
  5 | from pydmrs._exceptions import PydmrsError, PydmrsValueError
  6 | from pydmrs.core import Link, Pred, Dmrs, ListDmrs, DictDmrs
  7 | from pydmrs.utils import load_config, get_config_option
  8 | 
# Name of the configuration file that supplies the defaults for this module.
DEFAULT_CONFIG_FILE = 'default_simplification.conf'

# Loaded once at import time.
config = load_config(DEFAULT_CONFIG_FILE)
# Predicates listed under option 'reverse_arg1' of section 'Rooted Conversion';
# used as the default reverse_arg1 argument of the make_rooted_* functions below.
REVERSE_ARG1 = frozenset(Pred.from_string(x) for x in get_config_option(config, 'Rooted Conversion', 'reverse_arg1', opt_type=list))
 13 | 
 14 | 
 15 | def reverse_link(dmrs, link):
 16 |     """
 17 |     Reverse a Link in a Dmrs graph.
 18 |     The start and end nodeids are switched,
 19 |     and "_REV" is appended to the rargname (or removed if already present)
 20 |     """
 21 |     if link.rargname[-4:] == "_REV":
 22 |         new_rargname = link.rargname[:-4]
 23 |     else:
 24 |         new_rargname = link.rargname + "_REV"
 25 |     new_link = Link(link.end, link.start, new_rargname, link.post)
 26 |     dmrs.remove_link(link)
 27 |     dmrs.add_link(new_link)
 28 |     return new_link
 29 | 
 30 | def is_root(dmrs, nodeid):
 31 |     """
 32 |     Check if a node has no incoming links
 33 |     """
 34 |     return not any(dmrs.get_in(nodeid, itr=True))
 35 | 
 36 | def is_leaf(dmrs, nodeid):
 37 |     """
 38 |     Check if a node has no outgoing links
 39 |     """
 40 |     return not any(dmrs.get_out(nodeid, itr=True))
 41 | 
 42 | def is_singleton(dmrs, nodeid):
 43 |     """
 44 |     Check if a node has no links
 45 |     """
 46 |     return not any(dmrs.get_links(nodeid, itr=True))
 47 | 
 48 | def iter_roots(dmrs):
 49 |     """
 50 |     Find all nodes with no incoming links
 51 |     """
 52 |     for n in dmrs.iter_nodes():
 53 |         if is_root(dmrs, n.nodeid):
 54 |             yield n
 55 | 
 56 | def iter_leaves(dmrs):
 57 |     """
 58 |     Find all nodes with no outgoing links
 59 |     """
 60 |     for n in dmrs.iter_nodes():
 61 |         if is_leaf(dmrs, n.nodeid):
 62 |             yield n
 63 | 
 64 | def is_rooted(dmrs, check_connected=True):
 65 |     """
 66 |     Check if a dmrs has a single root
 67 |     """
 68 |     if check_connected and not dmrs.is_connected():
 69 |         return False
 70 |     return any(iter_roots(dmrs))
 71 | 
 72 | def is_acyclic(dmrs):
 73 |     """
 74 |     Check if the graph is acyclic
 75 |     """
 76 |     return not find_cycle(dmrs)
 77 | 
 78 | def find_cycle(dmrs):
 79 |     """
 80 |     If there is a cycle, return the nodeids in the largest subgraph with no roots or leaves.
 81 |     If there is no cycle, return False 
 82 |     """
 83 |     # There are no cycles iff iteratively removing all leaves leaves nothing
 84 |     trim_leaves = trimmable(dmrs, leaves=True)
 85 |     if len(trim_leaves) == len(dmrs):
 86 |         return False
 87 |     
 88 |     # If there is a cycle, do the same with roots
 89 |     trim_roots = trimmable(dmrs, leaves=False)
 90 |     return {n.nodeid for n in dmrs.iter_nodes()} - trim_leaves - trim_roots
 91 |      
 92 | 
 93 | def trimmable(dmrs, leaves=True):
 94 |     """
 95 |     Return the nodeids that can be removed by recursively trimming
 96 |     If leaves is True (by default), trim leaves; if False, trim roots
 97 |     """
 98 |     if leaves:
 99 |         initial = iter_leaves
100 |         forward = dmrs.get_in_nodes
101 |         back = dmrs.get_out_nodes
102 |     else:
103 |         initial = iter_roots
104 |         forward = dmrs.get_out_nodes
105 |         back = dmrs.get_in_nodes
106 |     
107 |     # Iteratively remove all leaves from the graph
108 |     discard = {n.nodeid for n in initial(dmrs)}
109 |     parents = {p for leaf in discard \
110 |                for p in forward(leaf, nodeids=True, itr=True)}
111 |     n = True
112 |     while n:  # Keep removing leaves until we can't remove any more
113 |         n = 0  # Count how many leaves we can remove in this pass
114 |         next_parents = set()  # Parents for the next iteration
115 |         for mother in parents:
116 |             if back(mother, nodeids=True) - discard:  # Has non-leaf children
117 |                 next_parents.add(mother)
118 |             else:
119 |                 n += 1
120 |                 discard.add(mother)
121 |                 next_parents.update(forward(mother, nodeids=True, itr=True))
122 |         parents = next_parents
123 |     
124 |     return discard
125 | 
def connected_pair(dmrs, first_id, second_id):
    """
    Return True if second_id can be reached from first_id by following
    neighbours (as given by dmrs.get_neighbours), False otherwise
    """
    explored = set()  # Nodes whose neighbours have been inspected
    pending = {first_id}  # Nodes still to inspect
    while pending:
        current = pending.pop()
        explored.add(current)
        neighbours = set(dmrs.get_neighbours(current, nodeids=True, itr=True))
        if second_id in neighbours:
            return True
        pending |= neighbours - explored
    return False
141 | 
def components(dmrs):
    """
    Count the connected components of the graph
    """
    all_nodeids = {node.nodeid for node in dmrs.iter_nodes()}
    remaining = set(all_nodeids)  # Nodes not yet assigned to a counted component
    count = 0
    while remaining:
        count += 1
        # Ask for the nodes still disconnected once everything already counted
        # is treated as removed; that answer is the next `remaining`.
        remaining = dmrs.disconnected_nodeids(removed_nodeids=(all_nodeids - remaining))
    return count
153 | 
def iter_bottom_up(dmrs, check_acyclic=True, node_key=None):
    """
    Iterate through the graph bottom up,
    i.e. a node is only yielded once all its children have been.
    Raises PydmrsError if check_acyclic is True (the default) and the graph has cycles.
    Nodes are ordered with node_key; if none is given, dmrs.node_key is used
    when the dmrs has one (e.g. SortDictDmrs), otherwise the nodeid.
    """
    if check_acyclic and not is_acyclic(dmrs):
        raise PydmrsError

    # Choose how to sort nodes
    if node_key is None:
        node_key = getattr(dmrs, 'node_key', attrgetter('nodeid'))

    done = set()  # Nodeids that have already been yielded
    waiting = deque(sorted(iter_leaves(dmrs), key=node_key))  # Nodes to be considered next
    while waiting:
        node = waiting.popleft()
        unfinished_children = dmrs.get_out_nodes(node.nodeid, nodeids=True) - done
        if unfinished_children:
            # Not ready yet: send it to the back of the queue
            waiting.append(node)
            continue
        done.add(node.nodeid)
        # Schedule the parents (in sorted order) unless already handled or queued
        for parent in sorted(dmrs.get_in_nodes(node.nodeid, itr=True), key=node_key):
            if parent.nodeid not in done and parent not in waiting:
                waiting.append(parent)
        yield node
184 | 
185 | 
def make_rooted_local(dmrs, reverse_arg1=REVERSE_ARG1):
    """
    Attempt to convert a DMRS graph to a rooted graph,
    by reversing links based on local properties:
    ARG1 links of nodes whose pred is in reverse_arg1, links with post 'EQ',
    and links with rargname 'RSTR'.
    Modifies and returns the given dmrs. May leave cycles.
    Raises PydmrsError if a node in reverse_arg1 has more than one ARG1 link.
    """
    # Collect everything first, so that no link is reversed twice (i.e. back)
    to_reverse = set()

    # ARG1 links of the selected predicates
    for node in dmrs.iter_nodes():
        if node.pred not in reverse_arg1:
            continue
        arg1_links = dmrs.get_out(node.nodeid, rargname='ARG1')
        if len(arg1_links) > 1:
            raise PydmrsError('Multiple ARG1s')
        to_reverse.update(arg1_links)

    # Modifiers (EQ links)
    to_reverse.update(dmrs.get_label(post='EQ'))

    # Quantifiers
    to_reverse.update(dmrs.get_label(rargname='RSTR'))

    for link in to_reverse:
        reverse_link(dmrs, link)

    return dmrs
214 | 
def make_rooted_global(dmrs, root=None):
    """
    Convert a DMRS graph to a rooted graph,
    by fixing one node to be the root and reversing links layer by layer.
    If no root nodeid is given, the top is used, and failing that the index;
    PydmrsError is raised if neither is available.
    Modifies and returns the given dmrs. May leave cycles.
    """
    # Decide on the root
    if root is None:
        anchor = dmrs.top or dmrs.index
        if not anchor:
            raise PydmrsError('No root nodeid given, no top, and no index')
        root = anchor.nodeid

    previous = set()
    layer = {root}
    while layer:
        # Links may only keep pointing into this layer if they start in this
        # layer or the one before it; any other incoming link is reversed.
        allowed_sources = layer | previous
        next_layer = set()
        for nid in layer:
            for link in dmrs.get_in(nid):
                if link.start not in allowed_sources:
                    reverse_link(dmrs, link)
            # Children are collected after the reversals above
            next_layer.update(dmrs.get_out_nodes(nid, nodeids=True) - layer)
        previous = layer
        layer = next_layer

    return dmrs
244 | 
def make_rooted_acyclic(dmrs, reverse_arg1=REVERSE_ARG1, root=None):
    """
    Make a DMRS rooted and acyclic, first trying local changes
    (make_rooted_local), and then global changes (make_rooted_global) if the
    local ones were not enough.
    Modifies the given dmrs and returns it, like the other make_rooted_* functions.
    Raises PydmrsValueError if the dmrs is not connected, and PydmrsError if
    the result is still not rooted and acyclic.
    """
    if not dmrs.is_connected():
        raise PydmrsValueError('DMRS is not connected')

    def is_done():
        # Connectedness was already established above, so skip that check
        return is_acyclic(dmrs) and is_rooted(dmrs, check_connected=False)

    make_rooted_local(dmrs, reverse_arg1)

    if not is_done():
        make_rooted_global(dmrs, root)

    if not is_done():
        raise PydmrsError('Conversion to a rooted acyclic graph failed')

    return dmrs
260 | 
261 | 
class RootedMixin(Dmrs):
    """
    Allows a Dmrs class access to the above functions as methods.

    While the class body runs, every module-level object whose __module__ is
    this module is copied into the class namespace, so the functions defined
    above become attributes of the class.
    """
    # Iterate over a copy, because globals() must not change size mid-loop.
    # NOTE: `name` and `object` are assigned in the class body, so they remain
    # as class attributes afterwards, and `object` shadows the builtin here.
    for name, object in copy(globals()).items():
        try:
            if object.__module__ == __name__:
                # Relies on locals() being the live class namespace inside a
                # class body (CPython behaviour).
                locals()[name] = object
        except AttributeError:
            # Objects without a __module__ attribute are skipped
            continue
272 | 
class DictRootDmrs(RootedMixin, DictDmrs):
    """DictDmrs combined with the attributes provided by RootedMixin."""
    pass
class ListRootDmrs(RootedMixin, ListDmrs):
    """ListDmrs combined with the attributes provided by RootedMixin."""
    pass
277 | 


--------------------------------------------------------------------------------
/pydmrs/serial.py:
--------------------------------------------------------------------------------
  1 | import xml.etree.ElementTree as ET
  2 | 
  3 | from pydmrs.core import Link, ListDmrs, Node
  4 | from pydmrs._exceptions import PydmrsValueError
  5 | 
  6 | 
  7 | def loads_xml(bytestring, encoding=None, cls=ListDmrs, convert_legacy_prontype=True, **kwargs):
  8 |     """
  9 |     Currently processes "<dmrs>...</dmrs>"
 10 |     To be updated for "<dmrslist>...</dmrslist>"...
 11 |     Expects a bytestring; to load from a string instead, specify encoding
 12 |     Produces a ListDmrs by default; for a different type, specify cls
 13 |     """
 14 |     if encoding:
 15 |         bytestring = bytestring.encode(encoding)
 16 |     xml = ET.XML(bytestring)
 17 | 
 18 |     dmrs = cls(**kwargs)
 19 | 
 20 |     dmrs.cfrom = int(xml.get('cfrom')) if 'cfrom' in xml.attrib else None
 21 |     dmrs.cto = int(xml.get('cto')) if 'cto' in xml.attrib else None
 22 |     dmrs.surface = xml.get('surface')
 23 |     dmrs.ident = int(xml.get('ident')) if 'ident' in xml.attrib else None
 24 |     # top may be set as a graph attribute or as a link (see below)
 25 |     top_id = int(xml.get('top')) if 'top' in xml.attrib else None
 26 |     index_id = int(xml.get('index')) if 'index' in xml.attrib else None
 27 | 
 28 |     for elem in xml:
 29 |         if elem.tag == 'node':
 30 |             node = Node.from_xml(elem, convert_legacy_prontype)
 31 |             dmrs.add_node(node)
 32 | 
 33 |         elif elem.tag == 'link':
 34 |             link = Link.from_xml(elem)
 35 |             if link.start == 0:
 36 |                 # this would overwrite any graph-level top attribute
 37 |                 # (see above), but let's assume we won't encounter
 38 |                 # both in the same graph
 39 |                 top_id = link.end
 40 |             else:
 41 |                 dmrs.add_link(link)
 42 |         else:
 43 |             raise PydmrsValueError(elem.tag)
 44 | 
 45 |     if top_id:
 46 |         dmrs.top = dmrs[top_id]
 47 |     if index_id:
 48 |         dmrs.index = dmrs[index_id]
 49 | 
 50 |     return dmrs
 51 | 
 52 | 
 53 | def load_xml(filehandle, cls=ListDmrs, **kwargs):
 54 |     """
 55 |     Load a DMRS from a file
 56 |     NB: read file as bytes!
 57 |     Produces a ListDmrs by default; for a different type, specify cls
 58 |     """
 59 |     return cls.loads(filehandle.read(), cls=cls, **kwargs)
 60 | 
 61 | 
 62 | def dumps_xml(dmrs, encoding=None):
 63 |     """
 64 |     Currently creates "<dmrs>...</dmrs>"
 65 |     To be updated for "<dmrslist>...</dmrslist>"...
 66 |     Returns a bytestring; to return a string instead, specify encoding
 67 |     """
 68 |     xdmrs = ET.Element('dmrs')
 69 |     if dmrs.index is not None:
 70 |         xdmrs.set('index', str(dmrs.index.nodeid))
 71 |     if dmrs.cfrom is not None and dmrs.cto is not None:
 72 |         xdmrs.set('cfrom', str(dmrs.cfrom))
 73 |         xdmrs.set('cto', str(dmrs.cto))
 74 |     for nodeid in sorted(dmrs):
 75 |         node = dmrs[nodeid]
 76 |         xnode = node.to_xml()
 77 |         xdmrs.append(xnode)
 78 |     if dmrs.top is not None:
 79 |         xlink = ET.SubElement(xdmrs, 'link')
 80 |         xlink.set('from', '0')
 81 |         xlink.set('to', str(dmrs.top.nodeid))
 82 |         xrargname = ET.SubElement(xlink, 'rargname')
 83 |         xpost = ET.SubElement(xlink, 'post')
 84 |         xpost.text = 'H'
 85 |     for link in dmrs.iter_links():
 86 |         xlink = link.to_xml()
 87 |         xdmrs.append(xlink)
 88 |     bytestring = ET.tostring(xdmrs)
 89 |     if encoding:
 90 |         return bytestring.decode(encoding)
 91 |     return bytestring
 92 | 
 93 | 
 94 | def dump_xml(filehandle, dmrs):
 95 |     """
 96 |     Dump a DMRS to a file
 97 |     NB: write as a bytestring!
 98 |     """
 99 |     filehandle.write(dumps_xml(dmrs))
100 | 
101 | 
def visualise(dmrs, format):
    """
    Returns the bytestring of the chosen visualisation representation.
    Supported formats: dot
    Raises PydmrsValueError for any other format.
    """
    if format != 'dot':
        raise PydmrsValueError('Visualisation format not supported')

    def dot_id(nodeid):
        # Negative nodeids get an 'M' in place of the minus sign
        return str(nodeid).replace('-', 'M')

    has_top = dmrs.top is not None
    pieces = ['digraph g {\n']
    if has_top:
        pieces.append('NodeTop [label="top",style=bold];\n')

    # One box per node: predicate, optional carg, and sortinfo in a smaller font
    pieces.append('node[shape=box];\n')
    for nodeid in dmrs:
        node = dmrs[nodeid]
        carg = '("{}")'.format(node.carg) if node.carg else ''
        pieces.append('Node{} [label=<{}{}<BR /><FONT POINT-SIZE="10">{}</FONT>>];\n'.format(dot_id(nodeid), node.pred, carg, node.sortinfo))

    # One edge per link, plus a dotted edge from the top marker
    pieces.append('edge[fontsize=10];\n')
    if has_top:
        pieces.append('NodeTop -> Node{} [style=dotted];\n'.format(dot_id(dmrs.top.nodeid)))
    for link in dmrs.links:
        pieces.append('Node{} -> Node{} [label="{}"];\n'.format(dot_id(link.start), dot_id(link.end), link.labelstring))

    pieces.append('}\n')
    return ''.join(pieces).encode()
126 | 


--------------------------------------------------------------------------------
/pydmrs/simplification/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delph-in/pydmrs/795b35dba4986fa9084eaa81fb16206cb131a752/pydmrs/simplification/__init__.py


--------------------------------------------------------------------------------
/pydmrs/simplification/gpred_filtering.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from pydmrs.components import GPred
 3 | from pydmrs.serial import loads_xml, dumps_xml
 4 | from pydmrs.utils import get_config_option, load_config, split_dmrs_string
 5 | 
# Name of the configuration file used unless another one is given on the command line.
DEFAULT_CONFIG_FILE = 'default_simplification.conf'

# If run from the command line, load the specified file
# Otherwise, load the default file

if __name__ == '__main__':
    # `args` is kept at module level; the processing section at the end of
    # this file reads args.input_dmrs and args.output_dmrs.
    parser = argparse.ArgumentParser(description='DMRS simplification tool')
    parser.add_argument('-c', '--config', default=None,
                        help='Path to simplifaction configuration file. By default, configuration in __config__/default_simplification.conf is used.')
    parser.add_argument('input_dmrs', help='Specify input DMRS file')
    parser.add_argument('output_dmrs', help='Specify output dmrs file.')
    args = parser.parse_args()
    if args.config is not None:  # Load the given file
        config = load_config(args.config, default=False)
    else:
        config = load_config(DEFAULT_CONFIG_FILE)
else:
    # Imported as a module: always use the default configuration
    config = load_config(DEFAULT_CONFIG_FILE)
24 | 
# Predicates listed under option 'filter' of section 'General Predicate Filtering'.
DEFAULT_FILTER = frozenset(GPred.from_string(x) for x in get_config_option(config, 'General Predicate Filtering', 'filter', opt_type=list))
# Read as a boolean: the raw option string is truthy whenever it is non-empty,
# so a configured "False" or "no" would otherwise still allow disconnected graphs.
DEFAULT_ALLOW_DISC = get_config_option(config, 'General Predicate Filtering', 'allow_disconnected_dmrs', opt_type=bool)
27 | 
def gpred_filtering(dmrs, gpred_filter=DEFAULT_FILTER, allow_disconnected_dmrs=DEFAULT_ALLOW_DISC):
    """
    Remove general predicate nodes on the filter list from the DMRS.
    The DMRS is modified in place and also returned.
    :param dmrs: Input DMRS object
    :param gpred_filter: Collection of predicates to filter; a gpred node is filterable if its pred is in it
    :param allow_disconnected_dmrs: Remove gpred nodes even if their removal would result in a disconnected DMRS.
     If the DMRS is not connected to begin with (ignoring the filterable nodes), gpred nodes are removed regardless.
    :return: Output DMRS object
    """

    # Find general predicate nodes to filter
    filterable_nodeids = {node.nodeid for node in dmrs.iter_nodes()
                          if node.is_gpred_node and node.pred in gpred_filter}

    # Connectedness only needs protecting if it is required and currently holds
    must_stay_connected = not allow_disconnected_dmrs and dmrs.is_connected(ignored_nodeids=filterable_nodeids)

    if not must_stay_connected:
        filtered_nodeids = filterable_nodeids
    else:
        # Accept a node only if the graph stays connected without it and
        # without the nodes accepted so far
        filtered_nodeids = set()
        for nodeid in filterable_nodeids:
            if dmrs.is_connected(removed_nodeids=filtered_nodeids | {nodeid}, ignored_nodeids=filterable_nodeids):
                filtered_nodeids.add(nodeid)

    # Remove filtered nodes and their links from the DMRS
    for nodeid in filtered_nodeids:
        dmrs.remove_node(nodeid)

    return dmrs
62 | 
63 | 
# If run from the command line, process the given file
if __name__ == '__main__':

    # Use the same explicit encoding on both sides, so that the output does not
    # depend on the platform default encoding.
    with open(args.input_dmrs, 'r', encoding="utf-8") as fin, \
            open(args.output_dmrs, 'w', encoding="utf-8") as fout:
        content = fin.read().strip()

        # Filter every DMRS in the input and write the results separated by blank lines
        for dmrs_string in split_dmrs_string(content):
            dmrs = loads_xml(dmrs_string)
            simplified_dmrs = gpred_filtering(dmrs)
            simplified_dmrs_string = dumps_xml(simplified_dmrs)
            fout.write('{}\n\n'.format(simplified_dmrs_string.decode('utf-8')))
75 | 


--------------------------------------------------------------------------------
/pydmrs/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from configparser import ConfigParser, NoSectionError, NoOptionError
 3 | import pydmrs
 4 | 
# Directory '__config__' next to the pydmrs package's __init__ file: the '..'
# steps from the file path up to its directory, and normpath collapses it.
CONFIG_DIR = os.path.normpath(os.path.join(pydmrs.__file__, '../__config__'))
 6 | 
 7 | def get_config_option(config, section, option, opt_type=None, default=None):
 8 |     """
 9 |     Safe read of config option that returns default value if the section or option are not present.
10 |     :param config: ConfigParser object with existing configuration
11 |     :param section: Section name string
12 |     :param option: Option name string
13 |     :param opt_type: Option python type. String by default.
14 |     :param default: Default value to return if section/option do not exist. None by default.
15 |     :return: Option value
16 |     """
17 | 
18 |     try:
19 |         if opt_type is None:
20 |             return config.get(section, option)
21 |         elif opt_type == int:
22 |             return config.getint(section, option)
23 |         elif opt_type == float:
24 |             return config.getfloat(section, option)
25 |         elif opt_type == bool:
26 |             return config.getboolean(section, option)
27 |         elif opt_type == list:
28 |             return parse_config(config.get(section, option))
29 | 
30 |     except (NoSectionError, NoOptionError):
31 |         return default
32 | 
33 | 
def parse_config(config_string):
    """
    Flatten the raw text of a config value into a list of strings.
     Blank lines and lines whose first non-space character is '#' are skipped.
     Every other line is split on commas; entries are stripped and empty entries are dropped.
    :param config_string: String as read from the config file
    :return: List of strings in order of appearance
    """

    stripped_lines = (raw_line.strip() for raw_line in config_string.split('\n'))

    return [
        entry.strip()
        for line in stripped_lines
        if line != '' and not line.startswith('#')
        for entry in line.split(',')
        if entry.strip() != ''
    ]
56 | 
def load_config(filename, default=True):
    """
    Read a config file into a fresh ConfigParser.
    :param filename: name of the file (looked up in CONFIG_DIR when default is True,
        otherwise used as given)
    :param default: if True, join filename onto the default config directory
    :return: ConfigParser populated from the file
    """
    path = os.path.join(CONFIG_DIR, filename) if default else filename
    parser = ConfigParser()
    parser.read(path)
    return parser
68 | 
69 | 
def split_dmrs_string(content):
    """
    Split a string of DMRS read from a file into individual DMRS strings.
     The content is cut at every '<dmrs'; whitespace-only pieces are dropped and
     '<dmrs' is put back in front of each remaining piece.
    :param content: File content
    :return: List of DMRS XML strings
    """

    return [
        ('<dmrs' + piece).strip()
        for piece in content.split('<dmrs')
        if piece.strip() != ''
    ]


--------------------------------------------------------------------------------
/pydmrs/visualization/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML>
 2 | <html>
 3 | <head>
 4 | 	<meta charset="utf-8">
 5 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 7 | 		
 8 | 	<title>DMRS Visualizer</title>
 9 | 	
10 | 	<link href="static/dmrs.css" rel="stylesheet" type="text/css">
11 | 	<link href="static/bootstrap.min.css" rel="stylesheet" type="text/css">
12 | 	
13 | 	<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
14 |     <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
15 |     <!--[if lt IE 9]>
16 |       <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
17 |       <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
18 |     <![endif]-->
19 | </head>
20 | <body>
21 | 	<div class="container-fluid" style="height: 100%;">
22 | 		
23 | 		<div class="row" style="height: 50%; padding: 20px">
24 | 			<div id="visualizations" class="col-md-12"></div>
25 | 			<div id="tooltip" class="dmrs-tooltip"></div>
26 | 		</div>
27 | 		
28 | 		<div class="row">
29 | 			<div class="col-md-6 col-md-offset-3">
30 | 				<div id = "alert_placeholder"></div>
31 | 				<form id="vizform" method="post" onsubmit="visualizeSentence(document.getElementById('dmrsinput').value); return false;">
32 | 					<div class="form-group">
33 | 						<textarea id="dmrsinput" name="dmrs" form="vizform" class="form-control" rows="15" placeholder="Enter DMRS XML ..."></textarea>
34 | 					</div>
35 | 			
36 | 					<div class="text-center">
37 | 						<button type="submit" class="btn btn-primary">Visualize</button>
38 | 						<button type="button" class="btn btn-primary" onclick="resetVisualizer();">Reset</button>
39 | 					</div>
40 | 				</form>
41 | 			</div>
42 | 		</div>
43 | 	</div>
44 | 	<script src="static/d3.min.js"></script>
45 | 	<script src="static/visualization.js"></script>
46 | 	<script src="static/dmrs.js"></script>
47 | 	<script src="static/jquery-1.12.3.min.js"></script>
48 | 	<script src="static/bootstrap.min.js"></script>	
49 | </body>
50 | </html>


--------------------------------------------------------------------------------
/pydmrs/visualization/static/d3.min.js-LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2010-2015, Michael Bostock
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * The name Michael Bostock may not be used to endorse or promote products
15 |   derived from this software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/pydmrs/visualization/static/dmrs.css:
--------------------------------------------------------------------------------
  1 | /* The MIT License (MIT)
  2 | 
  3 | Demophin and d3.arcdiagram.js both use the same terms of the MIT license.
  4 |  
  5 | Demophin: Copyright (c) 2014 Michael Wayne Goodman
  6 |   (see https://github.com/goodmami/demophin)
  7 |  
  8 | d3.arcdiagram.js: Copyright (c) 2015 Michael Wayne Goodman
  9 |   (see https://github.com/goodmami/d3-arcdiagram)
 10 | 
 11 | Permission is hereby granted, free of charge, to any person obtaining a copy
 12 | of this software and associated documentation files (the "Software"), to deal
 13 | in the Software without restriction, including without limitation the rights
 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 15 | copies of the Software, and to permit persons to whom the Software is
 16 | furnished to do so, subject to the following conditions:
 17 | 
 18 | The above copyright notice and this permission notice shall be included in all
 19 | copies or substantial portions of the Software.
 20 | 
 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 27 | SOFTWARE.*/
 28 | 
 29 | html {
 30 |   height: 100%;
 31 | }
 32 | 
 33 | body {
 34 |   background-color: #DDD;
 35 |   height: 100%;
 36 | }
 37 | 
 38 | 
 39 | .grammarname {
 40 |   font-weight: bold;
 41 | }
 42 | 
 43 | /*#sentenceinput {
 44 |   font-size: 12pt;
 45 | }
 46 | 
 47 | #parseSubmit {
 48 |   height: 12pt;
 49 | }*/
 50 | 
 51 | .sentence {
 52 |   text-align: center;
 53 |   font-size: larger;
 54 |   font-weight: bold;
 55 | }
 56 | 
 57 | .status {
 58 |   /*position: fixed;
 59 |   bottom: 100%;
 60 |   left: 0px; */
 61 |   width: 100%;
 62 |   height: 20px;
 63 |   font-size: small;
 64 | }
 65 | 
 66 | .error {
 67 |   font-weight: bold;
 68 |   font-size: large;
 69 | }
 70 | 
 71 | .result {
 72 |   display: block;
 73 |   margin: 10px;
 74 | }
 75 | 
 76 | .dmrs {
 77 |   display: inline-block;
 78 |   background-color: #FFF;
 79 |   border-radius: 10px;
 80 |   overflow: auto;
 81 | }
 82 | 
 83 | .node {
 84 |   pointer-events: all;
 85 |   cursor: pointer;
 86 |   fill: #000;
 87 |   stroke: none;
 88 | }
 89 | 
 90 | .nodeText {
 91 |   stroke: none;
 92 |   font-family: sans-serif;
 93 |   font-size: 16px;
 94 |   text-anchor: middle;
 95 | }
 96 | 
 97 | .nodeText:hover {
 98 |   font-weight: bold;
 99 | }
100 | 
101 | .node.selected {
102 |   font-weight: bold;
103 | }
104 | 
105 | .nodeBox {
106 |   stroke-width: 2px;
107 |   fill: none;
108 | }
109 | 
110 | .link {
111 |   stroke: #000;
112 |   fill: #000;
113 |   stroke-opacity: .5;
114 |   fill-opacity: .5;
115 | }
116 | 
117 | .linkedge {
118 |   fill: none;
119 |   stroke-width: 2px;
120 |   stroke-linejoin: round;
121 |   marker-end: url(#arrowhead);
122 | }
123 | 
124 | .eqedge {
125 |   fill: none;
126 |   stroke-width: 2px;
127 |   stroke-linejoin: round;
128 |   stroke-dasharray: 5,5;
129 | }
130 | 
131 | .topedge {
132 |   fill: #f00;
133 |   stroke-width: 2px;
134 |   stroke-linejoin: round;
135 |   stroke-dasharray: 5,5;
136 |   marker-end: url(#arrowhead);
137 | }
138 | 
139 | .linkend {
140 |   stroke-opacity: .5;
141 |   fill-opacity: .5;
142 | }
143 | 
144 | .rargname {
145 |   fill: #000;
146 |   stroke: none;
147 |   stroke-width: 1px;
148 |   font-family: sans-serif;
149 |   font-size: 10px;
150 |   text-anchor: middle;
151 | }
152 | 
153 | .node.in { fill: red; }
154 | .node.out { fill: blue; }
155 | .node.labelset { stroke: gold; }
156 | .node.scope { stroke: violet; }
157 | .node.out.labelset { fill: green; stroke: gold;}
158 | .node.in.labelset { fill: darkorange; stroke: gold;}
159 | 
160 | .link.in { fill: red; stroke: red; }
161 | .link.out { fill: blue; stroke: blue; }
162 | .link.labelset { fill: gold; stroke: gold; }
163 | .link.scope { fill: violet; stroke: violet; }
164 | .link.in.labelset { fill: darkorange; stroke: darkorange; }
165 | .link.out.labelset { fill: green; stroke: green; }
166 | 
167 | /* thanks: http://bl.ocks.org/d3noob/a22c42db65eb00d4e369 */
168 | .dmrs-tooltip {
169 |   position: absolute;
170 |   text-align: center;
171 |   padding: 2px;
172 |   font: 12px sans-serif;
173 |   color: #FFF;
174 |   background: #444;
175 |   border: 0px;
176 |   border-radius: 8px;
177 |   opacity: 0;
178 |   pointer-events: none;
179 | }
180 | 
181 | .dmrs-tooltip td {
182 | 	padding: 2px
183 | }
184 | 
185 | #visualizations {
186 | 	position: relative;
187 | 	padding: 100px;
188 | 	height: 100%;
189 | }
190 | 
/* Centres the drawing inside #visualizations (which is position: relative):
   an absolutely positioned box with equal opposite offsets and margin: auto
   is centred on both axes.
   NOTE(review): this relies on the svg having a definite width/height - confirm. */
#visualizations svg {
	position: absolute;
    top: -9999px;
    bottom: -9999px;
    left: -9999px;
    right: -9999px;
    margin: auto;
}


--------------------------------------------------------------------------------
/pydmrs/visualization/static/dmrs.js:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | // 
  3 | // Demophin and d3.arcdiagram.js both use the same terms of the MIT license.
  4 | // 
  5 | // Demophin: Copyright (c) 2014 Michael Wayne Goodman
  6 | //   (see https://github.com/goodmami/demophin)
  7 | // 
  8 | // d3.arcdiagram.js: Copyright (c) 2015 Michael Wayne Goodman
  9 | //   (see https://github.com/goodmami/d3-arcdiagram)
 10 | // 
 11 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 12 | // of this software and associated documentation files (the "Software"), to deal
 13 | // in the Software without restriction, including without limitation the rights
 14 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 15 | // copies of the Software, and to permit persons to whom the Software is
 16 | // furnished to do so, subject to the following conditions:
 17 | // 
 18 | // The above copyright notice and this permission notice shall be included in all
 19 | // copies or substantial portions of the Software.
 20 | // 
 21 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 22 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 23 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 24 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 25 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 26 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 27 | // SOFTWARE.
 28 | 
 29 | 
// Nominal drawing size.
// NOTE(review): neither value is read by the functions in this file - confirm before relying on them.
var maxWidth = 600,
    height = 300;

// Layout constants used by dmrsDisplay, prepareGraph and getPathSpec.
var level_dy = 25,  // vertical separation between edges
    edge_radius = 15, // rounded corner radius,
    edge_xoffset = 10, // outgoing edges aren't centered
    node_dx = 20;  // horizontal separation between nodes

// d3 20-colour categorical scale.
// NOTE(review): not referenced by the functions in this file - confirm it is still needed.
var color = d3.scale.category20();
 39 | 
 40 | 
 41 | function dmrsDisplay(svgElem, graph) {
 42 | //  d3.json(url, function(error, graph) {
 43 |       // calculate source and target for links
 44 |       prepareGraph(graph);
 45 | 
 46 |       var tip = d3.select("#tooltip")
 47 |           .style("opacity", 0);
 48 | 
 49 |       var id = svgElem;
 50 |       var svg = d3.select(svgElem)
 51 |         .attr("height", ((graph.maxTopLevel - graph.maxBottomLevel + 3) * level_dy));
 52 |       var g = svg.append("svg:g")
 53 |           .attr("transform", "translate(0," + ((graph.maxTopLevel + 2) * level_dy) + ")");
 54 | 
 55 |       g.append("defs").append("marker")
 56 |           .attr("class", "linkend")
 57 |           .attr("id", "arrowhead")
 58 |           .attr("refX", 1) /*must be smarter way to calculate shift*/
 59 |           .attr("refY", 2)
 60 |           .attr("markerWidth", 5)
 61 |           .attr("markerHeight", 4)
 62 |           .attr("orient", "auto")
 63 |           .append("path")
 64 |               .attr("d", "M0,0 L1,2 L0,4 L5,2 Z"); //this is actual shape for arrowhead
 65 | 
 66 |       var x_pos = 10;
 67 |       var nodes = g.selectAll(".node").order()
 68 |           .data(graph.nodes)
 69 |         .enter().append("svg:g")
 70 |           .attr("class", "node")
 71 |           .each(function(d) {
 72 |             var vps = [];
 73 |             for (var key in d.varprops) {
 74 |               vps.push("<td>" + key + "</td><td>=</td><td>" + d.varprops[key] + "</td>");
 75 |             }
 76 |             d.tooltipText = "<table><tr>" + vps.join("</tr><tr>") + "</tr></table>";
 77 |           });
 78 |       nodes.append("svg:text")
 79 |           .attr("class", "nodeText")
 80 |           .text(function(d) {
 81 |             if (d.carg) {
 82 |               return d.pred + "(" + d.carg + ")";
 83 |             } else {
 84 |               return d.pred;
 85 |             }
 86 |           })
 87 |           .attr("x", function(d, i) {
 88 |               d.bbox = this.getBBox();
 89 |               halfLen = d.bbox.width / 2;
 90 |               x = x_pos + halfLen;
 91 |               x_pos = x + halfLen + node_dx;
 92 |               d.x = x;
 93 |               return x;
 94 |           })
 95 |           .attr("y", function(d) { return 0; })
 96 |           .attr("dy", function(d) { return d.bbox.height/5; });
 97 |       nodes.insert("svg:rect", "text")
 98 |           .attr("class", "nodeBox")
 99 |           .attr("x", function(d) { return d.x - (d.bbox.width / 2) - 2; })
100 |           .attr("y", function(d) { return - (d.bbox.height / 2) - 2; })
101 |           .attr("width", function(d) { return d.bbox.width + 4; })
102 |           .attr("height", function(d) { return d.bbox.height + 4; })
103 |           .attr("rx", 4)
104 |           .attr("ry", 4);
105 |       nodes.on("mouseover", function(d) {
106 |               if (!graph.sticky) { d3.select(this).classed("selected", true) };
107 |               updateHighlights(id);
108 |               tip.html(d.tooltipText)
109 |                 .style("opacity", 0.8);
110 |           })
111 |           .on("mousemove", function(d) {
112 |               tip.style("left", (d3.event.pageX - 10) + "px")
113 |                 .style("top", (d3.event.pageY + 15) + "px");
114 |           })
115 |           .on("mouseout", function(d) {
116 |               if (!d.sticky) { d3.select(this).classed("selected", false); }
117 |               updateHighlights(id);
118 |               tip.style("opacity", 0);
119 |           })
120 |           .on("click", function(d) {
121 |               stickyState = toggleSticky(id, this, d);
122 |               graph.sticky = stickyState;
123 |               updateHighlights(id);
124 |           });
125 | 
126 |       // not working...
127 |       svg.attr("width", d3.sum(nodes.data(), function(d) { return d.bbox.width + node_dx; }));
128 | 
129 |       var links = g.selectAll(".link").order()
130 |           .data(graph.links)
131 |         .enter().append("svg:g")
132 |           .attr("class", "link");
133 |       links.append("svg:path")
134 |           .attr("class", function(d) {
135 |               if (d.start == 0) {
136 |                   return "topedge";
137 |               } else if (d.rargname == "" && d.post == "EQ") {
138 |                   return "eqedge";
139 |               } else {
140 |                   return "linkedge";
141 |               }
142 |           })
143 |           .attr("d", function(d) {
144 |               return getPathSpec(d, graph);
145 |           })
146 |           .attr("transform", function(d) {
147 |               return "scale(1," + (d.dir * -1) + ")";
148 |           })
149 |           .style("marker-end", function(d) {
150 |               return (d.rargname == "" && d.post == "EQ") ? "none" : "url(#arrowhead)";
151 |           });
152 |       links.append("svg:text")
153 |           .attr("class", "rargname")
154 |           .attr("x", function(d) { return d.midpoint.x; })
155 |           .attr("y", function(d) { return d.midpoint.y * (-1 * d.dir) - 3; })
156 |           .text(function(d) { return d.rargname + "/" + d.post; } );
157 | //  });
158 | }
159 | 
160 | 
// Annotate a graph in place with the layout data needed for drawing.
// Every link gets `target` and `dir` (1 = above the nodes, -1 = below);
// non-TOP links also get `source`, `distance` and `level`.
// The graph gets `maxTopLevel`, `maxBottomLevel` and `sticky` (reset to false).
function prepareGraph(graph) {
    var nodeIdx = {}, levelIdx = {};
    // Declared locally: the loop counter was previously an implicit global.
    var dist;
    graph.nodes.forEach(function(d, i) {
        nodeIdx[d.id] = i;
        levelIdx[[i,i+1].join()] = {}; // eg levelIdx["1,2"] = {}
    });
    graph.links.forEach(function(d) {
        d.target = nodeIdx[d.end];
        // start of 0 is TOP link
        if (d.start == 0) {
            d.dir = 1;  // always on top
            return;
        }
        // the rest only apply to non-TOP links
        d.source = nodeIdx[d.start];
        d.distance = Math.abs(d.source - d.target);
        // Quantifiers and undirected EQ links below preds
        d.dir = (d.rargname == "" || d.post.toUpperCase() == "H") ? -1 : 1;
    });
    graph.maxTopLevel = 0;
    graph.maxBottomLevel = 0;
    // Assign levels to the shortest links first, so short links sit closest to the nodes.
    for (dist = 0; dist < graph.nodes.length; dist++) {
        graph.links.forEach(function(d) {
            if (d.start == 0) return;
            if (dist != d.distance) return;
            d.level = nextAvailableLevel(d.source, d.target, d.dir, levelIdx);
            if (d.dir == 1 && graph.maxTopLevel < d.level) {
                graph.maxTopLevel = d.level;
            } else if (d.dir == -1 && graph.maxBottomLevel > d.level) {
                graph.maxBottomLevel = d.level;
            }
        });
    }
    graph.sticky = false;
}
196 | 
197 | 
// Find the level nearest the nodes that is free in every gap between two node
// indices, mark it as taken in lvlIdx, and return it.
//   source, target: node indices (order does not matter)
//   dir:            1 to search upwards (1, 2, ...), -1 to search downwards (-1, -2, ...)
//   lvlIdx:         map from "i,i+1" gap keys to the set of levels already used there
function nextAvailableLevel(source, target, dir, lvlIdx) {
    var lo = source, hi = target;
    if (source > target) {
        lo = target;
        hi = source;
    }

    // keys of every gap the link has to cross
    var gapKeys = [];
    for (var k = lo; k < hi; k++) {
        gapKeys.push([k, k+1].join());
    }

    // step away from the nodes until no crossed gap already uses the level
    var candidate = dir;
    while (gapKeys.some(function(key) { return candidate in lvlIdx[key]; })) {
        candidate += dir;
    }

    gapKeys.forEach(function(key) {
        lvlIdx[key][candidate] = true;
    });
    return candidate;
}
223 | 
224 | 
// Build the SVG path string for one link and, as a side effect, store the
// position of its label in link.midpoint.
//   link:  link object carrying target/dir (and source/level for non-TOP links)
//   graph: graph whose nodes already have `x` and `bbox` set
// TOP links (start == 0) are a straight vertical line into the target node;
// other links leave the source, run along a horizontal rail and drop into the target.
function getPathSpec(link, graph) {
    var targetNode = graph.nodes[link.target],
        endX = targetNode.x,
        baseY = targetNode.bbox.height;

    if (link.start == 0) {
        var outerLevel = (link.dir == 1) ? graph.maxTopLevel : graph.maxBottomLevel;
        var farY = baseY + ((outerLevel + 1) * level_dy);
        link.midpoint = {"x": endX,
                         "y": (baseY + farY) / 2};
        return ["M", endX, farY, "L", endX, baseY].join(' ');
    }

    // non-TOP links only from here on
    var startX = graph.nodes[link.source].x,
        railY = baseY + (Math.abs(link.level) * level_dy - 5);
    link.midpoint = {"x": (startX + endX) / 2,
                     "y": railY};

    // +1 when the link runs left to right, -1 otherwise; mirrors the horizontal offsets
    var sign = (startX < endX) ? 1 : -1;
    startX += sign * edge_xoffset;
    return ["M", startX, baseY - 5,
            "L", startX, (railY - edge_radius),
            "Q", startX, railY, (startX + sign * edge_radius), railY,
            "L", (endX - sign * edge_radius), railY,
            "Q", endX, railY, endX, railY - edge_radius,
            "L", endX, baseY].join(' ');
}
260 | 
261 | 
// Recompute the highlight classes ("out", "in", "labelset", "scope") on every
// node and link under `id`, relative to the currently selected node(s).
// All highlights are cleared first; with no selected node nothing is re-added.
function updateHighlights(id) {
    clearHighlights(id);
    d3.select(id).selectAll(".node.selected").each(function(d){
        // node ids collected while classifying the links, used below to class the nodes
        var labelset = d3.set(),
            outs = d3.set(),
            ins = d3.set(),
            scopes = d3.set();
        d3.select(id).selectAll(".link")
            .classed({
                // argument link leaving the selected node
                "out": function(_d) {
                    if (_d.rargname && d.id == _d.start) {
                        outs.add(_d.end);
                        return true;
                    }
                    return false;
                },
                // argument link arriving at the selected node
                "in": function(_d) {
                    if (_d.rargname && d.id == _d.end) {
                        ins.add(_d.start);
                        return true;
                    }
                    return false;
                },
                // EQ link touching the selected node; both ends join the label set
                "labelset": function(_d) {
                    if (_d.post == "EQ" && (_d.start == d.id || _d.end == d.id)) {
                        labelset.add(_d.start);
                        labelset.add(_d.end);
                        return true;
                    }
                    return false
                },
                // H/HEQ link touching the selected node; only targets of links
                // that start at the selected node are recorded in `scopes`
                "scope": function(_d) {
                    if (_d.start == d.id && (_d.post == "H" || _d.post == "HEQ")) {
                        scopes.add(_d.end);
                        return true;
                    } else if (_d.end == d.id && (_d.post == "H" || _d.post == "HEQ")) {
                        return true;
                    }
                    return false;
                }
            });
        // grow the label set along EQ links until a full pass adds nothing new
        var labelAdded = true;
        while (labelAdded) {
            labelAdded = false;
            d3.select(id).selectAll(".link").each(function(_d) {
                if (_d.post == "EQ") {
                    if (labelset.has(_d.start) && !labelset.has(_d.end)) {
                        labelset.add(_d.end);
                        labelAdded = true;
                    } else if (labelset.has(_d.end) && !labelset.has(_d.start)) {
                        labelset.add(_d.start);
                        labelAdded = true;
                    }
                }
            });
        }
        // class each node according to the id sets gathered above
        d3.select(id).selectAll(".node")
            .classed({
                "out": function(_d) { return outs.has(_d.id); },
                "in": function(_d) { return ins.has(_d.id); },
                "labelset": function(_d) { return labelset.has(_d.id); },
                "scope": function(_d) { return scopes.has(_d.id); }
            });

    });
}
328 | 
329 | 
// Remove the "in", "out", "labelset" and "scope" classes from every node and
// link under `id`.
function clearHighlights(id) {
    var root = d3.select(id);
    [".node", ".link"].forEach(function(selector) {
        root.selectAll(selector).classed(
            {"in": false, "out": false, "labelset": false, "scope": false}
        );
    });
}
338 | 
339 | 
// Toggle the pinned ("sticky") state of a node and return the new state.
//   id:   container to search for other selected nodes
//   node: DOM element of the clicked node
//   d:    data object bound to that node
// Pinning a node first unpins and deselects every currently selected node.
function toggleSticky(id, node, d) {
    var wasSticky = d.sticky;
    if (!wasSticky) {
        d3.select(id).selectAll(".node.selected").each(function(other) {
            other.sticky = false;
            d3.select(this).classed("selected", false);
        });
    }
    d.sticky = !wasSticky;
    d3.select(node).classed("selected", d.sticky);
    return d.sticky;
}
354 | 
355 | 
// Empty the #visualizations container, removing any previous drawing.
function clearVizArtifacts() {
  var container = d3.select("#visualizations");
  container.html("");
}
359 | 


--------------------------------------------------------------------------------
/pydmrs/visualization/static/visualization.js:
--------------------------------------------------------------------------------
  1 | function visualizeSentence(xml_text) {
  2 | 	clearVizArtifacts();
  3 | 	clearAlertMessage();
  4 | 	
  5 | 	displayAlert('Visualizing...', 'alert-info');
  6 | 	
  7 | 	if (xml_text) {
  8 | 		d3_graph = parseXmlDMRS(xml_text)
  9 | 		
 10 | 		if (d3_graph) {
 11 | 			showGraphs(d3_graph);
 12 | 			clearAlertMessage();
 13 | 		}
 14 | 	} else {
 15 | 		displayAlert('<b>Error:</b> No XML provided.', 'alert-danger');
 16 | 	}
 17 | }
 18 | 
 19 | 
 20 | function clearAlertMessage() {
 21 | 	$("#alert_placeholder").html("");
 22 | }
 23 | 
 24 | 
 25 | function resetVisualizer() {
 26 | 	clearVizArtifacts();
 27 | 	clearAlertMessage();
 28 | 	$("#dmrsinput").val("");
 29 | }
 30 | 
 31 | 
 32 | function parseXmlDMRS(xml_text) {	
 33 | 	// Remove line breaks 
 34 | 	xml_text = xml_text.replace(/(\r\n|\n|\r)/gm,"");
 35 | 	
 36 | 	// Parse XML
 37 | 	try {
 38 | 		xmlDoc = $.parseXML(xml_text);
 39 | 	}
 40 | 	catch(err) {
 41 | 		displayAlert('<b>Error parsing XML.</b> ', 'alert-danger');
 42 | 		return null
 43 | 	}
 44 | 	
 45 | 	xml = $(xmlDoc);
 46 | 	
 47 | 	// Parse nodes into objects
 48 | 	nodes = xml.find('node');
 49 | 	d3_nodes = [];
 50 | 	
 51 | 	node_map = {};
 52 | 	
 53 | 	for (i = 0; i < nodes.length; i++) {
 54 | 		node = nodes[i];
 55 | 		
 56 | 		node_map[node.getAttribute('nodeid')] = i;
 57 | 		
 58 | 		sortinfo = node.getElementsByTagName('sortinfo')[0];
 59 | 		
 60 | 		d3_node = {
 61 | 			id: node.getAttribute('nodeid'),
 62 | 			pred: parseNodePred(node),
 63 | 			cfrom: node.getAttribute('cfrom'),
 64 | 			cto: node.getAttribute('cto'),
 65 | 			cvarsort: sortinfo.getAttribute('cvarsort'),
 66 | 			carg: node.getAttribute('carg'),
 67 | 			varprops: parseNodeProperties(node)
 68 | 		};
 69 | 	
 70 | 		d3_nodes.push(d3_node);
 71 | 	}
 72 | 	
 73 | 	// Parse links into objects
 74 | 	links = xml.find('link');
 75 | 	d3_links = [];
 76 | 	for (i = 0; i < links.length; i++) {
 77 | 		link = links[i];
 78 | 		rargname = link.getElementsByTagName('rargname')[0];
 79 | 		post = link.getElementsByTagName('post')[0];
 80 | 		
 81 | 		d3_link = {
 82 | 			source: node_map[link.getAttribute('from')],
 83 | 			target: node_map[link.getAttribute('to')],
 84 | 			start: link.getAttribute('from'),
 85 | 			end: link.getAttribute('to'),
 86 | 			rargname: rargname.textContent,
 87 | 			post: post.textContent
 88 | 		};
 89 | 		
 90 | 		d3_links.push(d3_link);
 91 | 	}
 92 | 	
 93 | 	d3_graph = {
 94 | 		nodes: d3_nodes,
 95 | 		links: d3_links
 96 | 	};
 97 | 	
 98 | 	return d3_graph;
 99 | }
100 | 
101 | 
// Build the display string for a node's predicate.
//   node: <node> element of a DMRS XML document
// A real predicate is rendered as "_lemma_pos_sense", leaving out attributes
// that are absent. A general predicate is its text with a trailing "_rel" removed.
// Returns an empty string when the node has neither element.
function parseNodePred(node) {
	// Declared locally: these were previously implicit globals.
	var realpred = node.getElementsByTagName('realpred');
	var gpred;
		
	if (realpred.length > 0) {
		return [
			'',	// yields the leading underscore once joined
			realpred[0].getAttribute('lemma'),
			realpred[0].getAttribute('pos'),
			realpred[0].getAttribute('sense')
		].filter(function(val) { return val !== null; }).join('_');
	}

	gpred = node.getElementsByTagName('gpred')[0];
	if (!gpred) {
		return '';
	}

	// Strip only the "_rel" suffix; a plain string replace would remove the first
	// "_rel" found anywhere in the name.
	return gpred.textContent.trim().replace(/_rel$/, '');
}
121 | 
122 | 
// Collect the variable properties of a node from its <sortinfo> element.
//   node: <node> element of a DMRS XML document
// Returns an object keyed by upper-cased property name, holding only the
// properties that are present. Returns an empty object when there is no <sortinfo>.
function parseNodeProperties(node) {
	// Declared locally: these were previously implicit globals, `sortinfo`
	// being shared with parseXmlDMRS.
	var sortinfo = node.getElementsByTagName('sortinfo')[0];
	var property_names = ['ind', 'pers', 'num', 'gend', 'sf', 'mood', 'tense', 'prog', 'perf', 'pt'];
	var properties = {};
	var j, property_name, value;

	if (!sortinfo) {
		return properties;
	}
	
	for (j = 0; j < property_names.length; j++) {
		property_name = property_names[j];
		value = sortinfo.getAttribute(property_name);
		if (value != null) {
			properties[property_name.toUpperCase()] = value;
		}
	}
	
	return properties;
}
139 | 
140 | 
// Show a Bootstrap alert in #alert_placeholder, replacing any alert already there.
//   errorMessage: alert content; inserted as HTML, so callers may pass markup
//   type:         alert class added next to "alert", e.g. 'alert-info' or 'alert-danger'
// The alert is now built directly. The previous version re-created a global
// `bootstrap_alert` function object on every call only to invoke it once.
function displayAlert(errorMessage, type) {
	$('#alert_placeholder').html('<div class="alert '+type+'"><span>'+errorMessage+'</span></div>');
}
149 | 
150 | 
151 | 
// Create the drawing surface inside #visualizations and draw the graph into it.
//   graph: object returned by parseXmlDMRS
// An <svg> with zoom/pan behaviour is appended, and a <g> inside it receives the drawing.
function showGraphs(graph) {

	// NOTE: the chain ends with .append("g"), so `svg` refers to the inner <g>,
	// not to the <svg> element itself. The zoom handler therefore transforms the <g>.
	var svg = d3.select("#visualizations")
		.selectAll('.result')
		.data([graph])
		.enter()
		.append("svg")
		.attr("id", "dmrs")
		.attr("width", "100%")
		.attr("height", "100%")
		.attr("cursor", "grab")
		.call(d3.behavior.zoom().on("zoom", function () {
        svg.attr("transform", "translate(" + d3.event.translate + ")" + " scale(" + d3.event.scale + ")")
		}))
		.append("g")
		
	// give each <g> an id ("dmrs0", ...) and render its bound graph into it
	svg.attr("id", function(d, i) { return "dmrs" + i; })
		.each(function(d, i) { dmrsDisplay(this, d); });
}


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from setuptools import setup, find_packages
 3 | 
 4 | VERSION = '1.0.5'
 5 | 
 6 | setup(
 7 |   name = 'pydmrs',
 8 |   version = VERSION,
 9 |   description = 'A library for manipulating DMRS graphs',
10 |   author = 'Ann Copestake, Guy Emerson, Michael Wayne Goodman, Matic Horvat, Alex Kuhnle, Ewa Muszyńska',
11 |   author_email = 'gete2@cam.ac.uk',
12 |   license = 'MIT',
13 |   url = 'https://github.com/delph-in/pydmrs',
14 |   download_url = 'https://github.com/delph-in/pydmrs/tarball/'+VERSION,
15 |   keywords = ['NLP', 'Natural Language Processing', 'Computational Linguistics', 'Semantics'],
16 |   packages = find_packages(),
17 |   package_data = {'pydmrs': ['__config__/*.conf']},
18 |   install_requires = [
19 |     'pydelphin >= 1.0.1'
20 |   ]
21 | )
22 | 


--------------------------------------------------------------------------------
/tests/matching/test_aligned_matching.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | from examples import examples_dmrs
  4 | from pydmrs.core import span_pred_key, abstractSortDictDmrs, ListDmrs, Node, RealPred, \
  5 |     InstanceSortinfo, Link
  6 | from pydmrs.matching import aligned_matching
  7 | 
  8 | 
  9 | class TestAlignedMatching(unittest.TestCase):
 10 |     def setUp(self):
 11 |         self.the_cat = examples_dmrs.the_cat().convert_to(
 12 |             abstractSortDictDmrs(node_key=span_pred_key))
 13 |         # Checks if the matching code converts to SortDictDmrs with span_pred_key
 14 |         self.the_cat_chases_the_dog = examples_dmrs.the_cat_chases_the_dog().convert_to(
 15 |             abstractSortDictDmrs(node_key=span_pred_key))
 16 |         self.the_dog_chases_the_cat = examples_dmrs.the_dog_chases_the_cat().convert_to(
 17 |             abstractSortDictDmrs(node_key=span_pred_key))
 18 |         self.the_mouse = examples_dmrs.the_mouse() \
 19 |             .convert_to(abstractSortDictDmrs(node_key=span_pred_key))
 20 |         self.dog_cat = examples_dmrs.dog_cat() \
 21 |             .convert_to(abstractSortDictDmrs(node_key=span_pred_key))
 22 |         # All other DMRS used here should udnergo conversion as well.
 23 | 
 24 |     def test_match_nodes(self):
 25 |         nodes1 = self.the_dog_chases_the_cat.nodes
 26 |         nodes2 = self.the_cat_chases_the_dog.nodes
 27 |         matches = aligned_matching.match_nodes(nodes1, nodes2)
 28 |         self.assertEqual(len(matches), 1)
 29 |         self.assertListEqual(matches[0], [(4, 4), (3, 3), (1, 1)])
 30 | 
 31 |         # Return [] if either of the nodes list empty.
 32 |         self.assertListEqual(aligned_matching.match_nodes([], nodes1), [])
 33 |         self.assertListEqual(aligned_matching.match_nodes(nodes1, []), [])
 34 | 
 35 |         nodes3 = self.the_cat.nodes
 36 |         matches = aligned_matching.match_nodes(nodes3, nodes1)
 37 |         self.assertEqual(len(matches), 2)
 38 |         self.assertListEqual(matches[0], [(2, 5), (1, 1)])
 39 |         self.assertListEqual(matches[1], [(2, 5), (1, 4)])
 40 | 
 41 |     def test_find_extra_surface_nodeids(self):
 42 |         nodeids = [1, 5]
 43 |         extras = aligned_matching.find_extra_surface_nodeids(nodeids, self.the_dog_chases_the_cat)
 44 |         self.assertListEqual(extras, [2, 3, 4])
 45 | 
 46 |         # No extras.
 47 |         nodeids1 = [1, 2]
 48 |         extras1 = aligned_matching.find_extra_surface_nodeids(nodeids1, self.the_cat)
 49 |         self.assertListEqual(extras1, [])
 50 | 
 51 |     def test_get_matching_nodeids(self):
 52 |         # Match "the cat" onto "the dog chases the cat" (exact fit, only one match)
 53 |         matches1 = aligned_matching.get_matching_nodeids(self.the_cat, self.the_dog_chases_the_cat)
 54 |         self.assertEqual(len(matches1), 1)
 55 |         self.assertCountEqual(matches1[0], [(2, 5), (1, 4)])
 56 | 
 57 |         # all_surface = True
 58 |         all_matches1 = aligned_matching.get_matching_nodeids(self.the_cat,
 59 |                                                              self.the_dog_chases_the_cat,
 60 |                                                              all_surface=True)
 61 |         # The same as earlier
 62 |         self.assertListEqual(matches1[0], all_matches1[0])
 63 |         # Extra surface nodes: between dog and cat
 64 | 
 65 |         all_matches1 = aligned_matching.get_matching_nodeids(self.dog_cat,
 66 |                                                              self.the_dog_chases_the_cat,
 67 |                                                              all_surface=True)
 68 |         self.assertCountEqual(all_matches1[0], [(2, 5), (1, 2), (None, 3), (None, 4)])
 69 | 
 70 |         # Match "the dog chases the cat" onto "the cat chases the dog" (inexact fit)
 71 |         matches2 = aligned_matching.get_matching_nodeids(self.the_dog_chases_the_cat,
 72 |                                                          self.the_cat_chases_the_dog)
 73 |         # Two options: "the dog" matches or "the cat" matches, 'chases' doesn't because it's not part of the longest match
 74 |         self.assertEqual(len(matches2), 2)
 75 |         self.assertCountEqual(matches2, [[(5, 2), (4, 1)], [(2, 5), (1, 4)]])
 76 | 
 77 |         # No match found
 78 |         matches = aligned_matching.get_matching_nodeids(self.the_mouse, self.dog_cat)
 79 |         self.assertListEqual(matches, [])
 80 | 
 81 |         # Should be the same as 'the cat'.
 82 |         mixed_cat = ListDmrs(surface='the cat')
 83 |         mixed_cat.add_node(Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
 84 |                                 sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')))
 85 |         mixed_cat.add_node(Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3))
 86 |         mixed_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
 87 |         mixed = aligned_matching.get_matching_nodeids(mixed_cat, self.the_dog_chases_the_cat)
 88 |         self.assertListEqual(mixed, matches1)
 89 | 
 90 |     def test_get_score(self):
 91 |         matches = aligned_matching.get_matching_nodeids(self.the_cat, self.the_dog_chases_the_cat)
 92 |         subgraph1 = aligned_matching.get_matched_subgraph(matches[0], self.the_dog_chases_the_cat)
 93 |         score1 = aligned_matching.get_score(self.the_cat, subgraph1, matches[0])
 94 |         self.assertEqual(score1, (3, 3, 3))  # 'the', 'cat' and the link
 95 | 
 96 |         # all_surface = True
 97 |         all_surface_matches = aligned_matching.get_matching_nodeids(self.dog_cat,
 98 |                                                                     self.the_dog_chases_the_cat,
 99 |                                                                     all_surface=True)
100 |         subgraph1a = aligned_matching.get_matched_subgraph(all_surface_matches[0],
101 |                                                            self.the_dog_chases_the_cat)
102 |         score1a = aligned_matching.get_score(self.the_cat, subgraph1a, all_surface_matches[0])
103 |         self.assertEqual(score1a, (2, 7, 3))
104 | 


--------------------------------------------------------------------------------
/tests/matching/test_general_matching.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | import pydmrs.matching.match_evaluation
  4 | from examples import examples_dmrs
  5 | from pydmrs._exceptions import PydmrsTypeError
  6 | from pydmrs.components import InstanceSortinfo, RealPred
  7 | from pydmrs.core import Link, DictDmrs, Node
  8 | from pydmrs.matching import general_matching
  9 | 
 10 | 
 11 | class TestMatch(unittest.TestCase):
 12 |     def setUp(self):
 13 |         self.match = general_matching.Match([(2, 3), (4, 2)], [(Link(4, 5, 'RSTR', 'H'),
 14 |                                                                 Link(1, 2, 'RSTR', 'H'))])
 15 | 
 16 |     def test_Match_init(self):
 17 |         self.assertEqual(general_matching.Match().nodeid_pairs, [])
 18 |         self.assertEqual(general_matching.Match().link_pairs, [])
 19 |         self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2)])
 20 |         self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'),
 21 |                                                        Link(1, 2, 'RSTR', 'H'))])
 22 | 
 23 |     def test_Match_len(self):
 24 |         self.assertEqual(len(self.match), 3)
 25 |         self.assertEqual(len(general_matching.Match()), 0)
 26 | 
 27 |     def test_Match_add(self):
 28 |         self.assertIsNone(self.match.add(general_matching.Match()))
 29 |         self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2)])
 30 |         self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'),
 31 |                                                        Link(1, 2, 'RSTR', 'H'))])
 32 | 
 33 |         incompatible_match = general_matching.Match([(1, 2), (8, 1)], [(Link(1, 8, 'RSTR', 'H'),
 34 |                                                                         Link(1, 2, 'RSTR', 'H'))])
 35 |         self.match.add(incompatible_match)
 36 |         self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2), (8, 1)])
 37 |         self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'),
 38 |                                                        Link(1, 2, 'RSTR', 'H'))])
 39 | 
 40 |         compatible_match = general_matching.Match([(1, 5), (3, 4)], [(Link(1, 3, 'ARG1', 'NEQ'),
 41 |                                                                       Link(1, 5, 'ARG2', 'NEQ'))])
 42 |         self.match.add(compatible_match)
 43 |         self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2), (1, 5), (8, 1), (3, 4)])
 44 |         self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'),
 45 |                                                        Link(1, 2, 'RSTR', 'H')),
 46 |                                                       (Link(1, 3, 'ARG1', 'NEQ'),
 47 |                                                        Link(1, 5, 'ARG2', 'NEQ'))])
 48 | 
 49 | 
 50 | class TestGeneralMatching(unittest.TestCase):
 51 |     def setUp(self):
 52 |         self.large_dmrs = examples_dmrs.the_dog_chases_the_cat_and_the_cat_chases_the_mouse()
 53 |         self.small_dmrs = examples_dmrs.the_dog_chases_the_cat()
 54 |         self.cat_dmrs = examples_dmrs.the_cat()
 55 |         self.reverse_dmrs = examples_dmrs.the_cat_chases_the_dog()
 56 | 
 57 |     def test_find_best_matches(self):
 58 |         # Match "the cat" onto "the dog chases the cat" (exact fit)
 59 |         matches = general_matching.find_best_matches(self.cat_dmrs, self.small_dmrs)
 60 | 
 61 |         self.assertEqual(len(matches), 1)
 62 |         self.assertCountEqual(matches[0].nodeid_pairs, [(2, 5), (1, 4)])
 63 |         self.assertCountEqual(matches[0].link_pairs,
 64 |                               [(Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))])
 65 | 
 66 |         # Match "the dog chases the cat" onto "the cat chases the dog" (inexact fit)
 67 |         matches = general_matching.find_best_matches(self.small_dmrs, self.reverse_dmrs)
 68 |         self.assertEqual(len(matches), 1)
 69 |         self.assertCountEqual(matches[0].nodeid_pairs, [(5, 2), (4, 1), (3, 3), (2, 5), (1, 4)])
 70 |         self.assertCountEqual(matches[0].link_pairs,
 71 |                               [(Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H')),
 72 |                                (Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))])
 73 | 
 74 |         # No match found
 75 |         matches = general_matching.find_best_matches(examples_dmrs.the_mouse(), self.reverse_dmrs)
 76 |         self.assertIsNone(matches)
 77 | 
 78 |         # More than one match found.
 79 |         matches = general_matching.find_best_matches(self.cat_dmrs, self.large_dmrs)
 80 |         self.assertEqual(len(matches), 2)
 81 |         self.assertCountEqual(matches[0].nodeid_pairs, [(2, 5), (1, 4)])
 82 |         self.assertCountEqual(matches[0].link_pairs,
 83 |                               [(Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))])
 84 |         self.assertCountEqual(matches[1].nodeid_pairs, [(2, 8), (1, 7)])
 85 |         self.assertCountEqual(matches[1].link_pairs,
 86 |                               [(Link(1, 2, 'RSTR', 'H'), Link(7, 8, 'RSTR', 'H'))])
 87 | 
 88 |     def test_get_matched_subgraph(self):
 89 |         match = general_matching.find_best_matches(self.cat_dmrs, self.small_dmrs)[0]
 90 |         subgraph = general_matching.get_matched_subgraph(self.small_dmrs, match)
 91 |         expected = DictDmrs(nodes=[Node(nodeid=4, pred=RealPred('the', 'q')),
 92 |                                    Node(nodeid=5, pred=RealPred('cat', 'n', '1'),
 93 |                                         sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))],
 94 |                             links=[Link(start=4, end=5, rargname='RSTR', post='H')])
 95 |         self.assertListEqual(subgraph.nodes, expected.nodes)
 96 |         self.assertListEqual(subgraph.links, expected.links)
 97 | 
 98 |     def test_get_recall_fscore(self):
 99 |         exact_matches = general_matching.find_best_matches(self.cat_dmrs, self.small_dmrs)
100 |         inexact_matches = general_matching.find_best_matches(self.small_dmrs, self.reverse_dmrs)
101 |         # Exact
102 |         self.assertEqual(pydmrs.matching.match_evaluation.get_recall(exact_matches[0], self.cat_dmrs), 1)
103 |         self.assertEqual(pydmrs.matching.match_evaluation.get_fscore(exact_matches[0], self.cat_dmrs), 1)
104 |         # Inexact
105 |         self.assertAlmostEqual(pydmrs.matching.match_evaluation.get_recall(inexact_matches[0], self.small_dmrs),
106 |                                7 / 9)
107 |         self.assertAlmostEqual(pydmrs.matching.match_evaluation.get_fscore(inexact_matches[0], self.small_dmrs),
108 |                                0.875)
109 | 
110 |         # List of matches instead of Match.
111 |         with self.assertRaises(PydmrsTypeError):
112 |             pydmrs.matching.match_evaluation.get_recall(exact_matches, self.cat_dmrs)
113 |         with self.assertRaises(PydmrsTypeError):
114 |             pydmrs.matching.match_evaluation.get_fscore(exact_matches, self.cat_dmrs)
115 | 
116 |     def test_get_missing_elements(self):
117 |         match = general_matching.find_best_matches(examples_dmrs.the_dog_chases_the_mouse(),
118 |                                                    self.small_dmrs)[0]
119 |         missing = pydmrs.matching.match_evaluation.get_missing_elements(match,
120 |                                                                         examples_dmrs.the_dog_chases_the_mouse())
121 |         self.assertCountEqual(missing, [4, 5, Link(3, 5, 'ARG2', 'NEQ'), Link(4, 5, 'RSTR', 'H')])
122 | 


--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import warnings
  3 | 
  4 | from pydmrs._exceptions import PydmrsTypeError, PydmrsValueError
  5 | from pydmrs.components import Pred, GPred, Sortinfo, EventSortinfo, InstanceSortinfo
  6 | from pydmrs.core import (
  7 |     Link, LinkLabel,
  8 |     Node, span_pred_key, abstractSortDictDmrs)
  9 | from examples import examples_dmrs
 10 | 
 11 | 
 12 | class TestLink(unittest.TestCase):
 13 |     """
 14 |     Test methods of Link and LinkLabel classes
 15 |     """
 16 | 
 17 |     def test_Link_new(self):
 18 |         """
 19 |         Links should have exactly four slots (start, end, rargname, post).
 20 |         The constructor should take either positional or keyword arguments.
 21 |         The slots should be accessible by attribute names.
 22 |         """
 23 |         # Check four arguments
 24 |         self.assert_ex_link(Link(0, 1, 'RSTR', 'H'))
 25 |         self.assert_ex_link(Link(start=0, end=1, rargname='RSTR', post='H'))
 26 | 
 27 |         # Check None values
 28 |         self.assertIsNone(Link(0, 1, '', 'H').rargname)
 29 |         self.assertIsNone(Link(0, 1, 'RSTR', 'NONE').post)
 30 |         self.assertIsNone(Link(0, 1, 'NULL', 'H').rargname)
 31 |         self.assertIsNone(Link(0, 1, 'RSTR', 'NIL').post)
 32 | 
 33 |         # Check wrong numbers of arguments
 34 |         with self.assertRaises(TypeError):
 35 |             Link(0, 1, 2)
 36 |         with self.assertRaises(TypeError):
 37 |             Link(0, 1, 2, 3, 4)
 38 | 
 39 |         # Check equal start and end
 40 |         with self.assertRaises(Warning):
 41 |             warnings.simplefilter('error')
 42 |             Link(0, 0, 1, 2)
 43 |         warnings.resetwarnings()
 44 | 
 45 |     # Helper function for test_Link_new
 46 |     def assert_ex_link(self, link):
 47 |         self.assertEqual(link.start, 0)
 48 |         self.assertEqual(link.end, 1)
 49 |         self.assertEqual(link.rargname, 'RSTR')
 50 |         self.assertEqual(link.post, 'H')
 51 | 
 52 |     def test_Link_str(self):
 53 |         """
 54 |         The 'informal' string representation of a Link
 55 |         should show a labelled arrow pointing from the start to the end
 56 |         """
 57 |         link = Link(0, 1, 'RSTR', 'H')
 58 |         self.assertEqual(str(link), "(0 - RSTR/H -> 1)")
 59 | 
 60 |     def test_Link_repr(self):
 61 |         """
 62 |         The 'official' string representation of a Link
 63 |         should evaluate to an equivalent Link
 64 |         """
 65 |         link = Link(0, 1, 'RSTR', 'H')
 66 |         self.assertEqual(link, eval(repr(link)))
 67 | 
 68 |     def test_Link_label(self):
 69 |         """
 70 |         The label of a link should be a LinkLabel
 71 |         """
 72 |         link = Link(0, 1, 'RSTR', 'H')
 73 |         label = LinkLabel('RSTR', 'H')
 74 |         self.assertIsInstance(link.label, LinkLabel)
 75 |         self.assertEqual(link.label, label)
 76 | 
 77 |     def test_Link_labelstring(self):
 78 |         """
 79 |         The labelstring of a link should be its label's string 
 80 |         """
 81 |         link = Link(0, 1, 'RSTR', 'H')
 82 |         labelstring = 'RSTR/H'
 83 |         self.assertEqual(link.labelstring, labelstring)
 84 | 
 85 |     def test_Link_copy(self):
 86 |         """
 87 |         copy.copy should return an equal Link
 88 |         copy.deepcopy should also return an equal Link
 89 |         """
 90 |         from copy import copy, deepcopy
 91 |         link = Link(0, 1, 'RSTR', 'H')
 92 |         link_copy = copy(link)
 93 |         link_deep = deepcopy(link)
 94 |         self.assertEqual(link, link_copy)
 95 |         self.assertEqual(link, link_deep)
 96 |         self.assertIsNot(link, link_copy)
 97 |         self.assertIsNot(link, link_deep)
 98 |         # Note that it doesn't make sense to check
 99 |         # if link.end is not link_deep.end,
100 |         # because identical strings and ints are considered to be the same
101 | 
102 |     def test_LinkLabel_new(self):
103 |         """
104 |         LinkLabels should have exactly two slots (rargname, post).
105 |         The constructor should take either positional or keyword arguments.
106 |         The slots should be accessible by attribute names.
107 |         """
108 |         # Check two arguments
109 |         self.assert_rstr_h(LinkLabel('RSTR', 'H'))
110 |         self.assert_rstr_h(LinkLabel(rargname='RSTR', post='H'))
111 | 
112 |         # Check wrong numbers of arguments
113 |         with self.assertRaises(TypeError):
114 |             LinkLabel(0, 1, 2)
115 |         with self.assertRaises(TypeError):
116 |             LinkLabel(0, 1, 2, 3, 4)
117 | 
118 |     # Helper function for test_LinkLabel_new
119 |     def assert_rstr_h(self, linklabel):
120 |         self.assertEqual(linklabel.rargname, 'RSTR')
121 |         self.assertEqual(linklabel.post, 'H')
122 | 
123 |     def test_LinkLabel_str(self):
124 |         """
125 |         The 'informal' string representation of a LinkLabel
126 |         should have a slash between the rargname and post
127 |         """
128 |         label = LinkLabel('RSTR', 'H')
129 |         self.assertEqual(str(label), "RSTR/H")
130 | 
131 |     def test_LinkLabel_repr(self):
132 |         """
133 |         The 'official' string representation of a LinkLabel
134 |         should evaluate to an equivalent LinkLabel
135 |         """
136 |         label = LinkLabel('RSTR', 'H')
137 |         self.assertEqual(label, eval(repr(label)))
138 | 
139 |     def test_LinkLabel_copy(self):
140 |         """
141 |         copy.copy should return an equal LinkLabel
142 |         copy.deepcopy should also return an equal LinkLabel
143 |         """
144 |         from copy import copy, deepcopy
145 |         label = LinkLabel('RSTR', 'H')
146 |         label_copy = copy(label)
147 |         label_deep = deepcopy(label)
148 |         self.assertEqual(label, label_copy)
149 |         self.assertEqual(label, label_deep)
150 |         self.assertIsNot(label, label_copy)
151 |         self.assertIsNot(label, label_deep)
152 |         # Note that it doesn't make sense to check
153 |         # if label.post is not label_deep.post,
154 |         # because identical strings are considered to be the same
155 | 
156 | 
class TestNode(unittest.TestCase):
    """
    Test methods for Node class.
    """

    def test_Node_init(self):
        """
        The constructor should store the given fields, validate the span and
        carg, and normalise pred and sortinfo arguments.
        """
        node = Node(nodeid=13, pred='the_q', surface='cat', base='x', cfrom=23, cto=27,
                    carg='Kim', )
        self.assertEqual(node.nodeid, 13)
        self.assertEqual(node.surface, 'cat')
        self.assertEqual(node.base, 'x')

        self.assertEqual(node.cfrom, 23)
        self.assertEqual(node.cto, 27)
        # Incorrect span
        with self.assertRaises(PydmrsValueError):
            Node(cfrom=22, cto=7)

        self.assertEqual(node.carg, 'Kim')
        # Fix carg with  "".
        self.assertEqual(Node(carg='"Kim"').carg, 'Kim')
        # Unaccounted " in carg
        with self.assertRaises(PydmrsValueError):
            Node(carg='"Kim')

        # String pred.
        self.assertEqual(node.pred, GPred('the_q'))
        # Other pred
        self.assertEqual(Node(pred=GPred('the_q')).pred, GPred('the_q'))

        # Allow None for sortinfo.
        self.assertIsNone(Node().sortinfo)
        # Dict sortinfo
        self.assertEqual(Node(sortinfo={'cvarsort': 'i', 'pers': '3'}).sortinfo,
                         InstanceSortinfo(pers='3'))
        # Sortinfo sortinfo
        self.assertEqual(Node(sortinfo=InstanceSortinfo(pers='3')).sortinfo,
                         InstanceSortinfo(pers='3'))
        # List sortinfo
        self.assertEqual(Node(sortinfo=[('cvarsort', 'i'), ('pers', '3')]).sortinfo,
                         InstanceSortinfo(pers='3'))
        # But nothing else.
        with self.assertRaises(PydmrsTypeError):
            Node(sortinfo="x[pers=3, num=sg, ind=+]")

    def test_Node_str(self):
        """
        str() of a node should show its pred, carg and sortinfo.
        """
        node = Node()
        self.assertEqual(str(node), "None")
        node = Node(nodeid=2, pred='_dog_n_1',
                    sortinfo=dict(cvarsort='i', pers='3', num='sg', ind='+'), carg='Pat')
        self.assertEqual(str(node), '_dog_n_1(Pat) x[pers=3, num=sg, ind=+]')

    def test_Node_eq(self):
        """
        Node equality should depend on pred, sortinfo and carg only.
        """
        # Unspecified nodes are always equal.
        node1 = Node()
        node2 = Node()
        self.assertEqual(node1, node2)

        sortinfo1 = {'cvarsort': 'e', 'tense': 'past'}
        sortinfo2 = {'cvarsort': 'e', 'tense': 'pres'}

        # Two nodes are equal if they have the same pred, sortinfo and carg,
        # even if all the other elements are different
        node1 = Node(nodeid=23, pred='the_q', sortinfo=sortinfo1, cfrom=2, cto=22, carg='Kim',
                     surface='cat', base='x')
        node2 = Node(nodeid=25, pred='the_q', sortinfo=sortinfo1, cfrom=15, carg='Kim',
                     surface='mad', base='w')
        self.assertEqual(node1, node2)

        # Different carg
        node2 = Node(pred='the_q', sortinfo=sortinfo1, carg='Jane')
        self.assertNotEqual(node1, node2)

        # Different pred
        node2 = Node(pred='_smile_v', sortinfo=sortinfo1, carg='Kim')
        self.assertNotEqual(node1, node2)

        # Different sortinfo. The pred and carg are kept identical to node1
        # so that the sortinfo is the only difference being tested.
        node2 = Node(pred='the_q', sortinfo=sortinfo2, carg='Kim')
        self.assertNotEqual(node1, node2)

    def test_Node_underspecification(self):
        """
        is_more_specific / is_less_specific should compare nodes by the
        specificity of their pred, carg and sortinfo.
        """
        with self.assertRaises(TypeError):
            Node(pred='_the_q').is_more_specific(4)
        # complete underspecification
        self.assertFalse(Node().is_more_specific(Node()))
        self.assertFalse(Node().is_less_specific(Node()))
        # pred underspecification
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node()))
        self.assertTrue(Node(pred=Pred()).is_less_specific(Node()))
        self.assertTrue(Node().is_more_specific(Node(pred=Pred())))
        self.assertFalse(Node().is_less_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=Pred()).is_less_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(pred=GPred(name='abc'))))
        self.assertTrue(Node(pred=Pred()).is_less_specific(Node(pred=GPred(name='abc'))))
        self.assertTrue(Node(pred=GPred(name='abc')).is_more_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=GPred(name='abc')).is_less_specific(Node(pred=Pred())))
        # carg underspecification
        self.assertFalse(Node(carg='?').is_more_specific(Node()))
        self.assertTrue(Node(carg='?').is_less_specific(Node()))
        self.assertTrue(Node().is_more_specific(Node(carg='?')))
        self.assertFalse(Node().is_less_specific(Node(carg='?')))
        self.assertFalse(Node(carg='?').is_more_specific(Node(carg='?')))
        self.assertFalse(Node(carg='?').is_less_specific(Node(carg='?')))
        self.assertFalse(Node(carg='?').is_more_specific(Node(carg='abc')))
        self.assertTrue(Node(carg='?').is_less_specific(Node(carg='abc')))
        self.assertTrue(Node(carg='abc').is_more_specific(Node(carg='?')))
        self.assertFalse(Node(carg='abc').is_less_specific(Node(carg='?')))
        # sortinfo underspecification
        self.assertFalse(Node(sortinfo=Sortinfo()).is_more_specific(Node()))
        self.assertTrue(Node(sortinfo=Sortinfo()).is_less_specific(Node()))
        self.assertTrue(Node().is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node().is_less_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(sortinfo=Sortinfo()).is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(sortinfo=Sortinfo()).is_less_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(
            Node(sortinfo=Sortinfo()).is_more_specific(Node(sortinfo=EventSortinfo(sf='abc'))))
        self.assertTrue(
            Node(sortinfo=Sortinfo()).is_less_specific(Node(sortinfo=EventSortinfo(sf='abc'))))
        self.assertTrue(
            Node(sortinfo=EventSortinfo(sf='abc')).is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(
            Node(sortinfo=EventSortinfo(sf='abc')).is_less_specific(Node(sortinfo=Sortinfo())))
        # mixed specification
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(carg='?')))
        self.assertFalse(Node(pred=Pred()).is_less_specific(Node(carg='?')))
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(pred=Pred()).is_less_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(carg='?').is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(carg='?').is_less_specific(Node(sortinfo=Sortinfo())))

    def test_Node_span(self):
        """
        The span of a node should be the pair (cfrom, cto).
        """
        node = Node(cfrom=2, cto=15)
        self.assertEqual(node.span, (2, 15))

    def test_Node_isgpred_realpred_node(self):
        """
        A node should report whether its pred is a GPred or a RealPred.
        """
        gnode = Node(pred='the_q')
        realnode = Node(pred='_cat_n')
        self.assertTrue(gnode.is_gpred_node)
        self.assertTrue(realnode.is_realpred_node)
        self.assertFalse(gnode.is_realpred_node)
        self.assertFalse(realnode.is_gpred_node)
300 | 
301 | 
class TestDmrs(unittest.TestCase):
    """Tests for membership and link iteration on an example DMRS graph."""

    def setUp(self):
        self.test_dmrs = examples_dmrs.the_dog_chases_the_cat()

    def test_contains(self):
        # Membership is tested by node id.
        self.assertTrue(4 in self.test_dmrs)
        self.assertFalse(16 in self.test_dmrs)

    def test_iter_outgoing(self):
        dmrs = self.test_dmrs

        # A node id that is not in the graph is rejected.
        self.assertRaises(PydmrsValueError, dmrs.iter_outgoing, 15)

        dmrs.add_link(Link(3, 4, 'None', 'EQ'))
        outgoing = dmrs.iter_outgoing(3)
        # Check that an iterator returned
        self.assertTrue(hasattr(outgoing, '__next__'))
        # EQ link counted as outgoing
        expected_links = [Link(3, 5, 'ARG2', 'NEQ'),
                          Link(3, 2, 'ARG1', 'NEQ'),
                          Link(3, 4, None, 'EQ')]
        self.assertCountEqual(list(outgoing), expected_links)
        # TODO: Treat EQ links symmetrically or not at all, as long as it's consistent.
        # Test e.g.
        # self.test_dmrs.add_link(Link(4, 3, 'None', 'EQ'))
        # out_it = self.test_dmrs.iter_outgoing(3)
        # self.assertIn(Link(4, 3, 'None', 'EQ'), list(out_it))

        # No outgoing links
        self.assertRaises(StopIteration, next, dmrs.iter_outgoing(2))

    def test_iter_incoming(self):
        dmrs = self.test_dmrs

        # A node id that is not in the graph is rejected.
        self.assertRaises(PydmrsValueError, dmrs.iter_incoming, 15)

        dmrs.add_link(Link(4, 2, 'None', 'EQ'))
        incoming = dmrs.iter_incoming(2)
        # Check that an iterator returned
        self.assertTrue(hasattr(incoming, '__next__'))
        # EQ link counted as incoming
        expected_links = [Link(1, 2, 'RSTR', 'H'),
                          Link(3, 2, 'ARG1', 'NEQ'),
                          Link(4, 2, None, 'EQ')]
        self.assertCountEqual(list(incoming), expected_links)

        # TODO: Treat EQ links somehow.
        # Test e.g.
        # self.test_dmrs.add_link(Link(2, 4, 'None', 'EQ'))
        # in_it = self.test_dmrs.iter_incoming(2)
        # self.assertIn(Link(2, 4, 'None', 'EQ'), list(in_it))

        # No incoming links
        self.assertRaises(StopIteration, next, dmrs.iter_incoming(3))
354 | 


--------------------------------------------------------------------------------