├── .gitignore ├── LICENSE ├── README.md ├── examples ├── examples_dmrs.py ├── examples_exact_matching.py ├── examples_mapping.py ├── examples_query.py └── examples_toy_robot.py ├── pydmrs ├── __config__ │ ├── default_interface.conf │ └── default_simplification.conf ├── __init__.py ├── _exceptions.py ├── components.py ├── core.py ├── graphlang │ ├── __init__.py │ └── graphlang.py ├── mapping │ ├── __init__.py │ ├── mapping.py │ └── paraphrase.py ├── matching │ ├── __init__.py │ ├── aligned_matching.py │ ├── common.py │ ├── exact_matching.py │ ├── general_matching.py │ ├── match_evaluation.py │ └── query.py ├── pydelphin_interface.py ├── rooted.py ├── serial.py ├── simplification │ ├── __init__.py │ └── gpred_filtering.py ├── utils.py └── visualization │ ├── index.html │ └── static │ ├── bootstrap.min.css │ ├── bootstrap.min.js │ ├── d3.min.js │ ├── d3.min.js-LICENSE │ ├── dmrs.css │ ├── dmrs.js │ ├── jquery-1.12.3.min.js │ └── visualization.js ├── setup.cfg ├── setup.py └── tests ├── matching ├── test_aligned_matching.py └── test_general_matching.py ├── test_components.py └── test_core.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # Eclipse 60 | .project 61 | .pydevproject 62 | 63 | # PyCharm 64 | .idea -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 DELPH-IN 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pydmrs 2 | 3 | A library for manipulating DMRS structures. 4 | 5 | ### References 6 | 7 | - [Copestake (2009)](http://www.aclweb.org/anthology/E/E09/E09-1001.pdf) 8 | - [Copestake et al. (2016)](http://www.lrec-conf.org/proceedings/lrec2016/pdf/634_Paper.pdf) 9 | -------------------------------------------------------------------------------- /examples/examples_dmrs.py: -------------------------------------------------------------------------------- 1 | from pydmrs.components import Pred, GPred, RealPred, Sortinfo, EventSortinfo, InstanceSortinfo 2 | from pydmrs.core import Node, Link, DictDmrs 3 | 4 | 5 | def the(): 6 | dmrs = DictDmrs() 7 | dmrs.add_node(Node(pred=RealPred('the', 'q'))) # node id set automatically 8 | return dmrs 9 | 10 | 11 | def the_cat(): 12 | dmrs = DictDmrs(surface='the cat') 13 | dmrs.add_node(Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)) 14 | dmrs.add_node(Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7, 15 | sortinfo=InstanceSortinfo(pers='3', num='sg', 16 | ind='+'))) # underspecified sortinfo 17 | dmrs.add_link(Link(start=1, end=2, rargname='RSTR', post='H')) 18 | return dmrs 19 | 20 | 21 | def the_mouse(): 22 | dmrs = DictDmrs(surface='the mouse') 23 | dmrs.add_node(Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)) 24 | dmrs.add_node(Node(nodeid=2, pred=RealPred('mouse', 'n', '1'), cfrom=4, cto=9, 25 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))) 26 | dmrs.add_link(Link(start=1, end=2, rargname='RSTR', post='H')) 27 | return dmrs 28 | 29 | 30 | def dog_cat(): 31 | dmrs = DictDmrs(surface='dog cat') 32 | dmrs.add_node(Node(pred=RealPred('dog', 'n', '1'), cfrom=0, cto=3, 33 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))) 34 | dmrs.add_node(Node(pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7, 
35 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))) 36 | return dmrs 37 | 38 | 39 | def the_dog_chases_the_cat(): 40 | return DictDmrs( 41 | surface='the dog chases the cat', 42 | nodes=[Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3), 43 | Node(nodeid=2, pred=RealPred('dog', 'n', '1'), cfrom=4, cto=7, 44 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 45 | Node(nodeid=3, pred=RealPred('chase', 'v', '1'), cfrom=8, cto=14, 46 | sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')), 47 | Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18), 48 | Node(nodeid=5, pred=RealPred('cat', 'n', '1'), cfrom=19, cto=22, 49 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))], 50 | links=[Link(start=1, end=2, rargname='RSTR', post='H'), 51 | Link(start=3, end=2, rargname='ARG1', post='NEQ'), 52 | Link(start=3, end=5, rargname='ARG2', post='NEQ'), 53 | Link(start=4, end=5, rargname='RSTR', post='H')], 54 | index=3, 55 | top=3) 56 | 57 | 58 | def the_cat_chases_the_dog(): 59 | return DictDmrs( 60 | surface='the cat chases the dog', 61 | nodes=[Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3), 62 | Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7, 63 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 64 | Node(nodeid=3, pred=RealPred('chase', 'v', '1'), cfrom=8, cto=14, 65 | sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')), 66 | Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18), 67 | Node(nodeid=5, pred=RealPred('dog', 'n', '1'), cfrom=19, cto=22, 68 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))], 69 | links=[Link(start=1, end=2, rargname='RSTR', post='H'), 70 | Link(start=3, end=2, rargname='ARG1', post='NEQ'), 71 | Link(start=3, end=5, rargname='ARG2', post='NEQ'), 72 | Link(start=4, end=5, rargname='RSTR', post='H')], 73 | index=3, 74 | top=3) 75 | 76 | 77 | def the_dog_chases_the_mouse(): 78 | return DictDmrs( 79 | nodes=[Node(nodeid=1, 
pred=RealPred('the', 'q')), 80 | Node(nodeid=2, pred=RealPred('dog', 'n', '1'), 81 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 82 | Node(nodeid=3, pred=RealPred('chase', 'v', '1'), 83 | sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')), 84 | Node(nodeid=4, pred=RealPred('the', 'q')), 85 | Node(nodeid=5, pred=RealPred('mouse', 'n', '1'), 86 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))], 87 | links=[Link(start=1, end=2, rargname='RSTR', post='H'), 88 | Link(start=3, end=2, rargname='ARG1', post='NEQ'), 89 | Link(start=3, end=5, rargname='ARG2', post='NEQ'), 90 | Link(start=4, end=5, rargname='RSTR', post='H')], 91 | index=3, 92 | top=3) 93 | 94 | 95 | def the_dog_chases_the_cat_and_the_mouse(): 96 | return DictDmrs( 97 | nodes=[Node(nodeid=1, pred=RealPred('the', 'q')), 98 | Node(nodeid=2, pred=RealPred('dog', 'n', '1'), 99 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 100 | Node(nodeid=3, pred=RealPred('chase', 'v', '1'), 101 | sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')), 102 | Node(nodeid=4, pred=RealPred('the', 'q')), 103 | Node(nodeid=5, pred=RealPred('cat', 'n', '1'), 104 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 105 | Node(nodeid=6, pred=GPred('udef_q')), 106 | Node(nodeid=7, pred=RealPred('and', 'c'), 107 | sortinfo=InstanceSortinfo(pers='3', num='pl')), 108 | Node(nodeid=8, pred=RealPred('the', 'q')), 109 | Node(nodeid=9, pred=RealPred('mouse', 'n', '1'), 110 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))], 111 | links=[Link(start=1, end=2, rargname='RSTR', post='H'), 112 | Link(start=3, end=2, rargname='ARG1', post='NEQ'), 113 | Link(start=3, end=7, rargname='ARG2', post='NEQ'), 114 | Link(start=4, end=5, rargname='RSTR', post='H'), 115 | Link(start=6, end=7, rargname='RSTR', post='H'), 116 | Link(start=7, end=5, rargname='L-INDEX', post='NEQ'), 117 | Link(start=7, end=9, rargname='R-INDEX', post='NEQ'), 118 | Link(start=8, end=9, rargname='RSTR', 
post='H')], 119 | index=3, 120 | top=3) 121 | 122 | 123 | def the_dog_chases_the_cat_and_the_cat_chases_the_mouse(): 124 | return DictDmrs( 125 | nodes=[Node(nodeid=1, pred=RealPred('the', 'q')), 126 | Node(nodeid=2, pred=RealPred('dog', 'n', '1'), 127 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 128 | Node(nodeid=3, pred=RealPred('chase', 'v', '1'), 129 | sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')), 130 | Node(nodeid=4, pred=RealPred('the', 'q')), 131 | Node(nodeid=5, pred=RealPred('cat', 'n', '1'), 132 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 133 | Node(nodeid=6, pred=RealPred('and', 'c'), 134 | sortinfo=InstanceSortinfo(pers='3', num='pl')), 135 | Node(nodeid=7, pred=RealPred('the', 'q')), 136 | Node(nodeid=8, pred=RealPred('cat', 'n', '1'), 137 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')), 138 | Node(nodeid=9, pred=RealPred('chase', 'v', '1'), 139 | sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')), 140 | Node(nodeid=10, pred=RealPred('the', 'q')), 141 | Node(nodeid=11, pred=RealPred('mouse', 'n', '1'), 142 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))], 143 | links=[Link(start=1, end=2, rargname='RSTR', post='H'), 144 | Link(start=3, end=2, rargname='ARG1', post='NEQ'), 145 | Link(start=3, end=5, rargname='ARG2', post='NEQ'), 146 | Link(start=4, end=5, rargname='RSTR', post='H'), 147 | Link(start=6, end=3, rargname='L-INDEX', post='NEQ'), 148 | Link(start=6, end=3, rargname='L-HNDL', post='H'), 149 | Link(start=6, end=9, rargname='R-INDEX', post='NEQ'), 150 | Link(start=6, end=9, rargname='R-HNDL', post='H'), 151 | Link(start=7, end=8, rargname='RSTR', post='H'), 152 | Link(start=9, end=8, rargname='ARG1', post='NEQ'), 153 | Link(start=9, end=11, rargname='ARG2', post='NEQ'), 154 | Link(start=10, end=11, rargname='RSTR', post='H')], 155 | index=6, 156 | top=6) 157 | 158 | 159 | def predsort(): 160 | dmrs = DictDmrs() 161 | dmrs.add_node(Node(pred=Pred(), 
sortinfo=Sortinfo())) # underspecified predicate and sortinfo 162 | return dmrs 163 | 164 | 165 | def noun(): 166 | dmrs = DictDmrs() 167 | dmrs.add_node( 168 | Node(pred=RealPred('?', 'n', 'unknown'), sortinfo=Sortinfo())) # underspecified noun and sortinfo 169 | return dmrs 170 | -------------------------------------------------------------------------------- /examples/examples_exact_matching.py: -------------------------------------------------------------------------------- 1 | from pydmrs.matching.exact_matching import dmrs_exact_matching 2 | import examples.examples_dmrs as examples 3 | 4 | 5 | if __name__ == '__main__': 6 | 7 | # "the" - "the dog chases the cat" 8 | assert len(list(dmrs_exact_matching(examples.the(), examples.the_dog_chases_the_cat()))) == 2 9 | 10 | # "the cat" - "the dog chases the cat" 11 | assert len(list(dmrs_exact_matching(examples.the_cat(), examples.the_dog_chases_the_cat()))) == 1 12 | 13 | # "dog cat" - "the dog chases the cat" 14 | assert len(list(dmrs_exact_matching(examples.dog_cat(), examples.the_dog_chases_the_cat()))) == 1 15 | 16 | # "the dog chases the cat" - "the dog chases the cat" 17 | assert len(list(dmrs_exact_matching(examples.the_dog_chases_the_cat(), examples.the_dog_chases_the_cat()))) == 1 18 | 19 | # "the cat chases the dog" - "the dog chases the cat" 20 | assert not len(list(dmrs_exact_matching(examples.the_cat_chases_the_dog(), examples.the_dog_chases_the_cat()))) 21 | 22 | # "the dog chases the cat" - "the dog chases the cat and the mouse" 23 | assert not len(list(dmrs_exact_matching(examples.the_dog_chases_the_cat(), examples.the_dog_chases_the_cat_and_the_mouse()))) 24 | 25 | # "the dog chases the cat" - "the dog chases the cat and the cat chases the mouse" 26 | assert len(list(dmrs_exact_matching(examples.the_dog_chases_the_cat(), examples.the_dog_chases_the_cat_and_the_cat_chases_the_mouse()))) == 1 27 | 28 | # "the cat" - "the dog chases the cat and the cat chases the mouse" 29 | assert 
len(list(dmrs_exact_matching(examples.the_cat(), examples.the_dog_chases_the_cat_and_the_cat_chases_the_mouse()))) == 2 30 | 31 | # "dog cat" - "the dog chases the cat and the cat chases the mouse" 32 | assert len(list(dmrs_exact_matching(examples.dog_cat(), examples.the_dog_chases_the_cat_and_the_cat_chases_the_mouse()))) == 2 33 | 34 | # predsort - "the dog chases the cat" 35 | assert len(list(dmrs_exact_matching(examples.predsort(), examples.the_dog_chases_the_cat()))) == 5 36 | 37 | # noun - "the dog chases the cat" 38 | assert len(list(dmrs_exact_matching(examples.noun(), examples.the_dog_chases_the_cat()))) == 2 39 | -------------------------------------------------------------------------------- /examples/examples_mapping.py: -------------------------------------------------------------------------------- 1 | from pydmrs.pydelphin_interface import parse, generate 2 | from pydmrs.mapping.mapping import dmrs_mapping 3 | from pydmrs.graphlang.graphlang import parse_graphlang 4 | import examples.examples_dmrs as examples 5 | 6 | 7 | if __name__ == '__main__': 8 | 9 | # basic functionality 10 | dmrs = examples.the_dog_chases_the_cat() 11 | search_dmrs = parse_graphlang('[1]:_the_q') 12 | replace_dmrs = parse_graphlang('[1]:_a_q') 13 | 14 | # iterative, all 15 | assert 'A dog chases a cat.' in generate(dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=True, all_matches=True)) 16 | # not iterative, all 17 | assert all(sent in sents for sent, sents in zip(['A dog chases the cat.', 'The dog chases a cat.'], [generate(dmrs) for dmrs in dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=False, all_matches=True)])) 18 | # iterative, not all 19 | assert 'A dog chases the cat.' in generate(dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=True, all_matches=False)) 20 | # not iterative, not all 21 | assert 'A dog chases the cat.' 
in generate(dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=True, iterative=False, all_matches=False)) 22 | # original dmrs did not change so far 23 | assert 'The dog chases the cat.' in generate(dmrs) 24 | # iterative, not all 25 | dmrs = examples.the_dog_chases_the_cat() 26 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False, iterative=True, all_matches=False) 27 | assert 'A dog chases the cat.' in generate(dmrs) 28 | # iterative, all 29 | dmrs = examples.the_dog_chases_the_cat() 30 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False, iterative=True, all_matches=True) 31 | assert 'A dog chases a cat.' in generate(dmrs) 32 | 33 | 34 | 35 | dmrs = parse('Kim eats and Kim sleeps.')[0] 36 | search_dmrs = parse_graphlang('[4]:node=1 <-1- [2]:node <-l- [1]:_and_c e? -r-> [3]:node -1-> node=1 <-- proper_q; :2 <-lh- :1 -rh-> :3') 37 | replace_dmrs = parse_graphlang('[4]:node <-1- [2]:node <-l- [1]:_and_c e? -r-> [3]:node -1-> :4; :2 <=lh= :1 =rh=> :3') 38 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 39 | assert 'Kim eats and sleeps.' in generate(dmrs) 40 | 41 | 42 | # some examples inspired by examples from the AMR specification 43 | 44 | dmrs = parse('He described the mission as a failure.')[0] 45 | search_dmrs = parse_graphlang('[2]:node <-2- *[1]:_describe_v_as e? -3-> [3]:node') 46 | replace_dmrs = parse_graphlang('pronoun_q --> pron x[3sn_s] <-2- [1]:_describe_v_to e? <-2h- *_as_x_subord e[pui--] -1h-> _be_v_id e[ppi--] -1-> [2]:node; :_be_v_id -2-> [3]:node') 47 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 48 | assert 'As he described it, the mission is a failure.' in generate(dmrs) 49 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 50 | assert 'He described the mission as a failure.' in generate(dmrs) 51 | 52 | dmrs = parse('The boy can go.')[0] 53 | search_dmrs = parse_graphlang('[1]:_can_v_modal e[p????] 
-1h-> [2]:_v e[pui--]') 54 | replace_dmrs = parse_graphlang('[1]:_possible_a_for e[o????] -1h-> [2]:_v e[ppi--]') 55 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 56 | assert 'It is possible that the boy goes.' in generate(dmrs) 57 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 58 | assert 'The boy can go.' in generate(dmrs) 59 | 60 | dmrs = parse('The boy can\'t go.')[0] 61 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 62 | assert 'It is not possible that the boy goes.' in generate(dmrs) 63 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 64 | assert 'The boy can\'t go.' in generate(dmrs) 65 | 66 | dmrs = parse('The boy must go.')[0] 67 | search_dmrs = parse_graphlang('[1]:_must_v_modal e? -1h-> [2]:_v e[pui--]') 68 | replace_dmrs = parse_graphlang('[1]:_necessary_a_for e? -1h-> [2]:_v e[ppi--]') 69 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 70 | assert 'It is necessary that the boy goes.' in generate(dmrs) 71 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 72 | assert 'The boy must go.' in generate(dmrs) 73 | 74 | dmrs = parse('The boy should go.')[0] 75 | search_dmrs = parse_graphlang('[1]:_should_v_modal e? -1h-> [2]:_v e[pui--]') 76 | replace_dmrs = parse_graphlang('[1]:_recommend_v_to e? -2h-> [2]:_v e[ppi--]') 77 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 78 | assert 'That the boy goes, is recommended.' in generate(dmrs) 79 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 80 | assert 'The boy should go.' in generate(dmrs) 81 | 82 | dmrs = parse('The boy is likely to go.')[0] 83 | search_dmrs = parse_graphlang('[1]:_likely_a_1 e? -1h-> [2]:_v e[oui--]') 84 | replace_dmrs = parse_graphlang('[1]:_likely_a_1 e? -1h-> [2]:_v e[ppi--]') 85 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 86 | assert 'It is likely that the boy goes.' 
in generate(dmrs) 87 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 88 | assert 'The boy is likely to go.' in generate(dmrs) 89 | 90 | dmrs = parse('The boy would rather go.')[0] 91 | search_dmrs = parse_graphlang('[1]:_would_v_modal e? -1h-> [2]:_v e? <=1= _rather_a_1 i; :2 -1-> [3]:node') 92 | replace_dmrs = parse_graphlang('[1]:_prefer_v_to e? -2h-> [2]:_v e? -1-> [3]:node <-1- :1') 93 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 94 | assert 'The boy prefers to go.' in generate(dmrs) 95 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 96 | assert 'The boy would rather go.' in generate(dmrs) 97 | 98 | dmrs = parse('I don\'t have any money.')[0] 99 | search_dmrs = parse_graphlang('neg e[pui--] -1h-> [1]:_v e? -2-> [2]:node <-- _any_q') 100 | replace_dmrs = parse_graphlang('[1]:_v e? -2-> [2]:node <-- _no_q') 101 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 102 | assert 'I have no money.' in generate(dmrs) 103 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 104 | assert 'I don\'t have any money.' in generate(dmrs) 105 | 106 | dmrs = parse('Kim doesn\'t like any cake.')[0] 107 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 108 | assert 'Kim likes no cake.' in generate(dmrs) 109 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 110 | assert 'Kim doesn\'t like any cake.' in generate(dmrs) 111 | 112 | dmrs = parse('The boy doesn\'t think his team will win.')[0] 113 | search_dmrs = parse_graphlang('neg e[pui--] -1h-> [1]:_v e? -2h-> [2]:_v e?') 114 | replace_dmrs = parse_graphlang('[1]:_v e? -2h-> neg e[pui--] -1h-> [2]:_v e?') 115 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 116 | assert 'The boy thinks his team won\'t win.' in generate(dmrs) 117 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 118 | assert 'The boy doesn\'t think his team will win.' 
in generate(dmrs) 119 | 120 | dmrs = parse('I don\'t believe that Kim likes cake.')[0] 121 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 122 | assert 'I believe that Kim doesn\'t like cake.' in generate(dmrs) 123 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 124 | assert 'I don\'t believe that Kim likes cake.' in generate(dmrs) 125 | 126 | dmrs = parse('I don\'t think that Kim doesn\'t like cake.')[0] 127 | search_dmrs = parse_graphlang('neg e[pui--] -1h-> [1]:_v e? -2h-> neg e[pui--] -1h-> [2]:_v e?') 128 | replace_dmrs = parse_graphlang('[1]:_v e? -2h-> [2]:_v e?') 129 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 130 | assert 'I think that Kim likes cake.' in generate(dmrs) 131 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 132 | assert 'I don\'t think that Kim doesn\'t like cake.' in generate(dmrs) 133 | 134 | 135 | # Verb particle examples 136 | 137 | dmrs = parse('I look you up.')[0] 138 | search_dmrs = parse_graphlang('[1]:_look_v_up e?') 139 | replace_dmrs = parse_graphlang('[1]:_find_v_1 e?') 140 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 141 | assert 'I find you.' in generate(dmrs) 142 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 143 | assert 'I look you up.' in generate(dmrs) 144 | 145 | dmrs = parse('Kim carries on eating cake.')[0] 146 | search_dmrs = parse_graphlang('[1]:_carry_v_on e?') 147 | replace_dmrs = parse_graphlang('[1]:_continue_v_2 e?') 148 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 149 | assert 'Kim continues eating cake.' in generate(dmrs) 150 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 151 | assert 'Kim carries on eating cake.' 
in generate(dmrs) 152 | 153 | dmrs = parse('Alice passed a message on to Bob.')[0] 154 | search_dmrs = parse_graphlang('[1]:_pass_v_on e?') 155 | replace_dmrs = parse_graphlang('[1]:_give_v_1 e?') 156 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 157 | assert 'Alice gave a message to Bob.' in generate(dmrs) 158 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 159 | assert 'Alice passed a message on to Bob.' in generate(dmrs) 160 | 161 | dmrs = parse('Bob then gave Alice back the message.')[0] 162 | search_dmrs = parse_graphlang('[1]:node <-2- [2]:_give_v_back e? -3-> [3]:node') 163 | replace_dmrs = parse_graphlang('[3]:node <-2- [2]:_return_v_to e? -3-> [1]:node') 164 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 165 | assert 'Bob then returned the message to Alice.' in generate(dmrs) 166 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 167 | assert 'Bob then gave Alice back the message.' in generate(dmrs) 168 | 169 | dmrs = parse('He keeps on complaining.')[0] 170 | search_dmrs = parse_graphlang('[2]:node <-1- [1]:_keep_v_on e? -2h-> [3]:_v e[pui-+] -1-> :2') 171 | replace_dmrs = parse_graphlang('[1]:_continue_v_2 e? -1h-> [3]:_v e[oui--] -1-> [2]:node') 172 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 173 | assert 'He continues to complain.' in generate(dmrs) 174 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 175 | assert 'He keeps on complaining.' in generate(dmrs) 176 | 177 | dmrs = parse('He takes on great responsibility.')[0] 178 | search_dmrs = parse_graphlang('[1]:_take_v_on e?') 179 | replace_dmrs = parse_graphlang('[1]:_accept_v_1 e?') 180 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 181 | assert 'He accepts great responsibility.' in generate(dmrs) 182 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 183 | assert 'He takes on great responsibility.' 
in generate(dmrs) 184 | 185 | 186 | # determinerless PPs 187 | 188 | dmrs = parse('I found you at last.')[0] 189 | search_dmrs = parse_graphlang('[1]:_at_p e[pui--] -2-> _last_n_1 x[3s_+_] <-- idiom_q_i') 190 | replace_dmrs = parse_graphlang('[1]:_final_a_1 e[pui--]') 191 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 192 | assert 'I found you finally.' in generate(dmrs) 193 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 194 | assert 'I found you at last.' in generate(dmrs) 195 | 196 | dmrs = parse('I am on edge.')[0] 197 | search_dmrs = parse_graphlang('[1]:_on_p e? -2-> _edge_n_of x[3s_+_] <-- idiom_q_i') 198 | replace_dmrs = parse_graphlang('[1]:_nervous_a_about e?') 199 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 200 | assert 'I am nervous.' in generate(dmrs) 201 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 202 | assert 'I am on edge.' in generate(dmrs) 203 | 204 | dmrs = parse('You can see the insects at close range.')[0] 205 | search_dmrs = parse_graphlang('[1]:_at_p e[pui--] -2-> _range_n_of x[3s___] <-- udef_q; :_range_n_of <=1= _close_a_to e[p____]') 206 | replace_dmrs = parse_graphlang('[1]:_from_p_state e[pui--] -2-> _distance_n_1 x[3s_+_] <-- _a_q; :_distance_n_1 <=1= _small_a_1 e[p____]') 207 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 208 | assert 'You can see the insects from a small distance.' in generate(dmrs) 209 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 210 | assert 'You can see the insects at close range.' in generate(dmrs) 211 | 212 | 213 | # idioms 214 | 215 | dmrs = parse('Kim often took advantage of Sandy.')[0] 216 | search_dmrs = parse_graphlang('[2]:node <-3- [1]:_take_v_of-i e? -2-> _advantage_n_i x[3s_+_] <-- idiom_q_i') 217 | replace_dmrs = parse_graphlang('[1]:_benefit_v_from e? -2-> [2]:node') 218 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 219 | assert 'Kim often benefitted from Sandy.' 
in generate(dmrs) 220 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 221 | assert 'Kim often took advantage of Sandy.' in generate(dmrs) 222 | 223 | dmrs = parse('The government keeps tabs on everyone.')[0] 224 | search_dmrs = parse_graphlang('[2]:node <-3- [1]:_keep_v_on-i e? -2-> _tabs_n_i x[3p_+_] <-- udef_q') 225 | replace_dmrs = parse_graphlang('[1]:_watch_v_1 e? -2-> [2]:node') 226 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 227 | assert 'The government watches everyone.' in generate(dmrs) 228 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 229 | assert 'The government keeps tabs on everyone.' in generate(dmrs) 230 | 231 | dmrs = parse('I can give you a hand with your work.')[0] 232 | search_dmrs = parse_graphlang('[2]:node <-3- [1]:_give_v_1 e? -2-> _hand_n_1 x[3s_+_] <-- _a_q') 233 | replace_dmrs = parse_graphlang('[1]:_help_v_1 e? -2-> [2]:node') 234 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 235 | assert 'I can help you with your work.' in generate(dmrs) 236 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 237 | assert 'I can give you a hand with your work.' in generate(dmrs) 238 | 239 | dmrs = parse('The old senator kicked the bucket.')[0] 240 | search_dmrs = parse_graphlang('[1]:_kick_v_i e? -2-> _bucket_n_1 x[3s_+_] <-- _the_q') 241 | replace_dmrs = parse_graphlang('[1]:_die_v_1 e?') 242 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 243 | assert 'The old senator died.' in generate(dmrs) 244 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 245 | assert 'The old senator kicked the bucket.' in generate(dmrs) 246 | 247 | 248 | # light verbs 249 | 250 | dmrs = parse('I give a talk on linguistics.')[0] 251 | search_dmrs = parse_graphlang('[1]:_give_v_1 e? -2-> _talk_n_of-on x[3s_+_] <-- _a_q; :_talk_n_of-on -1-> [2]:node') 252 | replace_dmrs = parse_graphlang('[1]:_talk_v_about e? 
<=1= _about_p e -2-> [2]:node') 253 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 254 | assert 'I talk about linguistics.' in generate(dmrs) 255 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 256 | assert 'I give a talk on linguistics.' in generate(dmrs) 257 | 258 | 259 | # synonyms 260 | 261 | dmrs = parse('Kim loves cake.')[0] 262 | search_dmrs = parse_graphlang('[1]:_love_v_1 e?') 263 | replace_dmrs = parse_graphlang('[1]:_adore_v_1 e?') 264 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 265 | assert 'Kim adores cake.' in generate(dmrs) 266 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 267 | assert 'Kim loves cake.' in generate(dmrs) 268 | 269 | dmrs = parse('I like to play tennis.')[0] 270 | search_dmrs = parse_graphlang('[1]:_like_v_1 e? -2h-> [2]:_v e[pui--]') 271 | replace_dmrs = parse_graphlang('[1]:_enjoy_v_1 e? -2h-> [2]:_v e[pui-+]') 272 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 273 | assert 'I enjoy playing tennis.' in generate(dmrs) 274 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 275 | assert 'I like to play tennis.' in generate(dmrs) 276 | 277 | 278 | # synonyms with re-ordering 279 | 280 | dmrs = parse('Kim gave a book to Sandy.')[0] 281 | search_dmrs = parse_graphlang('[2]:node <-1- [1]:_give_v_1 e? -3-> [3]:node') 282 | replace_dmrs = parse_graphlang('[3]:node <-1- [1]:_get_v_1 e? <=1= _from_p e -2-> [2]:node') 283 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 284 | assert 'Sandy got a book from Kim.' in generate(dmrs) 285 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 286 | assert 'Kim gave a book to Sandy.' in generate(dmrs) 287 | 288 | dmrs = parse('Kim hates spinach.')[0] 289 | search_dmrs = parse_graphlang('[2]:node <-1- [1]:_hate_v_1 e? -2-> [3]:node') 290 | replace_dmrs = parse_graphlang('[3]:node <-1- [1]:_disgust_v_1 e? 
-2-> [2]:node') 291 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 292 | assert 'Spinach disgusts Kim.' in generate(dmrs) 293 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 294 | assert 'Kim hates spinach.' in generate(dmrs) 295 | 296 | dmrs = parse('I like to play tennis.')[0] 297 | search_dmrs = parse_graphlang('[1]:node <-1- [2]:_like_v_1 e? -2h-> [3]:_v e[pui--] -1-> :1') 298 | replace_dmrs = parse_graphlang('udef_q --> nominalization x <-1- [2]:_make_v_cause e? -2h-> _happy_a_with e[pui__] -1-> [1]:node; :nominalization =1h=> [3]:_v e[pui-+]') 299 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 300 | assert 'Playing tennis makes me happy.' in generate(dmrs) 301 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 302 | assert 'I like to play tennis.' in generate(dmrs) 303 | 304 | 305 | # think + subclause examples 306 | 307 | dmrs = parse('I think I will go.')[0] 308 | search_dmrs = parse_graphlang('[1]:_think_v_1 e[????-] -2h-> [2]:_v e[pfi--]') 309 | replace_dmrs = parse_graphlang('[1]:_think_v_of e[????+] -2-> nominalization x <-- udef_q; :nominalization =1h=> [2]:_v e[pui-+]') 310 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 311 | assert 'I am thinking of me going.' in generate(dmrs) 312 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 313 | assert 'I think I will go.' in generate(dmrs) 314 | 315 | dmrs = parse('I think he will go.')[0] 316 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 317 | assert 'I am thinking of him going.' in generate(dmrs) 318 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 319 | assert 'I think he will go.' 
in generate(dmrs) 320 | 321 | 322 | # determinerless PP (with optional node) 323 | 324 | dmrs = parse('I found you at last.')[0] 325 | search_dmrs = parse_graphlang('[1]:_at_p e[pui--] -2-> _last_n_1 x[3s_+_] <-- idiom_q_i; (2):_long_a_1 e[pui__] =1=> :_last_n_1') 326 | replace_dmrs = parse_graphlang('[1]:_final_a_1 e[pui--]') 327 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 328 | assert 'I found you finally.' in generate(dmrs) 329 | dmrs_mapping(dmrs, replace_dmrs, search_dmrs, copy_dmrs=False) 330 | assert 'I found you at last.' in generate(dmrs) 331 | 332 | 333 | # question generation (with subgraph nodes) 334 | 335 | dmrs = parse('Kim gave Sandy a book.')[0] 336 | search_dmrs = parse_graphlang('*[1]:_v e[p????] -1-> {2}:node') 337 | replace_dmrs = parse_graphlang('*[1]:_v e[q????] -1-> [2]:person x[3s___] <-- which_q') 338 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 339 | assert 'Who gave Sandy a book?' in generate(dmrs) 340 | 341 | dmrs = parse('Kim gave Sandy a book.')[0] 342 | search_dmrs = parse_graphlang('*[1]:_v e[p????] -2-> {2}:node') 343 | replace_dmrs = parse_graphlang('*[1]:_v e[q????] -2-> [2]:thing x <-- which_q') 344 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 345 | assert 'What did Kim give Sandy?' in generate(dmrs) 346 | 347 | dmrs = parse('Kim gave Sandy a book.')[0] 348 | search_dmrs = parse_graphlang('*[1]:_v e[p????] -3-> {2}:node') 349 | replace_dmrs = parse_graphlang('*[1]:_v e[q????] -3-> [2]:person x[3s___] <-- which_q') 350 | dmrs_mapping(dmrs, search_dmrs, replace_dmrs, copy_dmrs=False) 351 | assert 'Who did Kim give a book?' 
if __name__ == '__main__':

    # basic functionality: parse every example sentence and keep its first reading
    sentences = ['A mouse ate the whole cheese.',
                 'Lions eat around 15 zebras per year.',
                 'Their children eat so many sweets.',
                 'Potatoes are mostly eaten by humans.']
    dmrs_list = [parse(sentence)[0] for sentence in sentences]
    search_dmrs = '_?1_?_?_rel i <-1- _eat_v_1_rel e? -2-> _?2_?_?_rel i'

    # Expected (eater, eaten) lemma pairs, in the same order as the sentences above
    expected_tuples = [('mouse', 'cheese'), ('lion', 'zebra'), ('child', 'sweet'), ('human', 'potato')]
    expected_dicts = [{'1': eater, '2': eaten} for eater, eaten in expected_tuples]

    # not dict, not per dmrs
    results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=False, results_per_dmrs=False))
    assert len(results) == 4
    for pair in expected_tuples:
        assert pair in results

    # dict, not per dmrs
    results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=True, results_per_dmrs=False))
    assert len(results) == 4
    for mapping in expected_dicts:
        assert mapping in results

    # not dict, per dmrs
    results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=False, results_per_dmrs=True))
    assert len(results) == 4 and all(isinstance(result, list) for result in results)
    for per_dmrs, pair in zip(results, expected_tuples):
        assert pair in per_dmrs

    # dict, per dmrs
    results = list(dmrs_query(dmrs_list, search_dmrs, results_as_dict=True, results_per_dmrs=True))
    assert len(results) == 4 and all(isinstance(result, list) for result in results)
    for per_dmrs, mapping in zip(results, expected_dicts):
        assert mapping in per_dmrs
def simplify(dmrs):
    """
    Simplify an input DMRS, in place, to a form that can be converted to robot commands.

    The steps are: filter the gpreds in extended_filter, remove quantifier nodes,
    apply the predicate substitutions in `rename`, and merge every pair of linked
    nodes described by `shrink` into a single node.  The modified DMRS is returned.
    """
    # Remove unnecessary GPreds (defaults, plus pronouns)
    gpred_filtering(dmrs, extended_filter)

    # Remove quantifiers; iterate over a copy because nodes are removed on the way
    for candidate in copy(dmrs.nodes):
        if dmrs.is_quantifier(candidate.nodeid):
            dmrs.remove_node(candidate.nodeid)

    # Apply the one-to-one predicate replacements
    for before, after in rename:
        for node in dmrs.iter_nodes():
            if node.pred == before:
                node.pred = after

    # Apply the two-nodes-into-one replacements
    for first, label, second, new in shrink:
        for head in copy(dmrs.nodes):
            if not head.pred == first:
                continue
            head_id = head.nodeid
            for link in dmrs.get_out(head_id, rargname=label.rargname, post=label.post):
                absorbed_id = link.end
                if not dmrs[absorbed_id].pred == second:
                    continue
                # Match found: drop the connecting link first so it is not copied below
                dmrs.remove_link(link)
                # Re-attach the links of the absorbed node to the head node
                for outgoing in dmrs.get_out(absorbed_id):
                    dmrs.add_link(Link(head_id, outgoing.end, outgoing.rargname, outgoing.post))
                for incoming in dmrs.get_in(absorbed_id):
                    dmrs.add_link(Link(incoming.start, head_id, incoming.rargname, incoming.post))
                # Remove the absorbed node and give the head node its new predicate
                dmrs.remove_node(absorbed_id)
                dmrs[head_id].pred = new

    return dmrs
'three' is being treated as a noun 16 | number_q_rel 17 | 18 | ## Part of an idiom (indicated by '_i_') when there is no other explicit quantifier for idiom (?) 19 | idiom_q_i_rel 20 | 21 | ## Quantifier for proper names. Shows distinction between explicitly quantified proper names 22 | ## e.g. 'The Kim I saw yesterday' vs. 'Kim jumped up a tree' (last one has proper_q_rel) 23 | ## Almost always present, so it can be removed to reduce complexity. 24 | proper_q_rel 25 | 26 | ## If it doesn't have any other quantifier, it's this. 27 | ## Mostly harmless, without a strong signal. Can be removed to decrease complexity. 28 | udef_q_rel 29 | 30 | ## Used for things like possessive 'her', 'whose'. 31 | ## (investigate further! - found in dates too) 32 | def_explicit_q_rel,def_implicit_q_rel 33 | 34 | ## // end quantifier-like things 35 | 36 | ## Signifies when copulas can't be treated implicitly. 37 | ## e.g. 'Kim is president' as opposed to 'Kim is tall' 38 | cop_id_rel 39 | 40 | ## Signifying a gap in a sentence, e.g. 'Kim doesn't know when' [...] 41 | ## They are supposed to happen here. They often happen due to misparse, 42 | ## which is why we filter them by default. 43 | ellipsis_rel,ellipsis_expl_rel,elliptical_n_rel,ellipsis_ref_rel 44 | 45 | approx_grad_rel 46 | 47 | eventuality_rel 48 | generic_nom_rel,generic_verb_rel 49 | 50 | id_rel 51 | interval_rel,interval_p_end_rel,interval_p_start_rel,hour_prep_rel 52 | property_rel 53 | prpstn_to_prop_rel 54 | string 55 | timezone_p_rel 56 | unknown_rel 57 | unspec_adj_rel 58 | v_event_rel 59 | 60 | ## UNFILTERED THINGS: 61 | 62 | ## Analysis of things like everybody (every body) 63 | # every_q_rel,some_q_rel 64 | 65 | ## Question-like things 66 | 67 | ## Corresponds to any 'what', 'why'. 68 | ## Signals what a question is about, clauses ('Kim wondered why this is so hard') 69 | # which_q_rel 70 | 71 | ## The following examples are distinguished by using these two gpreds 72 | ## 'Kim fell where Sandy fell' vs. 
'Kim fell wherever Sandy fell' 73 | # free_relative_q_rel,free_relative_ever_q_rel 74 | 75 | ## // end question-like things 76 | 77 | ## Signifies a person being addressed in discourse 78 | ## e.g. 'No, Mr. Bond, I expect you to die' 79 | # addressee_rel 80 | 81 | ## Signify discourse expressions such as 'Hello', 'Please' 82 | # greet_rel,polite_rel 83 | 84 | ## Signifies when something that's not normally used as a noun, is used as a noun 85 | ## e.g. 'Playing is fun' 86 | # nominalization_rel 87 | 88 | ## Preposition-like gpreds 89 | 90 | ## Corresponds to 'in' in what manner 91 | ## e.g. 'How did Kim fall' (in what manner did Kim fall?) 92 | # unspec_manner_rel 93 | 94 | ## Indicates noun-noun compounds, preposition-like 95 | ## e.g. 'Kim Smith' 96 | ## compound_name_rel is obsolete in the latest version (everything is compound_rel) 97 | # compound_rel,compound_name_rel 98 | 99 | ## 100 | # temp_loc_x_rel,temp_rel,loc_nonsp_rel 101 | 102 | ## Noun-like gpreds 103 | 104 | ## Question-like things 105 | # manner_rel,person_rel,reason_rel 106 | # place_n_rel,time_n_rel 107 | 108 | ## Temporal gpreds, occurring with year, month, days, hours ... 109 | ## They all (?) have cargs 110 | # minute_rel,numbered_hour_rel,dofw_rel,dofm_rel,mofy_rel,holiday_rel,season_rel,year_range_rel,yofc_rel 111 | 112 | ## Signifying numbers 113 | ## They all have cargs 114 | # basic_card_rel,card_rel,ord_rel 115 | 116 | ## Signifying proper names, e.g. 'Kim' 117 | ## They all have cargs 118 | # named_rel,named_n_rel 119 | 120 | ## Signifies relations between composed numbers 121 | ## e.g. 'two hundred and twenty-three' (two-times-hundred-plus-three-plus-twenty) 122 | # fraction_rel,plus_rel,times_rel,num_seq_rel 123 | 124 | ## Signifies multiple coordination or sentence coordination without an explicit conjunction word 125 | ## e.g. 
class PydmrsError(Exception):
    """Root of the pydmrs exception hierarchy."""


class PydmrsTypeError(PydmrsError, TypeError):
    """A pydmrs error that can also be caught as a built-in TypeError."""


class PydmrsValueError(PydmrsError, ValueError):
    """A pydmrs error that can also be caught as a built-in ValueError."""


class PydmrsKeyError(PydmrsError, KeyError):
    """A pydmrs error that can also be caught as a built-in KeyError."""


class PydmrsWarning(PydmrsError, Warning):
    """A pydmrs warning category; it also derives from PydmrsError."""


class PydmrsDeprecationWarning(PydmrsWarning, DeprecationWarning):
    """A pydmrs warning that can also be handled as a DeprecationWarning."""
def parse_graphlang(
    string,
    cls=ListDmrs,
    queries=None,
    equalities=None,
    anchors=None,
    sortinfo_classes=None,
    sortinfo_shortforms=None
):
    """
    Parse a GraphLang string into a DMRS graph.

    The string is split into statements at newlines and ';'.  Each statement is a
    chain of node descriptions connected by link tokens (tokens containing '<' or
    '>').  A node written as ':name' refers back to a node defined earlier, either
    by one of its reference ids or by its predicate name.  A leading '**' marks
    the index node and a leading '*' marks the top node.

    :param string: The GraphLang description.
    :param cls: DMRS class instantiated with the parsed nodes, links, index and top.
    :param queries: Optional dict passed on to the node and link parsers, which
        store a retriever function in it for every '?name' query.
    :param equalities: Optional dict passed on to the node and link parsers, which
        collect a list of retriever functions in it for every '=name' constraint.
    :param anchors: Optional dict passed on to the node parser, which maps every
        reference id to its node.
    :param sortinfo_classes: Optional mapping from cvarsort letter to sortinfo
        class.  It must not contain 'i'.  The mapping is not modified.
    :param sortinfo_shortforms: Optional short forms of sortinfo values per
        cvarsort; may only be given together with sortinfo_classes.
    :return: An instance of cls.
    """
    if queries is None:
        queries = {}
    if equalities is None:
        equalities = {}
    if anchors is None:
        anchors = {}
    if sortinfo_classes is None:
        sortinfo_classes = default_sortinfo_classes
        assert sortinfo_shortforms is None
        sortinfo_shortforms = default_sortinfo_shortforms
    else:
        if sortinfo_shortforms is None:
            sortinfo_shortforms = dict()
        else:
            assert all(cvarsort in sortinfo_classes for cvarsort in sortinfo_shortforms)
        assert 'i' not in sortinfo_classes
        # Add the 'i' entry to a copy: writing into the caller's dict would make
        # the assertion above fail when the same dict is passed a second time.
        sortinfo_classes = dict(sortinfo_classes)
        sortinfo_classes['i'] = Sortinfo
    nodeid = 1
    nodes = []
    links = []
    index = None
    top = None
    refs = {}  # reference id / predicate name -> node id
    lines = (item for line in string.split('\n') for item in line.split(';') if item)
    for line in lines:
        last_id = -1
        r = 0
        start = True  # True while the first node of the statement is being read
        while r < len(line):
            l = r  # position of link
            while l < len(line) and line[l] == ' ':
                l += 1
            if l >= len(line):
                break
            if start:
                # the first node of a statement is not preceded by a link token
                m = l
            else:
                m = line.index(' ', l) + 1  # position of node (+ sortinfo)
                while line[m] == ' ':
                    m += 1
            r1 = line.find('<', m)  # position of next link
            r2 = line.find('>', m)
            if r1 < m and r2 < m:
                # no further link: the node description runs to the end of the line
                r = len(line) - 1
            else:
                if r1 < m:
                    r = r2
                elif r1 < r2 or r2 < m:
                    r = r1
                else:
                    r = r2
                # step back to the space in front of the next link token
                r = line.rindex(' ', 0, r)
            while line[r] == ' ':
                r -= 1
            r += 1
            if line[m] == ':':
                # back-reference to an already defined node
                ref = line[m+1:r]
                assert ref in refs, 'Invalid reference id.'
                current_id = refs[ref]
            else:
                # TODO: index node?
                if line[m] == '*' and line[m+1] == '*':  # index node
                    assert index is None
                    index = nodeid
                    m += 2
                if line[m] == '*':  # top node
                    assert top is None
                    top = nodeid
                    m += 1
                node, ref_ids, ref_name = _parse_node(line[m:r], nodeid, queries, equalities, anchors, sortinfo_classes, sortinfo_shortforms)
                nodes.append(node)
                current_id = nodeid
                nodeid += 1
                if ref_ids is not None:
                    for ref_id in ref_ids:
                        refs[ref_id] = current_id
                # a node can always be referenced by its predicate name as well
                refs[ref_name] = current_id
            if not start:
                # the link token ends at the first space after its start
                m = line.index(' ', l, m)
                link = _parse_link(line[l:m], last_id, current_id, queries, equalities)
                links.append(link)
            last_id = current_id
            start = False
    return cls(nodes=nodes, links=links, index=index, top=top)
queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]])) 163 | assert not value 164 | pred = Pred() 165 | carg = '?' 166 | sortinfo = Sortinfo() 167 | ref_name = 'node' 168 | elif m < 0: 169 | pred, ref_name = _parse_pred(string[l:], nodeid, queries, equalities) 170 | carg = None 171 | sortinfo = None 172 | else: 173 | pred, ref_name = _parse_pred(string[l:m], nodeid, queries, equalities) 174 | if string[m] == '(': 175 | r = string.index(')', m) 176 | if string[m+1] == '"' and string[r-1] == '"': 177 | carg = string[m+2:r-1] 178 | else: 179 | carg = string[m+1:r] 180 | assert '"' not in carg 181 | carg = _parse_value(carg, '?', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].carg)) 182 | m = r + 1 183 | else: 184 | carg = None 185 | if m < len(string) and string[m] == ' ': 186 | while string[m] == ' ': 187 | m += 1 188 | sortinfo = _parse_sortinfo(string[m:], nodeid, queries, equalities, sortinfo_classes, sortinfo_shortforms) 189 | else: 190 | sortinfo = None 191 | if not ref_ids: 192 | ref_ids = None 193 | node = Node(nodeid, pred, sortinfo=sortinfo, carg=carg) 194 | else: 195 | if ref_ids[0] == '[' and ref_ids[-1] == ']': 196 | ref_ids = ref_ids[1:-1].split(',') 197 | node = AnchorNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg) 198 | elif ref_ids[0] == '(' and ref_ids[-1] == ')': 199 | ref_ids = ref_ids[1:-1].split(',') 200 | node = OptionalNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg) 201 | elif ref_ids[0] == '{' and ref_ids[-1] == '}': 202 | ref_ids = ref_ids[1:-1].split(',') 203 | node = SubgraphNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg) 204 | else: 205 | ref_ids = ref_ids.split(',') 206 | node = Node(nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg) 207 | for ref_id in ref_ids: 208 | assert ref_id not in anchors, 'Reference ids have to be unique.' 
def _parse_pred(string, nodeid, queries, equalities):
    """
    Parse the predicate part of a GraphLang node description.

    :param string: Predicate text, optionally wrapped in double quotes and
        optionally ending in '_rel'.
    :param nodeid: Id of the node being parsed; used by the retriever functions
        registered for '?name' queries and '=name' equality constraints.
    :param queries: Dict receiving the retrievers of query values.
    :param equalities: Dict receiving the retrievers of equality constraints.
    :return: A pair (pred, ref_name), where ref_name is the name under which the
        node can be referenced later on.
    """
    assert string, 'Predicates must not be empty.'
    # str.islower() is False for strings without any cased character, which would
    # reject fully underspecified predicates such as '_?_?'; only upper-case
    # characters are meant to be refused here.
    assert string == string.lower(), 'Predicates must be lower-case.'
    assert ' ' not in string, 'Predicates must not contain spaces.'
    if string[0] == '"' and string[-1] == '"':
        string = string[1:-1]
    assert '"' not in string, 'Predicates must not contain quotes.'
    assert string[0] != '\'', 'Predicates with opening single-quote have been deprecated.'
    # 'pred' / 'predsort', optionally followed by a query or equality marker,
    # stand for a completely underspecified predicate.
    if (string[:4] == 'pred' and (len(string) == 4 or string[4] in special_values)) or (string[:8] == 'predsort' and (len(string) == 8 or string[8] in special_values)):
        i = 8 if string[:8] == 'predsort' else 4
        value = _parse_value(string[i:], None, queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred))
        assert not value
        return Pred(), string[:i]
    rel_suffix = ''
    if string[-4:] == '_rel':
        string = string[:-4]
        rel_suffix = '_rel'
    if string[0] != '_':
        # no leading underscore: grammar predicate
        name = _parse_value(string, '?', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.name))
        return GPred(name), name + rel_suffix
    # leading underscore: real predicate of the form _lemma_pos_sense, where the
    # lemma and the sense may be omitted
    values = string[1:].rsplit('_', 2)
    count = len(values)
    assert count > 0, 'Invalid number of arguments for RealPred.'
    if count == 1:
        # only the part of speech is given
        values.insert(0, '?')
        values.append('unknown')
    elif count == 2:
        # lemma and part of speech are given, no sense
        values.append(None)
    lemma = _parse_value(values[0], '?', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.lemma))
    pos = _parse_value(values[1], 'u', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.pos))  # u ???
    sense = _parse_value(values[2], 'unknown', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].pred.sense))  # unknown ???
    if count == 1:
        ref_name = '_{}{}'.format(pos, rel_suffix)
    elif count == 2:
        ref_name = '_{}_{}{}'.format(lemma, pos, rel_suffix)
    else:
        ref_name = '_{}_{}_{}{}'.format(lemma, pos, sense, rel_suffix)
    return RealPred(lemma, pos, sense), ref_name
def _parse_link(string, left_nodeid, right_nodeid, queries, equalities):
    """
    Parse a single GraphLang link token into a Link.

    A token starting with '<' points from the right node to the left node; a
    token ending with '>' points from the left node to the right node.  The line
    is drawn with '-' or '=' characters.  Whatever sits between the line
    characters is the label specification: nothing, one or two shorthand
    symbols, or (for three or more characters) an explicit 'rargname/post' label.

    :param string: The link token; must not contain spaces.
    :param left_nodeid: Id of the node to the left of the token.
    :param right_nodeid: Id of the node to the right of the token.
    :param queries: Dict receiving the retrievers of '?name' queries.
    :param equalities: Dict receiving the retrievers of '=name' constraints.
    :return: The parsed Link.
    """
    assert ' ' not in string, 'Links must not contain spaces.'
    l = 0
    r = len(string) - 1
    if string[l] == '<':  # pointing left
        start = right_nodeid
        end = left_nodeid
        l += 1
    elif string[r] == '>':  # pointing right
        start = left_nodeid
        end = right_nodeid
        r -= 1
    else:  # invalid link
        assert False, 'Link must have a direction.'
    assert string[l] in '-=' and string[r] in '-=', 'Link line must consist of either "-" or "=".'
    link_char = string[l]
    # Skip the line characters on both sides; afterwards l and r are the first
    # and the last position of the label specification (l > r if there is none).
    while l < len(string) and string[l] == link_char:  # arbitrary left length
        l += 1
    while r >= 0 and string[r] == link_char:  # arbitrary right length
        r -= 1
    if l + 1 < r:  # explicit specification
        r += 1  # make r an exclusive bound so that string[l:r] is the specification
        if string[l:r] == 'rstr':  # rargname RSTR uniquely determines post H
            rargname = 'rstr'
            post = 'h'
        elif string[l:r] == 'eq':  # post EQ uniquely determines rargname None
            rargname = None
            post = 'eq'
        else:
            m = string.find('/', l, r)  # separator between rargname and post
            if m >= 0:
                if l == m and m + 1 == r:
                    # the specification consists of the separator only
                    rargname = None
                    post = None
                elif l == m:
                    # only a post is given
                    rargname = None
                    post = _parse_value(string[m+1:r], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.post for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                elif m + 1 == r:
                    # only a rargname is given
                    rargname = _parse_value(string[l:m], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.rargname for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                    post = None
                else:
                    # problem: doesn't combine rargname and post
                    rargname = _parse_value(string[l:m], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.rargname for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                    post = _parse_value(string[m+1:r], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.post for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
            else:
                # no separator: the whole specification is treated as the rargname
                rargname = _parse_value(string[l:r], '?', queries, equalities, (lambda matching, dmrs: ','.join(link.labelstring for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
                post = None
        return Link(start, end, rargname, post)
    if l > r:  # no specification symbol
        if link_char == '=':
            rargname = None
            post = 'eq'
        else:
            rargname = 'rstr'
            post = 'h'
    else:
        if string[l] == '?':  # no equal constraint
            rargname = '?'
            post = '?'
            # registers a query if the '?' is followed by a name
            value = _parse_value(string[l:r+1], None, queries, equalities, (lambda matching, dmrs: ','.join(link.labelstring for link in dmrs.get_out(matching[start], itr=True) if link.end == matching[end])))
            assert not value
        elif l == r:  # one specification symbol, i.e. variable link
            if link_char == '=':
                post = 'eq'
            else:
                post = 'neq'
        elif l + 1 == r:  # two specification symbol, i.e. handle link
            assert string[r] == 'h', 'Second link specification symbol must be "h".'
            if link_char == '=':
                post = 'heq'
            else:
                post = 'h'
        else:
            assert False  # never reached
        # the first specification symbol selects the rargname
        if string[l] == 'n':  # ARG/ARGN (underspecified ARG)
            rargname = 'arg'
        elif string[l] in '1234':  # ARG{1,2,3,4}
            rargname = 'arg' + str(string[l])
        elif string[l] in 'lr':  # {L,R}-{INDEX,HNDL}
            if l == r:
                rargname = str(string[l]).upper() + '-index'
            else:
                rargname = str(string[l]).upper() + '-hndl'
        elif string[l] != '?':
            assert False, 'Invalid link specification symbol.'
    return Link(start, end, rargname, post)
class AnchorNode(Node):
    """
    A DMRS graph node with additional anchor ids to identify anchor nodes for DMRS mapping.
    """

    def __init__(self, anchors, *args, **kwargs):
        """
        Create a new anchor node instance.
        :param anchors: The anchor ids of this node.
        All further arguments are passed on to the Node constructor.
        """
        super().__init__(*args, **kwargs)
        self.anchors = anchors
        self.required = True
        self.requires_target = True

    @staticmethod
    def _prefer_specified(own, fallback, underspecified):
        """
        Return own, unless it is one of the underspecified markers, in which case fallback is returned.
        """
        return fallback if own in underspecified else own

    def before_map(self, dmrs, nodeid):
        """
        Is applied before the target node is mapped. Does nothing for a plain anchor node.
        :param dmrs: Target DMRS graph.
        :param nodeid: Target node id.
        """
        pass

    def after_map(self, dmrs, nodeid):
        """
        Is applied after the target node is mapped. Does nothing for a plain anchor node.
        :param dmrs: Target DMRS graph.
        :param nodeid: Target node id.
        """
        pass

    def map(self, dmrs, nodeid, hierarchy=None):
        """
        Overrides the values of the target node if they are not underspecified in this anchor node.
        :param dmrs: Target DMRS graph.
        :param nodeid: Target node id.
        :param hierarchy: An optional predicate hierarchy.
        """
        node = dmrs[nodeid]
        if self == node or self.is_less_specific(node, hierarchy=hierarchy):
            # nothing in this anchor node is more specific than the target
            return
        pick = self._prefer_specified

        # predicate: merge value by value if both are of the same kind, otherwise replace
        if isinstance(self.pred, RealPred):
            if isinstance(node.pred, RealPred):
                node.pred = RealPred(
                    pick(self.pred.lemma, node.pred.lemma, ('?',)),
                    pick(self.pred.pos, node.pred.pos, ('u', '?')),
                    pick(self.pred.sense, node.pred.sense, ('unknown', '?')))
            else:
                node.pred = copy.deepcopy(self.pred)
        elif isinstance(self.pred, GPred):
            if isinstance(node.pred, GPred):
                node.pred = GPred(pick(self.pred.name, node.pred.name, ('?',)))
            else:
                node.pred = copy.deepcopy(self.pred)
        elif not isinstance(self.pred, Pred):
            node.pred = None

        # sortinfo: same scheme as for the predicate
        if isinstance(self.sortinfo, EventSortinfo):
            if isinstance(node.sortinfo, EventSortinfo):
                node.sortinfo = EventSortinfo(*(
                    pick(getattr(self.sortinfo, feature), getattr(node.sortinfo, feature), ('u', '?'))
                    for feature in ('sf', 'tense', 'mood', 'perf', 'prog')))
            else:
                node.sortinfo = copy.deepcopy(self.sortinfo)
        elif isinstance(self.sortinfo, InstanceSortinfo):
            if isinstance(node.sortinfo, InstanceSortinfo):
                node.sortinfo = InstanceSortinfo(*(
                    pick(getattr(self.sortinfo, feature), getattr(node.sortinfo, feature), ('u', '?'))
                    for feature in ('pers', 'num', 'gend', 'ind', 'pt')))
            else:
                node.sortinfo = copy.deepcopy(self.sortinfo)
        elif not isinstance(self.sortinfo, Sortinfo):
            node.sortinfo = None

        if self.carg != '?':
            node.carg = self.carg

    def unify(self, other, hierarchy=None):
        """
        Unify this node with another node; the result is stored in this node.
        :param other: The node to unify with.
        :param hierarchy: An optional predicate hierarchy.
        :raises PydmrsError: If the predicates, sortinfos or cargs are incompatible.
        """
        hierarchy = hierarchy or dict()
        pick = self._prefer_specified

        if (
            type(self.pred) is RealPred and
            type(other.pred) is RealPred and
            (self.pred.lemma == other.pred.lemma or self.pred.lemma == '?' or other.pred.lemma == '?') and
            (self.pred.pos == other.pred.pos or self.pred.pos in ('u', '?') or other.pred.pos in ('u', '?')) and
            (self.pred.sense == other.pred.sense or self.pred.sense in ('unknown', '?') or other.pred.sense in ('unknown', '?'))
        ):
            # RealPred and predicate values are either equal or underspecified
            self.pred = RealPred(
                pick(self.pred.lemma, other.pred.lemma, ('?',)),
                pick(self.pred.pos, other.pred.pos, ('u', '?')),
                pick(self.pred.sense, other.pred.sense, ('unknown', '?')))
        elif (
            type(self.pred) is GPred and
            type(other.pred) is GPred and
            (self.pred.name == other.pred.name or self.pred.name == '?' or other.pred.name == '?')
        ):
            # GPred and predicate values are either equal or underspecified
            self.pred = GPred(pick(self.pred.name, other.pred.name, ('?',)))
        elif type(self.pred) is Pred or str(other.pred) in hierarchy.get(str(self.pred), ()):
            # predicate is underspecified, or predicate is more general according to the hierarchy
            self.pred = other.pred
        elif type(other.pred) is Pred or str(self.pred) in hierarchy.get(str(other.pred), ()):
            # other is underspecified, or predicate is more specific according to the hierarchy
            pass
        else:
            raise PydmrsError("Node predicates cannot be unified: {}, {}".format(self.pred, other.pred))

        if self.sortinfo is None and other.sortinfo is None:
            # Checked first: the feature-wise comparison below would otherwise be
            # attempted on None and fail with an AttributeError.
            pass
        elif (
            type(self.sortinfo) is not Sortinfo and
            isinstance(other.sortinfo, type(self.sortinfo)) and
            all(
                (self.sortinfo[key] == other.sortinfo[key]) or (self.sortinfo[key] in ('u', '?')) or (other.sortinfo[key] in ('u', '?'))
                for key in self.sortinfo.features
            )
        ):
            # same sortinfo type and values are either equal or underspecified
            self.sortinfo = type(self.sortinfo)(*(
                pick(self.sortinfo[key], other.sortinfo[key], ('u', '?'))
                for key in self.sortinfo.features))
        elif type(self.sortinfo) is Sortinfo and isinstance(other.sortinfo, Sortinfo):
            # sortinfo is underspecified
            self.sortinfo = other.sortinfo
        elif type(other.sortinfo) is Sortinfo and isinstance(self.sortinfo, Sortinfo):
            # other is underspecified
            pass
        else:
            raise PydmrsError("Node sortinfos cannot be unified: {}, {}".format(self.sortinfo, other.sortinfo))

        if self.carg == other.carg or other.carg == '?':
            # same carg, or other is underspecified
            pass
        elif self.carg == '?':
            # carg is underspecified
            self.carg = other.carg
        else:
            raise PydmrsError("Node cargs cannot be unified: {}, {}".format(self.carg, other.carg))
138 | The attached subgraph consists of the nodes which are connected only via this node to the top node of the graph, and would be disconnected if the subgraph node was removed. 139 | """ 140 | 141 | def __init__(self, *args, **kwargs): 142 | """ 143 | Create a new subgraph node instance. 144 | """ 145 | super().__init__(*args, **kwargs) 146 | self.requires_target = False 147 | 148 | def before_map(self, dmrs, nodeid): 149 | """ 150 | Removes the subgraph attached to the target node. 151 | :param dmrs Target DMRS graph (requires the top node specified). 152 | :param nodeid Target node id. 153 | """ 154 | assert dmrs.top is not None, 'Top node has to be specified for subgraph node to map.' 155 | node = dmrs[nodeid] 156 | dmrs.remove_node(nodeid) 157 | dmrs.remove_nodes(dmrs.disconnected_nodeids()) 158 | dmrs.add_node(node) 159 | 160 | 161 | class OptionalNode(AnchorNode): 162 | """ 163 | A DMRS anchor node which is not required. 164 | """ 165 | 166 | def __init__(self, *args, **kwargs): 167 | """ 168 | Create a new optional node instance. 169 | """ 170 | super().__init__(*args, **kwargs) 171 | self.required = False 172 | self.requires_target = False 173 | 174 | 175 | def dmrs_mapping(dmrs, search_dmrs, replace_dmrs, equalities=(), hierarchy=None, copy_dmrs=True, iterative=True, all_matches=True, require_connected=True, max_matches=100): 176 | """ 177 | Performs an exact DMRS (sub)graph matching of a (sub)graph against a containing graph. 178 | :param dmrs DMRS graph to map. 179 | :param search_dmrs DMRS subgraph to replace. 180 | :param replace_dmrs DMRS subgraph to replace with. 181 | :param equalities 182 | :param hierarchy An optional predicate hierarchy. 183 | :param copy_dmrs True if DMRS graph argument should be copied before being mapped. 184 | :param iterative True if all possible mappings should be performed iteratively to the same DMRS graph, instead of a separate copy per mapping (iterative=False requires copy_dmrs=True). 
185 | :param all_matches True if all possible matches should be returned, instead of only the first (or None). 186 | :param require_connected True if mappings resulting in a disconnected DMRS graph should be ignored. 187 | :param max_matches: Maximum number of matches. 188 | :return Mapped DMRS graph (resp. a list of graphs in case of iterative=False and all_matches=True) 189 | """ 190 | assert copy_dmrs or iterative, 'Invalid argument combination.' 191 | 192 | # extract anchor node mapping between search_dmrs and replace_dmrs 193 | sub_mapping = {} 194 | optional_nodeids = [] 195 | for search_node in search_dmrs.iter_nodes(): 196 | if not isinstance(search_node, AnchorNode): 197 | continue 198 | if not search_node.required: 199 | optional_nodeids.append(search_node.nodeid) 200 | for replace_node in replace_dmrs.iter_nodes(): 201 | if not isinstance(replace_node, AnchorNode) or all(anchor not in replace_node.anchors for anchor in search_node.anchors): 202 | continue 203 | assert search_node.nodeid not in sub_mapping, 'Node matches multiple nodes.' + str(search_node) 204 | sub_mapping[search_node.nodeid] = replace_node.nodeid 205 | if search_node.nodeid not in sub_mapping: 206 | assert not search_node.requires_target, 'Un-matched anchor node.' 
207 | 208 | # set up variables according to settings 209 | if iterative: 210 | result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs 211 | matchings = dmrs_exact_matching(search_dmrs, dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True) 212 | else: 213 | matchings = dmrs_exact_matching(search_dmrs, dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True) 214 | if not iterative and all_matches: 215 | result = [] 216 | 217 | # continue while there is a match for search_dmrs 218 | count = 0 219 | for _ in range(max_matches): 220 | if iterative: 221 | pass 222 | # matchings = dmrs_exact_matching(search_dmrs, result_dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True) 223 | else: 224 | result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs 225 | 226 | # return mapping(s) if there are no more matches left 227 | try: 228 | search_matching = next(matchings) 229 | count += 1 230 | except StopIteration: 231 | if not all_matches: 232 | if copy_dmrs: 233 | return None 234 | else: 235 | return False 236 | elif iterative: 237 | if not require_connected or result_dmrs.is_connected(): 238 | if copy_dmrs: 239 | return result_dmrs 240 | else: 241 | return count > 0 242 | else: 243 | if copy_dmrs: 244 | return None 245 | else: 246 | return False 247 | else: 248 | return result 249 | 250 | # remove nodes in the matched search_dmrs if they are no anchor nodes, otherwise perform mapping() 251 | # mapping() performs the mapping process (with whatever it involves) specific to this node type (e.g. 
fill underspecified values) 252 | for nodeid in search_dmrs: 253 | search_node = search_dmrs[nodeid] 254 | if isinstance(search_node, AnchorNode): 255 | search_node.before_map(result_dmrs, search_matching[nodeid]) 256 | replace_matching = {} 257 | for nodeid in search_matching: 258 | if nodeid in sub_mapping: 259 | replace_dmrs[sub_mapping[nodeid]].map(result_dmrs, search_matching[nodeid], hierarchy=hierarchy) 260 | replace_dmrs[sub_mapping[nodeid]].after_map(result_dmrs, search_matching[nodeid]) 261 | replace_matching[sub_mapping[nodeid]] = search_matching[nodeid] 262 | elif search_matching[nodeid] is not None: 263 | result_dmrs.remove_node(search_matching[nodeid]) 264 | 265 | # add copies of the non-anchor nodes for the matched replace_dmrs 266 | for nodeid in replace_dmrs: 267 | if nodeid in replace_matching: 268 | continue 269 | node = copy.deepcopy(replace_dmrs[nodeid]) 270 | node.nodeid = result_dmrs.free_nodeid() 271 | result_dmrs.add_node(node) 272 | replace_matching[nodeid] = node.nodeid 273 | 274 | # set top/index if specified in replace_dmrs 275 | if replace_dmrs.top is not None: 276 | result_dmrs.top = result_dmrs[replace_matching[replace_dmrs.top.nodeid]] 277 | if replace_dmrs.index is not None: 278 | result_dmrs.index = result_dmrs[replace_matching[replace_dmrs.index.nodeid]] 279 | 280 | # remove all links in the matched search_dmrs 281 | links = [] 282 | matching_values = set(search_matching.values()) 283 | for link in result_dmrs.iter_links(): 284 | if link.start in matching_values and link.end in matching_values: 285 | links.append(link) 286 | result_dmrs.remove_links(links) 287 | 288 | # add all links for the matched replace_dmrs 289 | for link in replace_dmrs.iter_links(): 290 | link = Link(replace_matching[link.start], replace_matching[link.end], link.rargname, link.post) 291 | result_dmrs.add_link(link) 292 | 293 | # add/return result 294 | if not require_connected or result_dmrs.is_connected(): 295 | if all_matches and not iterative: 296 | 
def read_paraphrases_file(filename):
    """
    Read paraphrase rules from a file.
    Each rule consists of two consecutive lines (search graph, then replace graph), both handed to
    parse_graphlang unchanged. Consecutive rules are separated by exactly one blank line.
    :param filename: Path of the paraphrases file.
    :return: List of (search_dmrs, replace_dmrs) tuples, in file order.
    :raises AssertionError: If a search line has no replace line, or if the line after a rule is not blank.
    """
    paraphrases = []
    # the context manager closes the file on every exit path, including format errors
    with open(filename, 'r') as file:
        lines = iter(file)
        for search_line in lines:
            replace_line = next(lines, None)
            if replace_line is None:
                # AssertionError is kept for backward compatibility, but raised explicitly so that
                # the check is not stripped when running with -O
                raise AssertionError('Invalid paraphrases file format.')
            # equalities etc
            paraphrases.append((parse_graphlang(search_line), parse_graphlang(replace_line)))
            separator = next(lines, None)
            if separator is None:
                break
            # lines read from a file keep their newline, hence a blank line is only empty once stripped
            if separator.strip():
                raise AssertionError('Invalid paraphrases file format.')
    return paraphrases
def match_nodes(nodes1, nodes2, excluded=None):
    """
    Find the longest order-preserving alignments of equal nodes between two node lists.

    :param nodes1: A list of Nodes from the DMRS to be matched, sorted by span_pred_key.
    :param nodes2: A list of Nodes from the DMRS against which we match, sorted by span_pred_key.
    :param excluded: An optional collection of nodeids from nodes2 which should not be used for matching.

    :return: A list of lists of nodeid pairs. The first element in the pair is from small DMRS, the second from the
             larger one. The pairs are listed in reverse span_pred_key order of the corresponding nodes. Only the
             alignments of maximal length are returned. Returns [] if no match found.
    """
    if not nodes1 or not nodes2:
        return []
    # Normalised once to a frozenset: O(1) membership, and no mutable default shared between calls.
    excluded = frozenset(excluded) if excluded else frozenset()
    matches = []
    earliest = len(nodes1)
    longest = 0
    for i, node2 in enumerate(nodes2):
        if len(nodes2) - i < longest:  # Not enough nodes left to beat the current longest match.
            break
        if node2.nodeid in excluded:
            continue
        for j, node1 in enumerate(nodes1):
            if j > earliest:  # To avoid repetition.
                break
            if node1 == node2:
                # Align the remainders recursively, then put the current pair at the end of every
                # alignment found (which is why the result is in reverse order).
                best_matches = match_nodes(nodes1[j + 1:], nodes2[i + 1:], excluded=excluded)
                if best_matches:
                    for match in best_matches:
                        match.append((node1.nodeid, node2.nodeid))
                else:
                    best_matches = [[(node1.nodeid, node2.nodeid)]]
                earliest = j
                longest = max(longest, len(best_matches[0]))
                matches.extend(best_matches)
    if not matches:
        return []
    max_len = max(map(len, matches))
    return [m for m in matches if len(m) == max_len]
def find_extra_surface_nodeids(nodeids, large_dmrs):
    """ Finds nodeids present in the aligned matched region of the large DMRS,
    but which have no equivalents in the small DMRS.

    :param nodeids Nodeids from the large DMRS which have equivalents in the small one, sorted by span_pred_key of
                   their nodes.
    :param large_dmrs The large DMRS; its nodes attribute is read as an indexable sequence in node order.

    :return A list of additional nodeids sharing the span with nodeids but without equivalents in the small DMRS.
            Empty if nodeids is empty.
    """
    if not nodeids:
        return []
    first_nodeid = nodeids[0]
    last_nodeid = nodeids[-1]
    matched = set(nodeids)
    nodes = large_dmrs.nodes
    max_cto = large_dmrs[last_nodeid].cto
    extra_nodeids = []
    seen_extra = set()  # same content as extra_nodeids, for constant-time membership tests
    reached_start = False
    reached_end = False
    for i, node in enumerate(nodes):
        if node.nodeid == first_nodeid:
            min_cfrom = node.cfrom
            max_cto = max(max_cto, node.cto)
            # Walk backwards over the directly preceding nodes which start at the same character
            # position and do not reach beyond the matched region.
            k = i
            while k > 0:
                prev_node = nodes[k - 1]
                if prev_node.cfrom != min_cfrom or prev_node.cto > max_cto:
                    break
                k -= 1
                extra_nodeids.append(prev_node.nodeid)
                seen_extra.add(prev_node.nodeid)
            reached_start = True
        elif not reached_start:
            continue
        elif reached_end and node.cfrom >= max_cto:
            break
        else:
            max_cto = max(max_cto, node.cto)
            if node.nodeid not in matched and node.nodeid not in seen_extra:
                extra_nodeids.append(node.nodeid)
                seen_extra.add(node.nodeid)
        # Checked for every node of the region, so that a single matched node (which is first and
        # last at once) also ends the region instead of pulling in every following node.
        if node.nodeid == last_nodeid:
            reached_end = True

    return extra_nodeids
def get_link_diff(small_dmrs, matched_subgraph, matching_nodeids):
    """
    Compare the links of the small DMRS with the links of the matched subgraph.
    For every matched node pair, the outgoing and EQ links of the small node are compared (using
    are_equal_links) with the outgoing links of the subgraph node.
    :param small_dmrs A Dmrs which we're matching.
    :param matched_subgraph A Dmrs. A subgraph of the larger DMRS returned as a match for small_dmrs.
    :param matching_nodeids A list of pairs of nodeids. The first nodeid in each pair comes from small_dmrs
                            (None if the subgraph node has no counterpart), the second comes from the large dmrs.
    :return three list of links:
            1) links present only in the small dmrs
            2) links present only in the matched subgraph
            3) common links (the link objects of the small dmrs).
    """
    both = []
    small_only = []
    subgraph_only = []
    checked_eq_links = set()
    for small_nodeid, subgraph_nodeid in matching_nodeids:
        if small_nodeid is None:
            # Subgraph node without counterpart: all of its outgoing links are extra.
            # (Explicit None test, so that a falsy node id such as 0 is still treated as matched.)
            subgraph_only.extend(matched_subgraph.get_out(subgraph_nodeid))
            continue
        small_links = small_dmrs.get_out(small_nodeid) | small_dmrs.get_eq(small_nodeid)
        subgraph_links = list(matched_subgraph.get_out(subgraph_nodeid))
        links_flag = [False] * len(subgraph_links)
        for link1 in small_links:
            # Check if the EQ has been counted already (it is returned for both of its nodes).
            if not link1.rargname:
                if link1 in checked_eq_links:
                    continue
                checked_eq_links.add(link1)
            for k, link2 in enumerate(subgraph_links):
                if are_equal_links(link1, link2, small_dmrs, matched_subgraph):
                    both.append(link1)
                    links_flag[k] = True
                    break
            else:  # no equal subgraph link found
                small_only.append(link1)
        subgraph_only.extend(link for link, flag in zip(subgraph_links, links_flag) if not flag)

    # Links of the small nodes which were not matched at all. The set of matched ids is built once,
    # which also covers the case of an empty matching_nodeids.
    matched_small_nodeids = {small_nodeid for small_nodeid, _ in matching_nodeids}
    checked_eq_links = set()
    for nodeid in small_dmrs:
        if nodeid in matched_small_nodeids:
            continue
        small_only.extend(small_dmrs.get_out(nodeid))
        eq_links = small_dmrs.get_eq(nodeid)
        small_only.extend({link for link in eq_links if link not in checked_eq_links})
        checked_eq_links.update(eq_links)

    return small_only, subgraph_only, both
236 | """ 237 | # Convert DMRSs to SortDictDmrs with span_pred_key node if needed. 238 | if not isinstance(small_dmrs, SortDictDmrs) or (small_dmrs.node_key != span_pred_key): 239 | small_dmrs = small_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key)) 240 | if not isinstance(large_dmrs, SortDictDmrs) or (large_dmrs.node_key != span_pred_key): 241 | large_dmrs = large_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key)) 242 | 243 | # Filter quantifiers. 244 | small_no_qs = [n for n in small_dmrs.nodes if not small_dmrs.is_quantifier(n.nodeid)] 245 | large_no_qs = [n for n in large_dmrs.nodes if not large_dmrs.is_quantifier(n.nodeid)] 246 | # Filter compound_name and compund predicates. 247 | filtered_pred = ['compound', 'compound_name'] 248 | filtered_small = [n for n in small_no_qs if str(n.pred) not in filtered_pred] 249 | filtered_large = [n for n in large_no_qs if str(n.pred) not in filtered_pred] 250 | 251 | longest_matches = match_nodes(filtered_small, filtered_large, 252 | excluded=large_excluded) # list of lists of nodeid pairs 253 | add_quantifier_matches(small_dmrs, large_dmrs, longest_matches) 254 | add_compound_matches(small_dmrs, large_dmrs, longest_matches, filtered_pred) 255 | max_len = len(max(longest_matches, key=len)) if longest_matches else 0 256 | longest_matches = [m for m in longest_matches if len(m) == max_len] 257 | # Returned in reverse span_pred_key order. 
def get_score(small_dmrs, matched_subgraph, matching_nodeids):
    """
    Count the matched, extra and missing nodes and links of a match.
    :param small_dmrs: The Dmrs used as match query.
    :param matched_subgraph: The subgraph of the large Dmrs returned as match.
    :param matching_nodeids: A list of (small nodeid, large nodeid) pairs; the small nodeid is None
                             for subgraph nodes without counterpart in small_dmrs.
    :return: A tuple (num_correct, num_matched, num_expected), where num_correct counts the shared
             nodes and links, num_matched additionally the extra ones of the subgraph, and
             num_expected additionally the ones of small_dmrs missing in the subgraph.
    """
    # Built once instead of once per node of small_dmrs; also valid for an empty matching_nodeids.
    matched_small_nodeids = {pair[0] for pair in matching_nodeids}

    num_extra_nodes = sum(1 for pair in matching_nodeids if pair[0] is None)
    num_matched_nodes = len(matching_nodeids) - num_extra_nodes
    num_missing_nodes = sum(1 for nodeid in small_dmrs if nodeid not in matched_small_nodeids)

    only_small_links, only_subgraph_links, shared_links = get_link_diff(small_dmrs,
                                                                        matched_subgraph,
                                                                        matching_nodeids)
    num_extra_links = len(only_subgraph_links)
    num_missing_links = len(only_small_links)
    num_shared_links = len(shared_links)

    num_correct = num_matched_nodes + num_shared_links
    num_matched = num_correct + num_extra_links + num_extra_nodes
    num_expected = num_correct + num_missing_links + num_missing_nodes

    return num_correct, num_matched, num_expected
25 | else: 26 | return False 27 | -------------------------------------------------------------------------------- /pydmrs/matching/exact_matching.py: -------------------------------------------------------------------------------- 1 | from pydmrs.core import Dmrs 2 | 3 | 4 | def dmrs_exact_matching(sub_dmrs, dmrs, optional_nodeids=(), equalities=(), hierarchy=None, match_top_index=True): 5 | """ 6 | Performs an exact DMRS (sub)graph matching of a (sub)graph against a containing graph. 7 | :param sub_dmrs DMRS (sub)graph to match. 8 | :param dmrs DMRS graph to match against. 9 | :param optional_nodeids 10 | :param equalities 11 | :param hierarchy An optional predicate hierarchy. 12 | :param match_top_index 13 | :return Iterator of dictionaries, mapping node ids of the matched (sub)graph to the corresponding matching node id in the containing graph. 14 | """ 15 | hierarchy = hierarchy or dict() 16 | 17 | if not isinstance(sub_dmrs, Dmrs) or not isinstance(dmrs, Dmrs): 18 | return 19 | matching = {} 20 | matching_values = set() 21 | matches = {} 22 | 23 | # find matchable nodes and add unambiguous matchings 24 | for sub_node in sub_dmrs.iter_nodes(): 25 | match = [node.nodeid for node in dmrs.iter_nodes() if sub_node == node or sub_node.is_less_specific(node, hierarchy=hierarchy)] 26 | if match: 27 | if sub_node.nodeid in optional_nodeids: 28 | match.append(None) 29 | if len(match) == 1: 30 | matching[sub_node.nodeid] = match[0] 31 | matching_values.add(match[0]) 32 | continue 33 | matches[sub_node.nodeid] = match 34 | elif sub_node.nodeid not in optional_nodeids: 35 | return 36 | 37 | # match index and top 38 | if match_top_index: 39 | # if sub_dmrs.top is None: 40 | # if dmrs.top is not None: 41 | # top = dmrs.top.nodeid 42 | # if top in matching.values(): 43 | # return 44 | # for sub_nodeid, match in matches.items(): 45 | # if top in match: 46 | # match.remove(top) 47 | # if not match and sub_nodeid not in optional_nodeids: 48 | # return 49 | # else: 50 | if 
sub_dmrs.top is not None: 51 | if dmrs.top is None: 52 | return 53 | sub_top = sub_dmrs.top.nodeid 54 | top = dmrs.top.nodeid 55 | if sub_top in matching: 56 | if matching[sub_top] != top: 57 | return 58 | else: 59 | if top in matches[sub_top]: 60 | matching[sub_top] = top 61 | matching_values.add(top) 62 | del matches[sub_top] 63 | else: 64 | return 65 | # if sub_dmrs.index is None: 66 | # if dmrs.index is not None: 67 | # index = dmrs.index.nodeid 68 | # if index in matching.values(): 69 | # return 70 | # for sub_nodeid, match in matches.items(): 71 | # if index in match: 72 | # match.remove(index) 73 | # if not match and sub_nodeid not in optional_nodeids: 74 | # return 75 | # else: 76 | if sub_dmrs.index is not None: 77 | if dmrs.index is None: 78 | return 79 | sub_index = sub_dmrs.index.nodeid 80 | index = dmrs.index.nodeid 81 | if sub_index in matching: 82 | if matching[sub_index] != index: 83 | return 84 | else: 85 | if index in matches[sub_index]: 86 | matching[sub_index] = index 87 | matching_values.add(index) 88 | del matches[sub_index] 89 | else: 90 | return 91 | 92 | change = True 93 | while change: 94 | change = False 95 | for sub_nodeid, match in list(matches.items()): 96 | for k, m in enumerate(match): 97 | if m in matching_values: 98 | del match[k] 99 | if len(match) == 1: 100 | m = matches.pop(sub_nodeid)[0] 101 | matching[sub_nodeid] = m 102 | matching_values.add(m) 103 | change = True 104 | 105 | # optimisation for nodes with uniquely matching neighbour nodes 106 | for sub_nodeid, match in list(matches.items()): 107 | neighbours = [] 108 | for n in sub_dmrs.get_neighbours(sub_nodeid, nodeids=True): 109 | if n not in matching: 110 | break 111 | neighbours.append(matching[n]) 112 | else: # all neighbours in sub_dmrs match uniquely 113 | candidate = None 114 | for nodeid in match: 115 | if nodeid is None: # not possible if an optional node is present 116 | candidate = None 117 | break 118 | if nodeid in matching_values or any(n not in 
dmrs.get_neighbours(nodeid, nodeids=True) for n in neighbours): # node is already assigned or has invalid neighbourhood 119 | continue 120 | if candidate is not None: # can't optimise in case of more than one candidate 121 | break 122 | candidate = nodeid 123 | else: # loop finished (no break), i.e. candidate is unique or non-existent 124 | if candidate is not None: 125 | matching[sub_nodeid] = candidate 126 | matching_values.add(candidate) 127 | del matches[sub_nodeid] 128 | 129 | matches_items = list(matches.items()) 130 | 131 | # does an exhaustive search over all the left-over matches in matches_items 132 | def _exhaustive_search(n): 133 | if not n: 134 | if _check_links(): 135 | yield matching.copy() 136 | return 137 | n -= 1 138 | sub_nodeid, match = matches_items[n] 139 | for nodeid in match: # assign and recursively continue for every possible match 140 | if nodeid is None or nodeid in matching_values: 141 | continue 142 | matching[sub_nodeid] = nodeid 143 | matching_values.add(nodeid) 144 | for result in _exhaustive_search(n): 145 | yield result 146 | matching_values.remove(nodeid) 147 | matching.pop(sub_nodeid, None) 148 | if match[-1] is None: # without assigning if optional node is present 149 | for result in _exhaustive_search(n): 150 | yield result 151 | 152 | # checks whether the links match within the current node matching 153 | def _check_links(): 154 | count = 0 155 | for l1 in dmrs.iter_links(): 156 | if l1.start not in matching_values or l1.end not in matching_values: 157 | continue 158 | for l2 in sub_dmrs.iter_links(): 159 | if (l2.rargname == '?' or l2.rargname == l1.rargname or (l1.rargname and l2.rargname == l1.rargname[:3] == 'ARG')) and (l2.post == '?' 
class Match(object):
    """ A mapping between two DMRS objects.
    The nodeid_pairs is a list of nodeid tuples (nodeid1, nodeid2), where
    nodeid1 and nodeid2 come from different DMRS.
    The link_pairs is the link equivalent of the nodeid_pairs.
    """

    def __init__(self, nodeid_pairs=None, link_pairs=None):
        """
        :param nodeid_pairs A list of (nodeid1, nodeid2) tuples; an empty list if omitted.
        :param link_pairs A list of (link1, link2) tuples; an empty list if omitted.
        """
        # Fall back to fresh lists so that a default-constructed Match supports
        # len(), add() and is_compatible(). Lists passed in are stored as-is
        # (not copied), so in-place extension by callers keeps working.
        self.nodeid_pairs = [] if nodeid_pairs is None else nodeid_pairs
        self.link_pairs = [] if link_pairs is None else link_pairs

    def __str__(self):
        return "Nodes:{}; Links:{}".format(self.nodeid_pairs, self.link_pairs)

    def __len__(self):
        """ The size of a match is the number of node pairs plus the number of link pairs. """
        return len(self.nodeid_pairs) + len(self.link_pairs)

    def add(self, match):
        """ Combines self with match in place, resolving any conflicts in favour of self.
        :param match Another Match object; it is not modified.

        If the two matches are compatible, all pairs of match are appended.
        Otherwise only those node pairs are taken over whose nodes are both
        still unpaired in self, and only those link pairs whose links are both
        still unpaired and whose end points are all paired.
        """
        if self.is_compatible(match):
            self.nodeid_pairs.extend(match.nodeid_pairs)
            self.link_pairs.extend(match.link_pairs)
            return

        # Built with comprehensions rather than zip(*pairs), which cannot be
        # unpacked into two names when the list of pairs is empty.
        nodes_a = {pair[0] for pair in self.nodeid_pairs}
        nodes_b = {pair[1] for pair in self.nodeid_pairs}
        for node_pair in match.nodeid_pairs:
            if node_pair[0] not in nodes_a and node_pair[1] not in nodes_b:
                self.nodeid_pairs.append(node_pair)
                nodes_a.add(node_pair[0])
                nodes_b.add(node_pair[1])

        links_a = {pair[0] for pair in self.link_pairs}
        links_b = {pair[1] for pair in self.link_pairs}
        for link1, link2 in match.link_pairs:
            if link1 in links_a or link2 in links_b:
                continue
            # A link is only taken over if both of its end points are paired.
            if (link1.start in nodes_a and link1.end in nodes_a
                    and link2.start in nodes_b and link2.end in nodes_b):
                self.link_pairs.append((link1, link2))
                # Keep the link pairing one-to-one, as is done for the nodes above.
                links_a.add(link1)
                links_b.add(link2)

    def is_compatible(self, match2):
        """ Checks if two matches are possible simultaneously. Two matches are conflicting
        if they pair nodes differently, e.g. (10001, 10003) in self and
        (10001, 10005) in match2.
        :param match2 Another Match object.
        :return True/False
        """
        if len(self) == 0 or len(match2) == 0:
            return True
        first_nodes = {pair[0] for pair in self.nodeid_pairs}
        second_nodes = {pair[1] for pair in self.nodeid_pairs}
        # Compatible only if the matches share no node on either side.
        return (first_nodes.isdisjoint(pair[0] for pair in match2.nodeid_pairs)
                and second_nodes.isdisjoint(pair[1] for pair in match2.nodeid_pairs))

    def get_first(self, nodeid):
        """ Returns the first nodeid of the pair whose second nodeid is the given one, or None. """
        for nodeid1, nodeid2 in self.nodeid_pairs:
            if nodeid == nodeid2:
                return nodeid1
        return None

    def get_second(self, nodeid):
        """ Returns the second nodeid of the pair whose first nodeid is the given one, or None. """
        for nodeid1, nodeid2 in self.nodeid_pairs:
            if nodeid == nodeid1:
                return nodeid2
        return None
def extend_match(match, start_nodeids, dmrs1, dmrs2, underspecified=True):
    """ Extends a match between dmrs1 and dmrs2 in place, starting from a pair of nodes.
    :param match: A Match object to be extended. It is modified in place.
    :param start_nodeids: A tuple of matching nodeids (nodeid from dmrs1, nodeid from dmrs2)
                          with which to start the match extension.
    :param dmrs1 A DMRS object. For matching, the small dmrs.
    :param dmrs2 A DMRS object. For matching, the large dmrs.
    :param underspecified: If True (default), treat underspecified nodes as equal.

    The two start nodes should be equivalent by are_equal_nodes criterion.

    The function finds any links shared by the two start nodes (equivalent
    according to are_equal_links) and follows them. The pairs of nodes at
    other end of the links are added to a queue. Then the function calls
    itself recursively with the queued pairs of nodes as the start nodes.
    The recursion stops when no shared links are found and the queue is empty.

    :return None. The updated node and link pairs are stored in match.
    """
    match.nodeid_pairs.append(start_nodeids)
    # dmrs1 nodeids that already have a partner in dmrs2
    matched_first = set(x[0] for x in match.nodeid_pairs)
    if match.link_pairs:
        matched_links1, matched_links2 = tuple(set(x) for x in zip(*match.link_pairs))
    else:
        # zip(*[]) yields nothing to unpack, so the empty case is handled separately
        matched_links1, matched_links2 = set(), set()
    node_queue = []
    start_id1, start_id2 = start_nodeids
    # All links touching the start nodes: outgoing, incoming and those returned by get_eq
    links1 = dmrs1.get_out(start_id1)
    links1.update(dmrs1.get_in(start_id1))
    links1.update(dmrs1.get_eq(start_id1))
    links2 = dmrs2.get_out(start_id2)
    links2.update(dmrs2.get_in(start_id2))
    links2.update(dmrs2.get_eq(start_id2))
    for link1 in links1:
        if link1 not in matched_links1:
            for link2 in links2:
                if link2 not in matched_links2:
                    if are_equal_links(link1, link2, dmrs1, dmrs2):
                        # Reject the candidate if an end point of link1 is already paired
                        # with a node other than the corresponding end point of link2.
                        if link1.start in matched_first and match.get_second(link1.start) != link2.start:
                            continue
                        if link1.end in matched_first and match.get_second(link1.end) != link2.end:
                            continue
                        match.link_pairs.append((link1, link2))
                        matched_links1.add(link1)
                        matched_links2.add(link2)
                        # The node at the other end of each link (relative to the start node)
                        paired1 = link1.start if link1.end == start_id1 else link1.end
                        paired2 = link2.start if link2.end == start_id2 else link2.end
                        node_queue.append((paired1, paired2))
                        # link1 is paired with the first acceptable link2 only
                        break

    for nodeid1, nodeid2 in node_queue:
        # Recurse only into pairs that are not yet part of the match and whose nodes are equal
        if (nodeid1, nodeid2) not in match.nodeid_pairs and are_equal_nodes(dmrs1[nodeid1], dmrs2[nodeid2],
                                                                             underspecified):
            extend_match(match, (nodeid1, nodeid2), dmrs1, dmrs2, underspecified)
191 | :param underspecified: If True, the underspecified nodes in dmrs1 will be matched to more specific ones in 192 | dmrs2. 193 | 194 | The function initiates a extend_match top call and repeats it until all 195 | possible pairings are explored. GPreds and quantifiers 'a' and 'the' 196 | are not allowed as the start ndoes of extend_match to narrow down the search 197 | space. 198 | 199 | :return A list of Match objects where pairs come from (dmrs1, dmrs2). 200 | """ 201 | node_pairings = pair_same_node_groups(dmrs1, dmrs2, underspecified) 202 | matches = [] 203 | checked_node_pairs = [] 204 | 205 | # Sort pairs so that the ones with fewer matching combination are considered first. 206 | # Exclude GPreds and some quantifiers from the pool of start nodes. 207 | filter_func = lambda pairing: isinstance(pairing[0], RealPred) and pairing[0].lemma not in ['a', 208 | 'the'] 209 | filtered_pairings = filter(filter_func, node_pairings) 210 | sorted_pairings = sorted(filtered_pairings, 211 | key=lambda pairing: len(pairing[1]) * len(pairing[2])) 212 | 213 | if not sorted_pairings: 214 | sorted_pairings = node_pairings 215 | for pred, group1, group2 in sorted_pairings: 216 | all_pairs = product(group1, group2) 217 | for pair in all_pairs: 218 | if pair not in checked_node_pairs and are_equal_nodes(dmrs1[pair[0]], dmrs2[pair[1]], 219 | underspecified=underspecified): 220 | match = Match([], []) 221 | extend_match(match, (pair[0], pair[1]), dmrs1, dmrs2, underspecified) 222 | checked_node_pairs.extend(match.nodeid_pairs) 223 | matches.append(match) 224 | return matches # (matched_nodes, matched_links) 225 | 226 | 227 | def group_compatible_matches(matches): 228 | """ Groups matches into compatible sets of indices of non-conflicting matches. 229 | Indices are given by the positions in the matches list. 230 | :param matches A list of Matches. 231 | 232 | :return A list of sets of integers. Each set is unique and contains matches indices 233 | of compatible Matches. 
def find_biggest_disjoint_matches(matches):
    """ Finds collections of compatible matches which maximize the number of
    matched elements. Returns a list in case more than one combination scores
    the highest.
    :param matches A list of Matches.
    :return A list of tuples (group, Match), where group is a set of matches
            indices (see group_compatible_matches) and the Match combines
            all the Matches in the group. The list is empty if there are no
            groups at all.
    """
    compatible_groups = group_compatible_matches(matches)
    best_score = 0
    # Start from an empty list (not None) so that groups scoring 0, or an
    # empty list of groups, do not raise.
    best_groups = []
    for group in compatible_groups:
        group_score = sum(len(matches[i]) for i in group)
        if group_score > best_score:
            best_score = group_score
            best_groups = [group]
        elif group_score == best_score:
            best_groups.append(group)

    full_matches = []
    for group in best_groups:
        nodes = list(chain.from_iterable(matches[i].nodeid_pairs for i in group))
        links = list(chain.from_iterable(matches[i].link_pairs for i in group))
        full_matches.append((group, Match(nodes, links)))
    return full_matches
def get_matched_subgraph(large_dmrs, match):
    """ Returns the subgraph of large_dmrs described by match.
    :param large_dmrs A DMRS object in which the match was found.
    :param match A Match object. The second element of each of its node and
                 link pairs is taken to belong to large_dmrs.

    :return A DMRS object containing only the matched elements from large_dmrs.
            The graph can be disconnected.
    """
    # Taking the second elements pair by pair (instead of indexing into
    # zip(*pairs)) also works for a match without any links or nodes.
    links = tuple(link for _, link in match.link_pairs)
    nodes = [large_dmrs[nodeid] for _, nodeid in match.nodeid_pairs]
    return DictDmrs(nodes, links)
def get_missing_elements(match, dmrs):
    """ Returns a list of elements of dmrs for which no match was found.
    :param match A Match object. The second element of each of its node and
                 link pairs is compared against the elements of dmrs.
    :param dmrs A DMRS object for which the match was searched.
    :return A list of the unmatched nodeids (in iteration order of dmrs),
            followed by the unmatched links (in order of dmrs.iter_links()).
    """
    # Taking the second elements pair by pair (instead of indexing into
    # zip(*pairs)) also works for a match without any links or nodes.
    matched_nodeids = {nodeid for _, nodeid in match.nodeid_pairs}
    matched_links = {link for _, link in match.link_pairs}
    not_matched = [nodeid for nodeid in dmrs if nodeid not in matched_nodeids]
    not_matched.extend(link for link in dmrs.iter_links() if link not in matched_links)
    return not_matched
if __name__ == '__main__':
    # Usage: the GraphLang query is the only argument; one DMRS XML string
    # per line is read from (piped) standard input.
    # Explicit check instead of assert, which is stripped when running with -O.
    if len(sys.argv) != 2 or sys.stdin.isatty():
        sys.exit('Invalid arguments')
    search_dmrs = sys.argv[1]
    # Strip only the line terminator; slicing off the last character would
    # corrupt a final line that does not end in a newline.
    dmrs_iter = (ListDmrs.loads_xml(line.rstrip('\n')) for line in sys.stdin)
    # Only the first result is printed; 'None' is printed if there is no match.
    first_result = next(dmrs_query(dmrs_iter, search_dmrs, results_as_dict=True), None)
    sys.stdout.write(str(first_result) + '\n')
def generate(dmrs, erg_file=DEFAULT_ERG_FILE):
    """
    Generate surface strings for a DMRS.
    The DMRS is dumped to XML, decoded with the dmrx codec, converted with from_dmrs,
    encoded with the simplemrs codec and passed to ace.generate together with erg_file.
    :param dmrs A DMRS object providing dumps_xml.
    :param erg_file The grammar file passed to ace.generate.
    :return A list with the 'surface' entry of every result.
    """
    delphin_dmrs = dmrx.decode(dmrs.dumps_xml(encoding='utf-8'))
    mrs_string = simplemrs.encode(from_dmrs(delphin_dmrs))
    response = ace.generate(erg_file, mrs_string)
    return [realisation['surface'] for realisation in response.results()]
def is_rooted(dmrs, check_connected=True):
    """
    Check if a dmrs has at least one root, i.e. a node with no incoming links.
    If check_connected is True (the default), a dmrs that is not connected
    is never considered rooted.
    NOTE(review): this does not verify that the root is unique -- a graph with
    several roots also passes. Confirm whether callers rely on a single root.
    """
    if check_connected and not dmrs.is_connected():
        return False
    # True as soon as iter_roots yields one (truthy) node
    return any(iter_roots(dmrs))
def connected_pair(dmrs, first_id, second_id):
    """
    Check if a pair of nodes are connected to each other,
    i.e. if second_id is a neighbour of first_id or of any node reachable from it
    """
    visited = set()  # Nodes that have already been expanded
    frontier = {first_id}  # Nodes still waiting to be expanded
    while frontier:
        current = frontier.pop()
        visited.add(current)
        unseen = set()
        for neighbour in dmrs.get_neighbours(current, nodeids=True, itr=True):
            if neighbour == second_id:
                return True
            if neighbour not in visited:
                unseen.add(neighbour)
        frontier |= unseen
    return False
def make_rooted_global(dmrs, root=None):
    """
    Convert a DMRS graph to a rooted graph,
    by fixing one node to be the root.
    If no nodeid is given, defaults to top (and then index)
    May leave cycles.
    The graph is modified in place and also returned.
    Raises PydmrsError if no root is given and the graph has neither top nor index.
    """
    # Decide on the root
    if root is None:
        if dmrs.top:
            root = dmrs.top.nodeid
        elif dmrs.index:
            root = dmrs.index.nodeid
        else:
            raise PydmrsError('No root nodeid given, no top, and no index')

    # Walk outwards from the root, one layer of nodes at a time
    previous = set()  # The layer processed immediately before the current one
    layer = {root}
    while layer:
        children = set()
        for nid in layer:
            for link in dmrs.get_in(nid):
                # Reverse incoming links, unless they start in the current or the previous layer
                if link.start not in layer and link.start not in previous:
                    reverse_link(dmrs, link)
            # The next layer consists of the nodes this layer points to (after any reversal)
            children.update(dmrs.get_out_nodes(nid, nodeids=True) - layer)
        previous = layer
        layer = children

    return dmrs
class RootedMixin(Dmrs):
    """
    Allows a Dmrs class access to the above functions as class methods
    """
    # Iterate over a snapshot of the module globals and bind every object whose
    # __module__ is this module into the class namespace under its own name.
    # NOTE(review): this relies on locals() being the writable class namespace
    # while the class body executes -- confirm for the supported interpreters.
    # The loop variables 'name' and 'object' are themselves bound in the class
    # body, and 'object' shadows the builtin of the same name there.
    for name, object in copy(globals()).items():
        try:
            if object.__module__ == __name__:
                locals()[name] = object
        except AttributeError:
            # Objects without a __module__ attribute are skipped
            continue
def load_xml(filehandle, cls=ListDmrs, **kwargs):
    """
    Load a DMRS from a file
    NB: read file as bytes!
    Produces a ListDmrs by default; for a different type, specify cls
    Any further keyword arguments are passed on to loads_xml
    """
    # Delegate to loads_xml of this module, which accepts the cls keyword
    # (the previous call went through a 'loads' attribute of cls).
    return loads_xml(filehandle.read(), cls=cls, **kwargs)
def dump_xml(filehandle, dmrs):
    """
    Serialise a DMRS with dumps_xml and write the result to a file
    NB: the result is a bytestring, so the file must be opened for writing bytes!
    """
    serialised = dumps_xml(dmrs)
    filehandle.write(serialised)
105 | Supported formats: dot 106 | """ 107 | if format == 'dot': 108 | dot_strs = [] 109 | dot_strs.append('digraph g {\n') 110 | if dmrs.top is not None: 111 | dot_strs.append('NodeTop [label="top",style=bold];\n') 112 | dot_strs.append('node[shape=box];\n') 113 | for nodeid in dmrs: 114 | node = dmrs[nodeid] 115 | dot_strs.append('Node{} [label=<{}{}{}>];\n'.format(str(nodeid).replace('-', 'M'), node.pred, '("{}")'.format(node.carg) if node.carg else '', node.sortinfo)) 116 | dot_strs.append('edge[fontsize=10];\n') 117 | if dmrs.top is not None: 118 | dot_strs.append('NodeTop -> Node{} [style=dotted];\n'.format(str(dmrs.top.nodeid).replace('-', 'M'))) 119 | for link in dmrs.links: 120 | dot_strs.append('Node{} -> Node{} [label="{}"];\n'.format(str(link.start).replace('-', 'M'), str(link.end).replace('-', 'M'), link.labelstring)) 121 | dot_strs.append('}\n') 122 | dot_str = ''.join(dot_strs) 123 | return dot_str.encode() 124 | else: 125 | raise PydmrsValueError('Visualisation format not supported') 126 | -------------------------------------------------------------------------------- /pydmrs/simplification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delph-in/pydmrs/795b35dba4986fa9084eaa81fb16206cb131a752/pydmrs/simplification/__init__.py -------------------------------------------------------------------------------- /pydmrs/simplification/gpred_filtering.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pydmrs.components import GPred 3 | from pydmrs.serial import loads_xml, dumps_xml 4 | from pydmrs.utils import get_config_option, load_config, split_dmrs_string 5 | 6 | DEFAULT_CONFIG_FILE = 'default_simplification.conf' 7 | 8 | # If run from the command line, load the specified file 9 | # Otherwise, load the default file 10 | 11 | if __name__ == '__main__': 12 | parser = argparse.ArgumentParser(description='DMRS 
def gpred_filtering(dmrs, gpred_filter=DEFAULT_FILTER, allow_disconnected_dmrs=DEFAULT_ALLOW_DISC):
    """
    Remove general predicate nodes that are on the filter list from a DMRS.

    The DMRS is modified in place (via dmrs.remove_node) and also returned.

    :param dmrs: Input DMRS object
    :param gpred_filter: Collection of general predicates to filter; each
        gpred node's `pred` is tested for membership in it
    :param allow_disconnected_dmrs: If true, every filterable node is removed
        without any connectivity check. If false, the DMRS is first checked
        with is_connected(ignored_nodeids=<filterable nodes>): when that check
        fails all filterable nodes are removed anyway; when it succeeds a node
        is only removed if is_connected still holds with it (and the nodes
        already selected) passed as removed_nodeids.
    :return: The same DMRS object, after removal
    """
    # NOTE(review): the module-level default for allow_disconnected_dmrs is
    # read with get_config_option without an opt_type, i.e. as a raw string;
    # any non-empty string (including 'False') is truthy here — confirm.

    # General predicate nodes that are candidates for removal.
    candidates = {
        node.nodeid
        for node in dmrs.iter_nodes()
        if node.is_gpred_node and node.pred in gpred_filter
    }

    if allow_disconnected_dmrs or not dmrs.is_connected(ignored_nodeids=candidates):
        # Connectivity need not (or cannot) be preserved: drop every candidate.
        to_remove = candidates
    else:
        # Greedily accept candidates whose removal keeps the DMRS connected.
        to_remove = set()
        for nodeid in candidates:
            if dmrs.is_connected(removed_nodeids=to_remove | {nodeid},
                                 ignored_nodeids=candidates):
                to_remove.add(nodeid)

    # Remove the selected nodes (and, per remove_node, whatever it cleans up).
    for nodeid in to_remove:
        dmrs.remove_node(nodeid)

    return dmrs
def parse_config(config_string):
    """
    Turn a multi-line config value into a flat list of strings.

    The value is processed line by line: blank lines and lines whose first
    non-blank character is '#' are skipped, the remaining lines are split on
    commas, and each piece is stripped of surrounding whitespace. Empty pieces
    (e.g. from a trailing comma) are dropped.

    :param config_string: String as read from the config file
    :return: List of the non-empty, stripped entries in order of appearance
    """
    entries = []

    for raw_line in config_string.split('\n'):
        stripped_line = raw_line.strip()

        # Only whole-line comments are recognised; a '#' later in a line is
        # kept as part of the entry.
        if not stripped_line or stripped_line.startswith('#'):
            continue

        for piece in stripped_line.split(','):
            entry = piece.strip()
            if entry != '':
                entries.append(entry)

    return entries
73 | :param content: File content 74 | :return: List of DMRS XML strings 75 | """ 76 | 77 | content_split = content.split(' 2 | 3 | 4 | 5 | 6 | 7 | 8 | DMRS Visualizer 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | Visualize 38 | Reset 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /pydmrs/visualization/static/d3.min.js-LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2015, Michael Bostock 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * The name Michael Bostock may not be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /pydmrs/visualization/static/dmrs.css: -------------------------------------------------------------------------------- 1 | /* The MIT License (MIT) 2 | 3 | Demophin and d3.arcdiagram.js both use the same terms of the MIT license. 4 | 5 | Demophin: Copyright (c) 2014 Michael Wayne Goodman 6 | (see https://github.com/goodmami/demophin) 7 | 8 | d3.arcdiagram.js: Copyright (c) 2015 Michael Wayne Goodman 9 | (see https://github.com/goodmami/d3-arcdiagram) 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to deal 13 | in the Software without restriction, including without limitation the rights 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in all 19 | copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | SOFTWARE.*/ 28 | 29 | html { 30 | height: 100%; 31 | } 32 | 33 | body { 34 | background-color: #DDD; 35 | height: 100%; 36 | } 37 | 38 | 39 | .grammarname { 40 | font-weight: bold; 41 | } 42 | 43 | /*#sentenceinput { 44 | font-size: 12pt; 45 | } 46 | 47 | #parseSubmit { 48 | height: 12pt; 49 | }*/ 50 | 51 | .sentence { 52 | text-align: center; 53 | font-size: larger; 54 | font-weight: bold; 55 | } 56 | 57 | .status { 58 | /*position: fixed; 59 | bottom: 100%; 60 | left: 0px; */ 61 | width: 100%; 62 | height: 20px; 63 | font-size: small; 64 | } 65 | 66 | .error { 67 | font-weight: bold; 68 | font-size: large; 69 | } 70 | 71 | .result { 72 | display: block; 73 | margin: 10px; 74 | } 75 | 76 | .dmrs { 77 | display: inline-block; 78 | background-color: #FFF; 79 | border-radius: 10px; 80 | overflow: auto; 81 | } 82 | 83 | .node { 84 | pointer-events: all; 85 | cursor: pointer; 86 | fill: #000; 87 | stroke: none; 88 | } 89 | 90 | .nodeText { 91 | stroke: none; 92 | font-family: sans-serif; 93 | font-size: 16px; 94 | text-anchor: middle; 95 | } 96 | 97 | .nodeText:hover { 98 | font-weight: bold; 99 | } 100 | 101 | .node.selected { 102 | font-weight: bold; 103 | } 104 | 105 | .nodeBox { 106 | stroke-width: 2px; 107 | fill: none; 108 | } 109 | 110 | .link { 111 | stroke: #000; 112 | fill: #000; 113 | stroke-opacity: .5; 114 | fill-opacity: .5; 115 | } 116 | 117 | .linkedge { 118 | fill: none; 119 | stroke-width: 2px; 120 | stroke-linejoin: round; 121 | marker-end: url(#arrowhead); 122 | } 123 | 124 | .eqedge { 125 | fill: none; 126 | stroke-width: 2px; 127 | stroke-linejoin: round; 128 | stroke-dasharray: 5,5; 129 | } 130 | 131 | .topedge { 132 | fill: #f00; 133 | stroke-width: 2px; 134 | stroke-linejoin: 
round; 135 | stroke-dasharray: 5,5; 136 | marker-end: url(#arrowhead); 137 | } 138 | 139 | .linkend { 140 | stroke-opacity: .5; 141 | fill-opacity: .5; 142 | } 143 | 144 | .rargname { 145 | fill: #000; 146 | stroke: none; 147 | stroke-width: 1px; 148 | font-family: sans-serif; 149 | font-size: 10px; 150 | text-anchor: middle; 151 | } 152 | 153 | .node.in { fill: red; } 154 | .node.out { fill: blue; } 155 | .node.labelset { stroke: gold; } 156 | .node.scope { stroke: violet; } 157 | .node.out.labelset { fill: green; stroke: gold;} 158 | .node.in.labelset { fill: darkorange; stroke: gold;} 159 | 160 | .link.in { fill: red; stroke: red; } 161 | .link.out { fill: blue; stroke: blue; } 162 | .link.labelset { fill: gold; stroke: gold; } 163 | .link.scope { fill: violet; stroke: violet; } 164 | .link.in.labelset { fill: darkorange; stroke: darkorange; } 165 | .link.out.labelset { fill: green; stroke: green; } 166 | 167 | /* thanks: http://bl.ocks.org/d3noob/a22c42db65eb00d4e369 */ 168 | .dmrs-tooltip { 169 | position: absolute; 170 | text-align: center; 171 | padding: 2px; 172 | font: 12px sans-serif; 173 | color: #FFF; 174 | background: #444; 175 | border: 0px; 176 | border-radius: 8px; 177 | opacity: 0; 178 | pointer-events: none; 179 | } 180 | 181 | .dmrs-tooltip td { 182 | padding: 2px 183 | } 184 | 185 | #visualizations { 186 | position: relative; 187 | padding: 100px; 188 | height: 100%; 189 | } 190 | 191 | #visualizations svg { 192 | position: absolute; 193 | top: -9999px; 194 | bottom: -9999px; 195 | left: -9999px; 196 | right: -9999px; 197 | margin: auto; 198 | } -------------------------------------------------------------------------------- /pydmrs/visualization/static/dmrs.js: -------------------------------------------------------------------------------- 1 | // The MIT License (MIT) 2 | // 3 | // Demophin and d3.arcdiagram.js both use the same terms of the MIT license. 
4 | // 5 | // Demophin: Copyright (c) 2014 Michael Wayne Goodman 6 | // (see https://github.com/goodmami/demophin) 7 | // 8 | // d3.arcdiagram.js: Copyright (c) 2015 Michael Wayne Goodman 9 | // (see https://github.com/goodmami/d3-arcdiagram) 10 | // 11 | // Permission is hereby granted, free of charge, to any person obtaining a copy 12 | // of this software and associated documentation files (the "Software"), to deal 13 | // in the Software without restriction, including without limitation the rights 14 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | // copies of the Software, and to permit persons to whom the Software is 16 | // furnished to do so, subject to the following conditions: 17 | // 18 | // The above copyright notice and this permission notice shall be included in all 19 | // copies or substantial portions of the Software. 20 | // 21 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | // SOFTWARE. 
28 | 29 | 30 | var maxWidth = 600, 31 | height = 300; 32 | 33 | var level_dy = 25, // vertical separation between edges 34 | edge_radius = 15, // rounded corner radius, 35 | edge_xoffset = 10, // outgoing edges aren't centered 36 | node_dx = 20; // horizontal separation between nodes 37 | 38 | var color = d3.scale.category20(); 39 | 40 | 41 | function dmrsDisplay(svgElem, graph) { 42 | // d3.json(url, function(error, graph) { 43 | // calculate source and target for links 44 | prepareGraph(graph); 45 | 46 | var tip = d3.select("#tooltip") 47 | .style("opacity", 0); 48 | 49 | var id = svgElem; 50 | var svg = d3.select(svgElem) 51 | .attr("height", ((graph.maxTopLevel - graph.maxBottomLevel + 3) * level_dy)); 52 | var g = svg.append("svg:g") 53 | .attr("transform", "translate(0," + ((graph.maxTopLevel + 2) * level_dy) + ")"); 54 | 55 | g.append("defs").append("marker") 56 | .attr("class", "linkend") 57 | .attr("id", "arrowhead") 58 | .attr("refX", 1) /*must be smarter way to calculate shift*/ 59 | .attr("refY", 2) 60 | .attr("markerWidth", 5) 61 | .attr("markerHeight", 4) 62 | .attr("orient", "auto") 63 | .append("path") 64 | .attr("d", "M0,0 L1,2 L0,4 L5,2 Z"); //this is actual shape for arrowhead 65 | 66 | var x_pos = 10; 67 | var nodes = g.selectAll(".node").order() 68 | .data(graph.nodes) 69 | .enter().append("svg:g") 70 | .attr("class", "node") 71 | .each(function(d) { 72 | var vps = []; 73 | for (var key in d.varprops) { 74 | vps.push("" + key + "=" + d.varprops[key] + ""); 75 | } 76 | d.tooltipText = "" + vps.join("") + ""; 77 | }); 78 | nodes.append("svg:text") 79 | .attr("class", "nodeText") 80 | .text(function(d) { 81 | if (d.carg) { 82 | return d.pred + "(" + d.carg + ")"; 83 | } else { 84 | return d.pred; 85 | } 86 | }) 87 | .attr("x", function(d, i) { 88 | d.bbox = this.getBBox(); 89 | halfLen = d.bbox.width / 2; 90 | x = x_pos + halfLen; 91 | x_pos = x + halfLen + node_dx; 92 | d.x = x; 93 | return x; 94 | }) 95 | .attr("y", function(d) { return 0; }) 96 
| .attr("dy", function(d) { return d.bbox.height/5; }); 97 | nodes.insert("svg:rect", "text") 98 | .attr("class", "nodeBox") 99 | .attr("x", function(d) { return d.x - (d.bbox.width / 2) - 2; }) 100 | .attr("y", function(d) { return - (d.bbox.height / 2) - 2; }) 101 | .attr("width", function(d) { return d.bbox.width + 4; }) 102 | .attr("height", function(d) { return d.bbox.height + 4; }) 103 | .attr("rx", 4) 104 | .attr("ry", 4); 105 | nodes.on("mouseover", function(d) { 106 | if (!graph.sticky) { d3.select(this).classed("selected", true) }; 107 | updateHighlights(id); 108 | tip.html(d.tooltipText) 109 | .style("opacity", 0.8); 110 | }) 111 | .on("mousemove", function(d) { 112 | tip.style("left", (d3.event.pageX - 10) + "px") 113 | .style("top", (d3.event.pageY + 15) + "px"); 114 | }) 115 | .on("mouseout", function(d) { 116 | if (!d.sticky) { d3.select(this).classed("selected", false); } 117 | updateHighlights(id); 118 | tip.style("opacity", 0); 119 | }) 120 | .on("click", function(d) { 121 | stickyState = toggleSticky(id, this, d); 122 | graph.sticky = stickyState; 123 | updateHighlights(id); 124 | }); 125 | 126 | // not working... 127 | svg.attr("width", d3.sum(nodes.data(), function(d) { return d.bbox.width + node_dx; })); 128 | 129 | var links = g.selectAll(".link").order() 130 | .data(graph.links) 131 | .enter().append("svg:g") 132 | .attr("class", "link"); 133 | links.append("svg:path") 134 | .attr("class", function(d) { 135 | if (d.start == 0) { 136 | return "topedge"; 137 | } else if (d.rargname == "" && d.post == "EQ") { 138 | return "eqedge"; 139 | } else { 140 | return "linkedge"; 141 | } 142 | }) 143 | .attr("d", function(d) { 144 | return getPathSpec(d, graph); 145 | }) 146 | .attr("transform", function(d) { 147 | return "scale(1," + (d.dir * -1) + ")"; 148 | }) 149 | .style("marker-end", function(d) { 150 | return (d.rargname == "" && d.post == "EQ") ? 
/**
 * Find and reserve the level closest to the node row that is still free for
 * an edge spanning the gaps between two node positions.
 *
 * lvlIdx maps a gap key "i,i+1" to an object whose keys are the levels
 * already taken in that gap. Levels are tried at dir, 2*dir, 3*dir, ... and
 * the first one that is unused in every gap between the two positions is
 * marked as taken in all of those gaps and returned.
 *
 * @param source  index of one end node
 * @param target  index of the other end node (order does not matter)
 * @param dir     step between candidate levels; the search never terminates
 *                for 0, so callers must pass a non-zero value
 * @param lvlIdx  occupancy index, updated in place
 * @returns the level that was reserved
 */
function nextAvailableLevel(source, target, dir, lvlIdx) {
    // Work on an ordered pair so the gap loops always run left to right.
    var left = source,
        right = target;
    if (left > right) {
        left = target;
        right = source;
    }

    function gapKey(pos) {
        return [pos, pos + 1].join();
    }

    function isFree(candidate) {
        for (var pos = left; pos < right; pos++) {
            if (candidate in lvlIdx[gapKey(pos)]) {
                return false;
            }
        }
        return true;
    }

    var level = 0,
        candidate = dir;
    do {
        if (isFree(candidate)) {
            // Reserve the level in every gap the edge passes over.
            level = candidate;
            for (var pos = left; pos < right; pos++) {
                lvlIdx[gapKey(pos)][level] = true;
            }
        } else {
            candidate += dir;
        }
    } while (level == 0);

    return level;
}
// Recompute the highlight classes ("out", "in", "labelset", "scope") on the
// links and nodes inside the element selected by `id`, relative to every node
// that currently carries the "selected" class.
//
// id: selector/element passed to d3.select; all lookups are scoped to it.
function updateHighlights(id) {
    // Start from a clean slate so stale highlights never survive.
    clearHighlights(id);
    d3.select(id).selectAll(".node.selected").each(function(d){
        // Node ids collected while classifying the links; they are used at
        // the end of this pass to classify the nodes.
        var labelset = d3.set(),
            outs = d3.set(),
            ins = d3.set(),
            scopes = d3.set();
        // NOTE(review): each predicate below both decides the class of a link
        // and records the node at its other end as a side effect — this relies
        // on classed() invoking every function for every link; confirm against
        // the d3 version in use.
        d3.select(id).selectAll(".link")
            .classed({
                // Labelled link leaving the selected node.
                "out": function(_d) {
                    if (_d.rargname && d.id == _d.start) {
                        outs.add(_d.end);
                        return true;
                    }
                    return false;
                },
                // Labelled link arriving at the selected node.
                "in": function(_d) {
                    if (_d.rargname && d.id == _d.end) {
                        ins.add(_d.start);
                        return true;
                    }
                    return false;
                },
                // EQ link touching the selected node: both ends seed the label set.
                "labelset": function(_d) {
                    if (_d.post == "EQ" && (_d.start == d.id || _d.end == d.id)) {
                        labelset.add(_d.start);
                        labelset.add(_d.end);
                        return true;
                    }
                    return false
                },
                // H/HEQ link touching the selected node; only the target of an
                // outgoing one is recorded in `scopes`.
                "scope": function(_d) {
                    if (_d.start == d.id && (_d.post == "H" || _d.post == "HEQ")) {
                        scopes.add(_d.end);
                        return true;
                    } else if (_d.end == d.id && (_d.post == "H" || _d.post == "HEQ")) {
                        return true;
                    }
                    return false;
                }
            });
        // Grow the label set to a fixed point: keep sweeping the links, pulling
        // in the far end of any EQ link with exactly one end already in the
        // set, until a full sweep adds nothing.
        var labelAdded = true;
        while (labelAdded) {
            labelAdded = false;
            d3.select(id).selectAll(".link").each(function(_d) {
                if (_d.post == "EQ") {
                    if (labelset.has(_d.start) && !labelset.has(_d.end)) {
                        labelset.add(_d.end);
                        labelAdded = true;
                    } else if (labelset.has(_d.end) && !labelset.has(_d.start)) {
                        labelset.add(_d.start);
                        labelAdded = true;
                    }
                }
            });
        }
        // Classify every node by membership in the sets collected above.
        d3.select(id).selectAll(".node")
            .classed({
                "out": function(_d) { return outs.has(_d.id); },
                "in": function(_d) { return ins.has(_d.id); },
                "labelset": function(_d) { return labelset.has(_d.id); },
                "scope": function(_d) { return scopes.has(_d.id); }
            });

    });
}
// Parse a DMRS XML string and draw it, replacing any previous visualisation.
//
// xml_text: the XML to visualise. A falsy value (e.g. an empty string) is
// reported with an error alert and nothing is drawn.
//
// While working, an informational alert is shown; it is cleared once the graph
// has been drawn. If parseXmlDMRS returns a falsy value (it returns null after
// showing its own alert when the XML cannot be parsed), nothing is drawn and
// that alert is left in place.
function visualizeSentence(xml_text) {
    clearVizArtifacts();
    clearAlertMessage();

    displayAlert('Visualizing...', 'alert-info');

    if (!xml_text) {
        displayAlert('Error: No XML provided.', 'alert-danger');
        return;
    }

    // Keep the parsed graph local rather than assigning to an undeclared
    // (implicitly global) variable.
    var graph = parseXmlDMRS(xml_text);

    if (graph) {
        showGraphs(graph);
        clearAlertMessage();
    }
}
// Build the display string for a node's predicate.
//
// node: a DOM element for a DMRS <node>; only getElementsByTagName is used.
//
// If the node has a 'realpred' child, the result is its 'lemma', 'pos' and
// 'sense' attributes joined with underscores and prefixed with an underscore;
// attributes that are absent (getAttribute returns null) are left out.
// Otherwise the text of the first 'gpred' child is returned with the first
// occurrence of '_rel' removed.
function parseNodePred(node) {
    // All working variables are declared locally; assigning to undeclared
    // names would create globals shared with (and clobbered by) other code.
    var realpred = node.getElementsByTagName('realpred');

    if (realpred.length > 0) {
        var pred_elements = [
            '',  // empty first element yields the leading underscore
            realpred[0].getAttribute('lemma'),
            realpred[0].getAttribute('pos'),
            realpred[0].getAttribute('sense')
        ];
        return pred_elements.filter(function(val) { return val !== null; }).join('_');
    }

    var gpred = node.getElementsByTagName('gpred')[0].textContent;
    return gpred.replace('_rel', '');
}
class TestAlignedMatching(unittest.TestCase):
    """Tests for pydmrs.matching.aligned_matching on the example DMRS graphs."""

    def setUp(self):
        """Build the example graphs, each converted to a sorted DMRS keyed by span_pred_key."""
        self.the_cat = examples_dmrs.the_cat().convert_to(
            abstractSortDictDmrs(node_key=span_pred_key))
        # Checks if the matching code converts to SortDictDmrs with span_pred_key
        self.the_cat_chases_the_dog = examples_dmrs.the_cat_chases_the_dog().convert_to(
            abstractSortDictDmrs(node_key=span_pred_key))
        self.the_dog_chases_the_cat = examples_dmrs.the_dog_chases_the_cat().convert_to(
            abstractSortDictDmrs(node_key=span_pred_key))
        self.the_mouse = examples_dmrs.the_mouse() \
            .convert_to(abstractSortDictDmrs(node_key=span_pred_key))
        self.dog_cat = examples_dmrs.dog_cat() \
            .convert_to(abstractSortDictDmrs(node_key=span_pred_key))
        # All other DMRS used here should undergo conversion as well.

    def test_match_nodes(self):
        """match_nodes returns lists of (nodeid, nodeid) pairs, or [] for empty input."""
        nodes1 = self.the_dog_chases_the_cat.nodes
        nodes2 = self.the_cat_chases_the_dog.nodes
        matches = aligned_matching.match_nodes(nodes1, nodes2)
        self.assertEqual(len(matches), 1)
        self.assertListEqual(matches[0], [(4, 4), (3, 3), (1, 1)])

        # Return [] if either of the node lists is empty.
        self.assertListEqual(aligned_matching.match_nodes([], nodes1), [])
        self.assertListEqual(aligned_matching.match_nodes(nodes1, []), [])

        # "the cat" against "the dog chases the cat": two alternative matches.
        nodes3 = self.the_cat.nodes
        matches = aligned_matching.match_nodes(nodes3, nodes1)
        self.assertEqual(len(matches), 2)
        self.assertListEqual(matches[0], [(2, 5), (1, 1)])
        self.assertListEqual(matches[1], [(2, 5), (1, 4)])

    def test_find_extra_surface_nodeids(self):
        """find_extra_surface_nodeids lists the node ids lying between the given ones."""
        nodeids = [1, 5]
        extras = aligned_matching.find_extra_surface_nodeids(nodeids, self.the_dog_chases_the_cat)
        self.assertListEqual(extras, [2, 3, 4])

        # No extras.
        nodeids1 = [1, 2]
        extras1 = aligned_matching.find_extra_surface_nodeids(nodeids1, self.the_cat)
        self.assertListEqual(extras1, [])

    def test_get_matching_nodeids(self):
        """get_matching_nodeids for exact, all_surface, inexact, failing and unsorted inputs."""
        # Match "the cat" onto "the dog chases the cat" (exact fit, only one match)
        matches1 = aligned_matching.get_matching_nodeids(self.the_cat, self.the_dog_chases_the_cat)
        self.assertEqual(len(matches1), 1)
        self.assertCountEqual(matches1[0], [(2, 5), (1, 4)])

        # all_surface = True
        all_matches1 = aligned_matching.get_matching_nodeids(self.the_cat,
                                                             self.the_dog_chases_the_cat,
                                                             all_surface=True)
        # The same as earlier
        self.assertListEqual(matches1[0], all_matches1[0])
        # Extra surface nodes: between dog and cat

        all_matches1 = aligned_matching.get_matching_nodeids(self.dog_cat,
                                                             self.the_dog_chases_the_cat,
                                                             all_surface=True)
        self.assertCountEqual(all_matches1[0], [(2, 5), (1, 2), (None, 3), (None, 4)])

        # Match "the dog chases the cat" onto "the cat chases the dog" (inexact fit)
        matches2 = aligned_matching.get_matching_nodeids(self.the_dog_chases_the_cat,
                                                         self.the_cat_chases_the_dog)
        # Two options: "the dog" matches or "the cat" matches, 'chases' doesn't because it's not part of the longest match
        self.assertEqual(len(matches2), 2)
        self.assertCountEqual(matches2, [[(5, 2), (4, 1)], [(2, 5), (1, 4)]])

        # No match found
        matches = aligned_matching.get_matching_nodeids(self.the_mouse, self.dog_cat)
        self.assertListEqual(matches, [])

        # Should be the same as 'the cat'.
        # Built by hand as a plain ListDmrs with the nodes added in reverse
        # id order, i.e. without the conversion applied in setUp.
        mixed_cat = ListDmrs(surface='the cat')
        mixed_cat.add_node(Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
                                sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')))
        mixed_cat.add_node(Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3))
        mixed_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
        mixed = aligned_matching.get_matching_nodeids(mixed_cat, self.the_dog_chases_the_cat)
        self.assertListEqual(mixed, matches1)

    def test_get_score(self):
        """get_score on a matched subgraph, without and with all_surface matching."""
        matches = aligned_matching.get_matching_nodeids(self.the_cat, self.the_dog_chases_the_cat)
        subgraph1 = aligned_matching.get_matched_subgraph(matches[0], self.the_dog_chases_the_cat)
        score1 = aligned_matching.get_score(self.the_cat, subgraph1, matches[0])
        self.assertEqual(score1, (3, 3, 3))  # 'the', 'cat' and the link

        # all_surface = True
        all_surface_matches = aligned_matching.get_matching_nodeids(self.dog_cat,
                                                                    self.the_dog_chases_the_cat,
                                                                    all_surface=True)
        subgraph1a = aligned_matching.get_matched_subgraph(all_surface_matches[0],
                                                           self.the_dog_chases_the_cat)
        # NOTE(review): the matches above were computed for self.dog_cat, but
        # the score is taken against self.the_cat — confirm this is intended.
        score1a = aligned_matching.get_score(self.the_cat, subgraph1a, all_surface_matches[0])
        self.assertEqual(score1a, (2, 7, 3))
16 | def test_Match_init(self): 17 | self.assertEqual(general_matching.Match().nodeid_pairs, []) 18 | self.assertEqual(general_matching.Match().link_pairs, []) 19 | self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2)]) 20 | self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'), 21 | Link(1, 2, 'RSTR', 'H'))]) 22 | 23 | def test_Match_len(self): 24 | self.assertEqual(len(self.match), 3) 25 | self.assertEqual(len(general_matching.Match()), 0) 26 | 27 | def test_Match_add(self): 28 | self.assertIsNone(self.match.add(general_matching.Match())) 29 | self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2)]) 30 | self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'), 31 | Link(1, 2, 'RSTR', 'H'))]) 32 | 33 | incompatible_match = general_matching.Match([(1, 2), (8, 1)], [(Link(1, 8, 'RSTR', 'H'), 34 | Link(1, 2, 'RSTR', 'H'))]) 35 | self.match.add(incompatible_match) 36 | self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2), (8, 1)]) 37 | self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'), 38 | Link(1, 2, 'RSTR', 'H'))]) 39 | 40 | compatible_match = general_matching.Match([(1, 5), (3, 4)], [(Link(1, 3, 'ARG1', 'NEQ'), 41 | Link(1, 5, 'ARG2', 'NEQ'))]) 42 | self.match.add(compatible_match) 43 | self.assertCountEqual(self.match.nodeid_pairs, [(2, 3), (4, 2), (1, 5), (8, 1), (3, 4)]) 44 | self.assertCountEqual(self.match.link_pairs, [(Link(4, 5, 'RSTR', 'H'), 45 | Link(1, 2, 'RSTR', 'H')), 46 | (Link(1, 3, 'ARG1', 'NEQ'), 47 | Link(1, 5, 'ARG2', 'NEQ'))]) 48 | 49 | 50 | class TestGeneralMatching(unittest.TestCase): 51 | def setUp(self): 52 | self.large_dmrs = examples_dmrs.the_dog_chases_the_cat_and_the_cat_chases_the_mouse() 53 | self.small_dmrs = examples_dmrs.the_dog_chases_the_cat() 54 | self.cat_dmrs = examples_dmrs.the_cat() 55 | self.reverse_dmrs = examples_dmrs.the_cat_chases_the_dog() 56 | 57 | def test_find_best_matches(self): 58 | # Match "the cat" onto "the dog chases the cat" (exact 
fit) 59 | matches = general_matching.find_best_matches(self.cat_dmrs, self.small_dmrs) 60 | 61 | self.assertEqual(len(matches), 1) 62 | self.assertCountEqual(matches[0].nodeid_pairs, [(2, 5), (1, 4)]) 63 | self.assertCountEqual(matches[0].link_pairs, 64 | [(Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))]) 65 | 66 | # Match "the dog chases the cat" onto "the cat chases the dog" (inexact fit) 67 | matches = general_matching.find_best_matches(self.small_dmrs, self.reverse_dmrs) 68 | self.assertEqual(len(matches), 1) 69 | self.assertCountEqual(matches[0].nodeid_pairs, [(5, 2), (4, 1), (3, 3), (2, 5), (1, 4)]) 70 | self.assertCountEqual(matches[0].link_pairs, 71 | [(Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H')), 72 | (Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))]) 73 | 74 | # No match found 75 | matches = general_matching.find_best_matches(examples_dmrs.the_mouse(), self.reverse_dmrs) 76 | self.assertIsNone(matches) 77 | 78 | # More than one match found. 79 | matches = general_matching.find_best_matches(self.cat_dmrs, self.large_dmrs) 80 | self.assertEqual(len(matches), 2) 81 | self.assertCountEqual(matches[0].nodeid_pairs, [(2, 5), (1, 4)]) 82 | self.assertCountEqual(matches[0].link_pairs, 83 | [(Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))]) 84 | self.assertCountEqual(matches[1].nodeid_pairs, [(2, 8), (1, 7)]) 85 | self.assertCountEqual(matches[1].link_pairs, 86 | [(Link(1, 2, 'RSTR', 'H'), Link(7, 8, 'RSTR', 'H'))]) 87 | 88 | def test_get_matched_subgraph(self): 89 | match = general_matching.find_best_matches(self.cat_dmrs, self.small_dmrs)[0] 90 | subgraph = general_matching.get_matched_subgraph(self.small_dmrs, match) 91 | expected = DictDmrs(nodes=[Node(nodeid=4, pred=RealPred('the', 'q')), 92 | Node(nodeid=5, pred=RealPred('cat', 'n', '1'), 93 | sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))], 94 | links=[Link(start=4, end=5, rargname='RSTR', post='H')]) 95 | self.assertListEqual(subgraph.nodes, expected.nodes) 96 | 
self.assertListEqual(subgraph.links, expected.links) 97 | 98 | def test_get_recall_fscore(self): 99 | exact_matches = general_matching.find_best_matches(self.cat_dmrs, self.small_dmrs) 100 | inexact_matches = general_matching.find_best_matches(self.small_dmrs, self.reverse_dmrs) 101 | # Exact 102 | self.assertEqual(pydmrs.matching.match_evaluation.get_recall(exact_matches[0], self.cat_dmrs), 1) 103 | self.assertEqual(pydmrs.matching.match_evaluation.get_fscore(exact_matches[0], self.cat_dmrs), 1) 104 | # Inexact 105 | self.assertAlmostEqual(pydmrs.matching.match_evaluation.get_recall(inexact_matches[0], self.small_dmrs), 106 | 7 / 9) 107 | self.assertAlmostEqual(pydmrs.matching.match_evaluation.get_fscore(inexact_matches[0], self.small_dmrs), 108 | 0.875) 109 | 110 | # List of matches instead of Match. 111 | with self.assertRaises(PydmrsTypeError): 112 | pydmrs.matching.match_evaluation.get_recall(exact_matches, self.cat_dmrs) 113 | with self.assertRaises(PydmrsTypeError): 114 | pydmrs.matching.match_evaluation.get_fscore(exact_matches, self.cat_dmrs) 115 | 116 | def test_get_missing_elements(self): 117 | match = general_matching.find_best_matches(examples_dmrs.the_dog_chases_the_mouse(), 118 | self.small_dmrs)[0] 119 | missing = pydmrs.matching.match_evaluation.get_missing_elements(match, 120 | examples_dmrs.the_dog_chases_the_mouse()) 121 | self.assertCountEqual(missing, [4, 5, Link(3, 5, 'ARG2', 'NEQ'), Link(4, 5, 'RSTR', 'H')]) 122 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import warnings 3 | 4 | from pydmrs._exceptions import PydmrsTypeError, PydmrsValueError 5 | from pydmrs.components import Pred, GPred, Sortinfo, EventSortinfo, InstanceSortinfo 6 | from pydmrs.core import ( 7 | Link, LinkLabel, 8 | Node, span_pred_key, abstractSortDictDmrs) 9 | from examples import examples_dmrs 10 | 11 | 12 | 
class TestLink(unittest.TestCase):
    """
    Test methods of Link and LinkLabel classes
    """

    def test_Link_new(self):
        """
        Links should have exactly four slots (start, end, rargname, post).
        The constructor should take either positional or keyword arguments.
        The slots should be accessible by attribute names.
        """
        # Check four arguments
        self.assert_ex_link(Link(0, 1, 'RSTR', 'H'))
        self.assert_ex_link(Link(start=0, end=1, rargname='RSTR', post='H'))

        # Check None values
        self.assertIsNone(Link(0, 1, '', 'H').rargname)
        self.assertIsNone(Link(0, 1, 'RSTR', 'NONE').post)
        self.assertIsNone(Link(0, 1, 'NULL', 'H').rargname)
        self.assertIsNone(Link(0, 1, 'RSTR', 'NIL').post)

        # Check wrong numbers of arguments
        with self.assertRaises(TypeError):
            Link(0, 1, 2)
        with self.assertRaises(TypeError):
            Link(0, 1, 2, 3, 4)

        # Check equal start and end.
        # The 'error' filter turns the warning into an exception; it is
        # installed inside catch_warnings() so that the global filter list
        # is restored even though Link(...) raises, and the filter cannot
        # leak into the tests that run afterwards.
        with warnings.catch_warnings():
            warnings.simplefilter('error')
            with self.assertRaises(Warning):
                Link(0, 0, 1, 2)

    # Helper function for test_Link_new
    def assert_ex_link(self, link):
        """
        Assert that the link is the example link (0 - RSTR/H -> 1).
        """
        self.assertEqual(link.start, 0)
        self.assertEqual(link.end, 1)
        self.assertEqual(link.rargname, 'RSTR')
        self.assertEqual(link.post, 'H')

    def test_Link_str(self):
        """
        The 'informal' string representation of a Link
        should show a labelled arrow pointing from the start to the end
        """
        link = Link(0, 1, 'RSTR', 'H')
        self.assertEqual(str(link), "(0 - RSTR/H -> 1)")

    def test_Link_repr(self):
        """
        The 'official' string representation of a Link
        should evaluate to an equivalent Link
        """
        link = Link(0, 1, 'RSTR', 'H')
        # eval is applied only to the repr of an object built in this test
        self.assertEqual(link, eval(repr(link)))

    def test_Link_label(self):
        """
        The label of a link should be a LinkLabel
        """
        link = Link(0, 1, 'RSTR', 'H')
        label = LinkLabel('RSTR', 'H')
        self.assertIsInstance(link.label, LinkLabel)
        self.assertEqual(link.label, label)

    def test_Link_labelstring(self):
        """
        The labelstring of a link should be its label's string
        """
        link = Link(0, 1, 'RSTR', 'H')
        labelstring = 'RSTR/H'
        self.assertEqual(link.labelstring, labelstring)

    def test_Link_copy(self):
        """
        copy.copy should return an equal Link
        copy.deepcopy should also return an equal Link
        """
        from copy import copy, deepcopy
        link = Link(0, 1, 'RSTR', 'H')
        link_copy = copy(link)
        link_deep = deepcopy(link)
        self.assertEqual(link, link_copy)
        self.assertEqual(link, link_deep)
        self.assertIsNot(link, link_copy)
        self.assertIsNot(link, link_deep)
        # Note that it doesn't make sense to check
        # if link.end is not link_deep.end,
        # because identical strings and ints are considered to be the same

    def test_LinkLabel_new(self):
        """
        LinkLabels should have exactly two slots (rargname, post).
        The constructor should take either positional or keyword arguments.
        The slots should be accessible by attribute names.
        """
        # Check two arguments
        self.assert_rstr_h(LinkLabel('RSTR', 'H'))
        self.assert_rstr_h(LinkLabel(rargname='RSTR', post='H'))

        # Check wrong numbers of arguments
        with self.assertRaises(TypeError):
            LinkLabel(0, 1, 2)
        with self.assertRaises(TypeError):
            LinkLabel(0, 1, 2, 3, 4)

    # Helper function for test_LinkLabel_new
    def assert_rstr_h(self, linklabel):
        """
        Assert that the label is the example label RSTR/H.
        """
        self.assertEqual(linklabel.rargname, 'RSTR')
        self.assertEqual(linklabel.post, 'H')

    def test_LinkLabel_str(self):
        """
        The 'informal' string representation of a LinkLabel
        should have a slash between the rargname and post
        """
        label = LinkLabel('RSTR', 'H')
        self.assertEqual(str(label), "RSTR/H")

    def test_LinkLabel_repr(self):
        """
        The 'official' string representation of a LinkLabel
        should evaluate to an equivalent LinkLabel
        """
        label = LinkLabel('RSTR', 'H')
        # eval is applied only to the repr of an object built in this test
        self.assertEqual(label, eval(repr(label)))

    def test_LinkLabel_copy(self):
        """
        copy.copy should return an equal LinkLabel
        copy.deepcopy should also return an equal LinkLabel
        """
        from copy import copy, deepcopy
        label = LinkLabel('RSTR', 'H')
        label_copy = copy(label)
        label_deep = deepcopy(label)
        self.assertEqual(label, label_copy)
        self.assertEqual(label, label_deep)
        self.assertIsNot(label, label_copy)
        self.assertIsNot(label, label_deep)
        # Note that it doesn't make sense to check
        # if label.post is not label_deep.post,
        # because identical strings are considered to be the same


class TestNode(unittest.TestCase):
    """
    Test methods for Node class.
    """

    def test_Node_init(self):
        """
        The constructor should store its arguments, validate the span and
        the carg, and normalise the pred and the sortinfo.
        """
        node = Node(nodeid=13, pred='the_q', surface='cat', base='x', cfrom=23, cto=27,
                    carg='Kim', )
        self.assertEqual(node.nodeid, 13)
        self.assertEqual(node.surface, 'cat')
        self.assertEqual(node.base, 'x')

        self.assertEqual(node.cfrom, 23)
        self.assertEqual(node.cto, 27)
        # Incorrect span
        with self.assertRaises(PydmrsValueError):
            Node(cfrom=22, cto=7)

        self.assertEqual(node.carg, 'Kim')
        # Fix carg with "".
        self.assertEqual(Node(carg='"Kim"').carg, 'Kim')
        # Unaccounted " in carg
        with self.assertRaises(PydmrsValueError):
            Node(carg='"Kim')

        # String pred.
        self.assertEqual(node.pred, GPred('the_q'))
        # Other pred
        self.assertEqual(Node(pred=GPred('the_q')).pred, GPred('the_q'))

        # Allow None for sortinfo.
        self.assertIsNone(Node().sortinfo)
        # Dict sortinfo
        self.assertEqual(Node(sortinfo={'cvarsort': 'i', 'pers': '3'}).sortinfo,
                         InstanceSortinfo(pers='3'))
        # Sortinfo sortinfo
        self.assertEqual(Node(sortinfo=InstanceSortinfo(pers='3')).sortinfo,
                         InstanceSortinfo(pers='3'))
        # List sortinfo
        self.assertEqual(Node(sortinfo=[('cvarsort', 'i'), ('pers', '3')]).sortinfo,
                         InstanceSortinfo(pers='3'))
        # But nothing else.
        with self.assertRaises(PydmrsTypeError):
            Node(sortinfo="x[pers=3, num=sg, ind=+]")

    def test_Node_str(self):
        """
        The string of a Node should show its pred, carg and sortinfo;
        the string of an unspecified Node should be "None".
        """
        node = Node()
        self.assertEqual(str(node), "None")
        node = Node(nodeid=2, pred='_dog_n_1',
                    sortinfo=dict(cvarsort='i', pers='3', num='sg', ind='+'), carg='Pat')
        self.assertEqual(str(node), '_dog_n_1(Pat) x[pers=3, num=sg, ind=+]')

    def test_Node_eq(self):
        """
        Nodes should compare equal exactly when pred, sortinfo and carg agree.
        """
        # Unspecified nodes are always equal.
        node1 = Node()
        node2 = Node()
        self.assertEqual(node1, node2)

        sortinfo1 = {'cvarsort': 'e', 'tense': 'past'}
        sortinfo2 = {'cvarsort': 'e', 'tense': 'pres'}

        # Two nodes are equal if they have the same pred, sortinfo and carg,
        # even if all the other elements are different
        node1 = Node(nodeid=23, pred='the_q', sortinfo=sortinfo1, cfrom=2, cto=22, carg='Kim',
                     surface='cat', base='x')
        node2 = Node(nodeid=25, pred='the_q', sortinfo=sortinfo1, cfrom=15, carg='Kim',
                     surface='mad', base='w')
        self.assertEqual(node1, node2)

        # Different carg
        node2 = Node(pred='the_q', sortinfo=sortinfo1, carg='Jane')
        self.assertNotEqual(node1, node2)

        # Different pred
        node2 = Node(pred='_smile_v', sortinfo=sortinfo1, carg='Kim')
        self.assertNotEqual(node1, node2)

        # Different sortinfo.
        # The pred and carg are the same as node1's, so that the sortinfo
        # is the only thing that can make the nodes unequal.
        node2 = Node(pred='the_q', sortinfo=sortinfo2, carg='Kim')
        self.assertNotEqual(node1, node2)

    def test_Node_underspecification(self):
        """
        is_more_specific and is_less_specific should order nodes by the
        specificity of their pred, carg and sortinfo, and reject non-Nodes.
        """
        with self.assertRaises(TypeError):
            Node(pred='_the_q').is_more_specific(4)
        # complete underspecification
        self.assertFalse(Node().is_more_specific(Node()))
        self.assertFalse(Node().is_less_specific(Node()))
        # pred underspecification
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node()))
        self.assertTrue(Node(pred=Pred()).is_less_specific(Node()))
        self.assertTrue(Node().is_more_specific(Node(pred=Pred())))
        self.assertFalse(Node().is_less_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=Pred()).is_less_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(pred=GPred(name='abc'))))
        self.assertTrue(Node(pred=Pred()).is_less_specific(Node(pred=GPred(name='abc'))))
        self.assertTrue(Node(pred=GPred(name='abc')).is_more_specific(Node(pred=Pred())))
        self.assertFalse(Node(pred=GPred(name='abc')).is_less_specific(Node(pred=Pred())))
        # carg underspecification
        self.assertFalse(Node(carg='?').is_more_specific(Node()))
        self.assertTrue(Node(carg='?').is_less_specific(Node()))
        self.assertTrue(Node().is_more_specific(Node(carg='?')))
        self.assertFalse(Node().is_less_specific(Node(carg='?')))
        self.assertFalse(Node(carg='?').is_more_specific(Node(carg='?')))
        self.assertFalse(Node(carg='?').is_less_specific(Node(carg='?')))
        self.assertFalse(Node(carg='?').is_more_specific(Node(carg='abc')))
        self.assertTrue(Node(carg='?').is_less_specific(Node(carg='abc')))
        self.assertTrue(Node(carg='abc').is_more_specific(Node(carg='?')))
        self.assertFalse(Node(carg='abc').is_less_specific(Node(carg='?')))
        # sortinfo underspecification
        self.assertFalse(Node(sortinfo=Sortinfo()).is_more_specific(Node()))
        self.assertTrue(Node(sortinfo=Sortinfo()).is_less_specific(Node()))
        self.assertTrue(Node().is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node().is_less_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(sortinfo=Sortinfo()).is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(sortinfo=Sortinfo()).is_less_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(
            Node(sortinfo=Sortinfo()).is_more_specific(Node(sortinfo=EventSortinfo(sf='abc'))))
        self.assertTrue(
            Node(sortinfo=Sortinfo()).is_less_specific(Node(sortinfo=EventSortinfo(sf='abc'))))
        self.assertTrue(
            Node(sortinfo=EventSortinfo(sf='abc')).is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(
            Node(sortinfo=EventSortinfo(sf='abc')).is_less_specific(Node(sortinfo=Sortinfo())))
        # mixed specification
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(carg='?')))
        self.assertFalse(Node(pred=Pred()).is_less_specific(Node(carg='?')))
        self.assertFalse(Node(pred=Pred()).is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(pred=Pred()).is_less_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(carg='?').is_more_specific(Node(sortinfo=Sortinfo())))
        self.assertFalse(Node(carg='?').is_less_specific(Node(sortinfo=Sortinfo())))

    def test_Node_span(self):
        """
        The span of a Node should be the pair (cfrom, cto).
        """
        node = Node(cfrom=2, cto=15)
        self.assertEqual(node.span, (2, 15))

    def test_Node_isgpred_realpred_node(self):
        """
        is_gpred_node and is_realpred_node should reflect the type of the pred.
        """
        gnode = Node(pred='the_q')
        realnode = Node(pred='_cat_n')
        self.assertTrue(gnode.is_gpred_node)
        self.assertTrue(realnode.is_realpred_node)
        self.assertFalse(gnode.is_realpred_node)
        self.assertFalse(realnode.is_gpred_node)


class TestDmrs(unittest.TestCase):
    """
    Test methods of a Dmrs, using the example 'the dog chases the cat'.
    """

    def setUp(self):
        """
        Build a fresh example DMRS for each test, since some tests add links.
        """
        self.test_dmrs = examples_dmrs.the_dog_chases_the_cat()

    def test_contains(self):
        """
        A nodeid should be 'in' the DMRS only if the DMRS has such a node.
        """
        self.assertIn(4, self.test_dmrs)
        self.assertNotIn(16, self.test_dmrs)

    def test_iter_outgoing(self):
        """
        iter_outgoing should return an iterator over the links starting
        at the given node, and raise an error for an unknown nodeid.
        """
        with self.assertRaises(PydmrsValueError):
            self.test_dmrs.iter_outgoing(15)

        self.test_dmrs.add_link(Link(3, 4, 'None', 'EQ'))
        out_it = self.test_dmrs.iter_outgoing(3)
        # Check that an iterator is returned
        self.assertTrue(hasattr(out_it, '__next__'))
        # EQ link counted as outgoing
        self.assertCountEqual(list(out_it), [Link(3, 5, 'ARG2', 'NEQ'), Link(3, 2, 'ARG1', 'NEQ'),
                                             Link(3, 4, None, 'EQ')])
        # TODO: Treat EQ links symmetrically or not at all, as long as it's consistent.
        # Test e.g.
        # self.test_dmrs.add_link(Link(4, 3, 'None', 'EQ'))
        # out_it = self.test_dmrs.iter_outgoing(3)
        # self.assertIn(Link(4, 3, 'None', 'EQ'), list(out_it))

        # No outgoing links
        out_it = self.test_dmrs.iter_outgoing(2)
        with self.assertRaises(StopIteration):
            next(out_it)

    def test_iter_incoming(self):
        """
        iter_incoming should return an iterator over the links ending
        at the given node, and raise an error for an unknown nodeid.
        """
        with self.assertRaises(PydmrsValueError):
            self.test_dmrs.iter_incoming(15)

        self.test_dmrs.add_link(Link(4, 2, 'None', 'EQ'))
        in_it = self.test_dmrs.iter_incoming(2)
        # Check that an iterator is returned
        self.assertTrue(hasattr(in_it, '__next__'))
        # EQ link counted as incoming
        self.assertCountEqual(list(in_it), [Link(1, 2, 'RSTR', 'H'), Link(3, 2, 'ARG1', 'NEQ'),
                                            Link(4, 2, None, 'EQ')])

        # TODO: Treat EQ links somehow.
        # Test e.g.
        # self.test_dmrs.add_link(Link(2, 4, 'None', 'EQ'))
        # in_it = self.test_dmrs.iter_incoming(2)
        # self.assertIn(Link(2, 4, 'None', 'EQ'), list(in_it))

        # No incoming links
        in_it = self.test_dmrs.iter_incoming(3)
        with self.assertRaises(StopIteration):
            next(in_it)