├── .DS_Store ├── README.md ├── convert_to_mrp.sh ├── dataset ├── .DS_Store ├── labeled_edge_mrp │ ├── .DS_Store │ ├── ace_en │ │ └── .DS_Store │ ├── ace_p_en │ │ └── .DS_Store │ ├── ace_pp_en │ │ └── .DS_Store │ └── ace_ppp_en │ │ └── .DS_Store ├── raw │ ├── .DS_Store │ ├── ace_en │ │ └── .DS_Store │ ├── ace_p_en │ │ └── .DS_Store │ └── ace_ppp_en │ │ └── .DS_Store ├── splits │ ├── dev.txt │ ├── test.txt │ └── train.txt └── splits2 │ ├── dev.doc.txt │ ├── test.doc.txt │ └── train.doc.txt ├── evaluation ├── .DS_Store ├── evaluate.py └── evaluate_single_dataset.py ├── mtool ├── .DS_Store ├── .appveyor.yml ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── __pycache__ │ ├── analyzer.cpython-37.pyc │ ├── analyzer.cpython-38.pyc │ ├── analyzer.cpython-39.pyc │ ├── graph.cpython-37.pyc │ ├── graph.cpython-38.pyc │ ├── graph.cpython-39.pyc │ ├── inspector.cpython-37.pyc │ ├── inspector.cpython-38.pyc │ ├── inspector.cpython-39.pyc │ ├── main.cpython-37.pyc │ ├── treewidth.cpython-37.pyc │ ├── treewidth.cpython-38.pyc │ └── treewidth.cpython-39.pyc ├── analyzer.py ├── codec │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── ace.cpython-39.pyc │ │ ├── amr.cpython-37.pyc │ │ ├── amr.cpython-38.pyc │ │ ├── amr.cpython-39.pyc │ │ ├── conllu.cpython-37.pyc │ │ ├── conllu.cpython-38.pyc │ │ ├── conllu.cpython-39.pyc │ │ ├── eds.cpython-37.pyc │ │ ├── eds.cpython-38.pyc │ │ ├── eds.cpython-39.pyc │ │ ├── mrp.cpython-37.pyc │ │ ├── mrp.cpython-38.pyc │ │ ├── mrp.cpython-39.pyc │ │ ├── norec.cpython-37.pyc │ │ ├── norec.cpython-38.pyc │ │ ├── norec.cpython-39.pyc │ │ ├── pmb.cpython-37.pyc │ │ ├── pmb.cpython-38.pyc │ │ ├── pmb.cpython-39.pyc │ │ ├── sdp.cpython-37.pyc │ │ ├── sdp.cpython-38.pyc │ │ ├── sdp.cpython-39.pyc │ │ ├── treex.cpython-37.pyc │ │ ├── treex.cpython-38.pyc │ │ ├── treex.cpython-39.pyc │ │ ├── ucca.cpython-37.pyc │ │ ├── ucca.cpython-38.pyc │ │ └── 
ucca.cpython-39.pyc │ ├── ace.py │ ├── amr.py │ ├── conllu.py │ ├── eds.py │ ├── mrp.py │ ├── norec.py │ ├── pmb.py │ ├── sdp.py │ ├── treex.py │ └── ucca.py ├── data │ ├── .DS_Store │ ├── sample │ │ ├── Makefile │ │ ├── README.txt │ │ ├── amr │ │ │ ├── wsj.amr │ │ │ └── wsj.mrp │ │ ├── dm │ │ │ ├── wsj.mrp │ │ │ └── wsj.sdp │ │ ├── eds │ │ │ ├── wsj.eds │ │ │ └── wsj.mrp │ │ ├── norec │ │ │ └── train.json │ │ ├── psd │ │ │ ├── wsj.mrp │ │ │ └── wsj.sdp │ │ ├── ucca │ │ │ ├── wsj.mrp │ │ │ └── xml │ │ │ │ ├── files.txt │ │ │ │ ├── wsj_0001.1.xml │ │ │ │ ├── wsj_0001.2.xml │ │ │ │ ├── wsj_0002.1.xml │ │ │ │ ├── wsj_0003.1.xml │ │ │ │ ├── wsj_0003.10.xml │ │ │ │ ├── wsj_0003.11.xml │ │ │ │ ├── wsj_0003.12.xml │ │ │ │ ├── wsj_0003.13.xml │ │ │ │ ├── wsj_0003.14.xml │ │ │ │ ├── wsj_0003.15.xml │ │ │ │ ├── wsj_0003.16.xml │ │ │ │ ├── wsj_0003.17.xml │ │ │ │ ├── wsj_0003.18.xml │ │ │ │ ├── wsj_0003.19.xml │ │ │ │ ├── wsj_0003.2.xml │ │ │ │ ├── wsj_0003.20.xml │ │ │ │ ├── wsj_0003.21.xml │ │ │ │ ├── wsj_0003.22.xml │ │ │ │ ├── wsj_0003.23.xml │ │ │ │ ├── wsj_0003.24.xml │ │ │ │ ├── wsj_0003.25.xml │ │ │ │ ├── wsj_0003.26.xml │ │ │ │ ├── wsj_0003.27.xml │ │ │ │ ├── wsj_0003.28.xml │ │ │ │ ├── wsj_0003.29.xml │ │ │ │ ├── wsj_0003.3.xml │ │ │ │ ├── wsj_0003.30.xml │ │ │ │ ├── wsj_0003.4.xml │ │ │ │ ├── wsj_0003.5.xml │ │ │ │ ├── wsj_0003.7.xml │ │ │ │ ├── wsj_0003.8.xml │ │ │ │ ├── wsj_0003.9.xml │ │ │ │ ├── wsj_0004.1.xml │ │ │ │ ├── wsj_0004.10.xml │ │ │ │ ├── wsj_0004.11.xml │ │ │ │ ├── wsj_0004.12.xml │ │ │ │ ├── wsj_0004.14.xml │ │ │ │ ├── wsj_0004.15.xml │ │ │ │ ├── wsj_0004.16.xml │ │ │ │ ├── wsj_0004.17.xml │ │ │ │ ├── wsj_0004.2.xml │ │ │ │ ├── wsj_0004.4.xml │ │ │ │ ├── wsj_0004.5.xml │ │ │ │ ├── wsj_0004.6.xml │ │ │ │ ├── wsj_0004.7.xml │ │ │ │ ├── wsj_0004.8.xml │ │ │ │ ├── wsj_0004.9.xml │ │ │ │ ├── wsj_0005.1.xml │ │ │ │ ├── wsj_0005.2.xml │ │ │ │ ├── wsj_0005.3.xml │ │ │ │ ├── wsj_0007.1.xml │ │ │ │ ├── wsj_0007.2.xml │ │ │ │ ├── wsj_0007.3.xml │ │ │ │ ├── 
wsj_0007.4.xml │ │ │ │ ├── wsj_0008.1.xml │ │ │ │ ├── wsj_0008.2.xml │ │ │ │ ├── wsj_0008.3.xml │ │ │ │ ├── wsj_0008.4.xml │ │ │ │ ├── wsj_0008.5.xml │ │ │ │ ├── wsj_0008.6.xml │ │ │ │ ├── wsj_0009.1.xml │ │ │ │ ├── wsj_0009.2.xml │ │ │ │ ├── wsj_0009.3.xml │ │ │ │ ├── wsj_0009.4.xml │ │ │ │ ├── wsj_0010.1.xml │ │ │ │ ├── wsj_0010.10.xml │ │ │ │ ├── wsj_0010.11.xml │ │ │ │ ├── wsj_0010.12.xml │ │ │ │ ├── wsj_0010.13.xml │ │ │ │ ├── wsj_0010.15.xml │ │ │ │ ├── wsj_0010.16.xml │ │ │ │ ├── wsj_0010.17.xml │ │ │ │ ├── wsj_0010.18.xml │ │ │ │ ├── wsj_0010.19.xml │ │ │ │ ├── wsj_0010.2.xml │ │ │ │ ├── wsj_0010.20.xml │ │ │ │ ├── wsj_0010.3.xml │ │ │ │ ├── wsj_0010.6.xml │ │ │ │ ├── wsj_0010.7.xml │ │ │ │ ├── wsj_0010.8.xml │ │ │ │ ├── wsj_0011.1.xml │ │ │ │ ├── wsj_0011.2.xml │ │ │ │ ├── wsj_0011.4.xml │ │ │ │ ├── wsj_0011.5.xml │ │ │ │ ├── wsj_0011.6.xml │ │ │ │ ├── wsj_0011.7.xml │ │ │ │ ├── wsj_0011.8.xml │ │ │ │ ├── wsj_0012.1.xml │ │ │ │ ├── wsj_0012.2.xml │ │ │ │ ├── wsj_0012.3.xml │ │ │ │ ├── wsj_0012.4.xml │ │ │ │ └── wsj_0012.5.xml │ │ ├── wsj.ids │ │ └── wsj.txt │ ├── score │ │ ├── .DS_Store │ │ ├── Makefile │ │ ├── amr │ │ │ ├── 233.gold.amr │ │ │ ├── 233.gold.dot │ │ │ ├── 233.gold.pdf │ │ │ ├── 233.system.amr │ │ │ ├── 233.system.dot │ │ │ ├── 233.system.pdf │ │ │ ├── coli.gold.amr │ │ │ ├── coli.system.amr │ │ │ ├── first.gold.amr │ │ │ ├── first.system.amr │ │ │ ├── partial.gold.mrp │ │ │ ├── partial.system.mrp │ │ │ ├── test1.amr │ │ │ ├── test1.mrp │ │ │ ├── test2.amr │ │ │ └── test2.mrp │ │ ├── dm │ │ │ ├── empty.gold.mrp │ │ │ ├── empty.peking.mrp │ │ │ └── peking.wsj.sdp │ │ ├── eds │ │ │ ├── lpps.102990.png │ │ │ ├── lpps.peking.mrp │ │ │ ├── wsj.pet.eds │ │ │ └── wsj.pet.mrp │ │ ├── lpps.mrp │ │ ├── psd │ │ │ ├── 107480.foxik.mrp │ │ │ ├── 107480.gold.mrp │ │ │ └── peking.brown.sdp │ │ ├── revisions.txt │ │ ├── test.slurm │ │ └── ucca │ │ │ ├── anchors.gold.mrp │ │ │ ├── anchors.tupa.mrp │ │ │ ├── ewt.gold.mrp │ │ │ ├── ewt.tupa.mrp │ │ │ ├── id.mrp 
│ │ │ ├── koller.mrp │ │ │ ├── small.gold.mrp │ │ │ ├── small.gold.pdf │ │ │ ├── small.tupa.mrp │ │ │ ├── small.tupa.pdf │ │ │ ├── test.gold.mrp │ │ │ ├── test.gold.pdf │ │ │ ├── test.tupa.mrp │ │ │ └── test.tupa.pdf │ ├── validate │ │ ├── Makefile │ │ └── eds │ │ │ └── wsj.mrp │ └── wsj.txt ├── graph.py ├── inspector.py ├── main.py ├── score │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── core.cpython-37.pyc │ │ ├── core.cpython-38.pyc │ │ ├── core.cpython-39.pyc │ │ ├── edm.cpython-37.pyc │ │ ├── edm.cpython-38.pyc │ │ ├── edm.cpython-39.pyc │ │ ├── mces.cpython-37.pyc │ │ ├── mces.cpython-38.pyc │ │ ├── mces.cpython-39.pyc │ │ ├── sdp.cpython-37.pyc │ │ ├── sdp.cpython-38.pyc │ │ ├── sdp.cpython-39.pyc │ │ ├── smatch.cpython-37.pyc │ │ ├── smatch.cpython-38.pyc │ │ ├── smatch.cpython-39.pyc │ │ ├── ucca.cpython-37.pyc │ │ ├── ucca.cpython-38.pyc │ │ └── ucca.cpython-39.pyc │ ├── core.py │ ├── edm.py │ ├── mces.py │ ├── rrhc.py │ ├── sdp.py │ ├── smatch.py │ └── ucca.py ├── setup.py ├── smatch │ ├── LICENSE.txt │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── amr.cpython-37.pyc │ │ ├── amr.cpython-38.pyc │ │ ├── amr.cpython-39.pyc │ │ ├── smatch.cpython-37.pyc │ │ ├── smatch.cpython-38.pyc │ │ └── smatch.cpython-39.pyc │ ├── amr.py │ └── smatch.py ├── treewidth.py ├── ucca │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── convert.cpython-37.pyc │ │ ├── convert.cpython-38.pyc │ │ ├── convert.cpython-39.pyc │ │ ├── core.cpython-37.pyc │ │ ├── core.cpython-38.pyc │ │ ├── core.cpython-39.pyc │ │ ├── ioutil.cpython-37.pyc │ │ ├── ioutil.cpython-38.pyc │ │ ├── ioutil.cpython-39.pyc │ │ ├── layer0.cpython-37.pyc │ │ ├── layer0.cpython-38.pyc │ │ ├── layer0.cpython-39.pyc 
│ │ ├── layer1.cpython-37.pyc │ │ ├── layer1.cpython-38.pyc │ │ ├── layer1.cpython-39.pyc │ │ ├── normalization.cpython-37.pyc │ │ ├── normalization.cpython-38.pyc │ │ ├── normalization.cpython-39.pyc │ │ ├── textutil.cpython-37.pyc │ │ ├── textutil.cpython-38.pyc │ │ └── textutil.cpython-39.pyc │ ├── convert.py │ ├── core.py │ ├── ioutil.py │ ├── layer0.py │ ├── layer1.py │ ├── normalization.py │ └── textutil.py ├── validate │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── amr.cpython-37.pyc │ │ ├── amr.cpython-38.pyc │ │ ├── amr.cpython-39.pyc │ │ ├── core.cpython-37.pyc │ │ ├── core.cpython-38.pyc │ │ ├── core.cpython-39.pyc │ │ ├── eds.cpython-37.pyc │ │ ├── eds.cpython-38.pyc │ │ ├── eds.cpython-39.pyc │ │ ├── sdp.cpython-37.pyc │ │ ├── sdp.cpython-38.pyc │ │ ├── sdp.cpython-39.pyc │ │ ├── ucca.cpython-37.pyc │ │ ├── ucca.cpython-38.pyc │ │ ├── ucca.cpython-39.pyc │ │ ├── utilities.cpython-37.pyc │ │ ├── utilities.cpython-38.pyc │ │ └── utilities.cpython-39.pyc │ ├── amr.py │ ├── core.py │ ├── eds.py │ ├── sdp.py │ ├── ucca.py │ └── utilities.py └── version.py ├── perin ├── .DS_Store ├── config │ ├── .DS_Store │ ├── __init__.py │ ├── edge_ace_e.yaml │ ├── edge_ace_e_p.yaml │ ├── edge_ace_e_pp.yaml │ ├── edge_ace_e_ppp.yaml │ └── params.py ├── convert.sh ├── data │ ├── .DS_Store │ ├── __init__.py │ ├── batch.py │ ├── dataset.py │ ├── field │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── anchor_field.py │ │ ├── anchored_label_field.py │ │ ├── basic_field.py │ │ ├── bert_field.py │ │ ├── edge_field.py │ │ ├── edge_label_field.py │ │ ├── field.py │ │ ├── label_field.py │ │ ├── mini_torchtext │ │ │ ├── __pycache__ │ │ │ │ ├── example.cpython-39.pyc │ │ │ │ ├── field.cpython-39.pyc │ │ │ │ ├── pipeline.cpython-39.pyc │ │ │ │ ├── utils.cpython-39.pyc │ │ │ │ └── vocab.cpython-39.pyc │ │ │ ├── example.py │ │ │ ├── field.py │ │ │ ├── pipeline.py │ │ │ ├── utils.py │ │ │ └── 
vocab.py │ │ └── nested_field.py │ └── parser │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── from_mrp │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── abstract_parser.py │ │ ├── evaluation_parser.py │ │ ├── labeled_edge_parser.py │ │ └── request_parser.py │ │ ├── json_parser.py │ │ └── to_mrp │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── abstract_parser.py │ │ └── labeled_edge_parser.py ├── inference.py ├── model │ ├── .DS_Store │ ├── __init__.py │ ├── head │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── abstract_head.py │ │ └── labeled_edge_head.py │ ├── model.py │ └── module │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── anchor_classifier.py │ │ ├── biaffine.py │ │ ├── bilinear.py │ │ ├── char_embedding.py │ │ ├── edge_classifier.py │ │ ├── encoder.py │ │ └── transformer.py ├── run.sh ├── run_infer.sh ├── train.py └── utility │ ├── .DS_Store │ ├── __init__.py │ ├── autoclip.py │ ├── cross_entropy.py │ ├── hungarian_matching.py │ ├── initialize.py │ ├── loading_bar.py │ ├── log.py │ ├── parser_utils.py │ ├── predict.py │ ├── schedule │ ├── __init__.py │ ├── linear_lr.py │ └── multi_scheduler.py │ ├── subtokenize.py │ └── utils.py └── preprocess ├── .DS_Store ├── convert_dygie.py ├── convert_oneie.py ├── extract_ace_events.py └── extractor.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/.DS_Store -------------------------------------------------------------------------------- /convert_to_mrp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #indata -> input json file to be converted 4 | #outdata -> output mrp file of converted graphs 5 | # $1 -> dataset: ace_en, ace_p_en, ace_pp_en, ace_ppp_en 6 | 7 | 8 | for split in train test dev; do 9 | indata=dataset/raw/"$1"/"$split".json 10 | outdata=dataset/labeled_edge_mrp/"$1"/"$split".mrp 11 | 12 | python mtool/main.py --strings 
--ids --read ace --write mrp "$indata" "$outdata" 13 | done; -------------------------------------------------------------------------------- /dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/.DS_Store -------------------------------------------------------------------------------- /dataset/labeled_edge_mrp/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/.DS_Store -------------------------------------------------------------------------------- /dataset/labeled_edge_mrp/ace_en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_en/.DS_Store -------------------------------------------------------------------------------- /dataset/labeled_edge_mrp/ace_p_en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_p_en/.DS_Store -------------------------------------------------------------------------------- /dataset/labeled_edge_mrp/ace_pp_en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_pp_en/.DS_Store -------------------------------------------------------------------------------- /dataset/labeled_edge_mrp/ace_ppp_en/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_ppp_en/.DS_Store -------------------------------------------------------------------------------- /dataset/raw/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/.DS_Store -------------------------------------------------------------------------------- /dataset/raw/ace_en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/ace_en/.DS_Store -------------------------------------------------------------------------------- /dataset/raw/ace_p_en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/ace_p_en/.DS_Store -------------------------------------------------------------------------------- /dataset/raw/ace_ppp_en/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/ace_ppp_en/.DS_Store -------------------------------------------------------------------------------- /dataset/splits/dev.txt: -------------------------------------------------------------------------------- 1 | CNN_CF_20030303_1900_02 2 | CNN_IP_20030329_1600_00_2 3 | CNN_IP_20030402_1600_00_1 4 | CNN_IP_20030405_1600_01_1 5 | CNN_IP_20030409_1600_02 6 | marcellapr_20050228_2219 7 | rec_games_chess_politics_20041216_1047 8 | rec_games_chess_politics_20041217_2111 9 | soc_org_nonprofit_20050218_1902 10 | FLOPPINGACES_20050217_1237_014 11 | AGGRESSIVEVOICEDAILY_20041116_1347 12 | 
FLOPPINGACES_20041117_2002_024 13 | FLOPPINGACES_20050203_1953_038 14 | TTRACY_20050223_1049 15 | CNNHL_ENG_20030304_142751_10 16 | CNNHL_ENG_20030424_123502_25 17 | CNNHL_ENG_20030513_220910_32 18 | CNN_ENG_20030304_173120_16 19 | CNN_ENG_20030328_150609_10 20 | CNN_ENG_20030424_070008_15 21 | CNN_ENG_20030512_170454_13 22 | CNN_ENG_20030620_085840_7 23 | AFP_ENG_20030304_0250 24 | AFP_ENG_20030305_0918 25 | AFP_ENG_20030311_0491 26 | AFP_ENG_20030314_0238 27 | AFP_ENG_20030319_0879 28 | AFP_ENG_20030320_0722 29 | AFP_ENG_20030327_0022 30 | AFP_ENG_20030327_0224 31 | -------------------------------------------------------------------------------- /dataset/splits/test.txt: -------------------------------------------------------------------------------- 1 | AFP_ENG_20030401_0476 2 | AFP_ENG_20030413_0098 3 | AFP_ENG_20030415_0734 4 | AFP_ENG_20030417_0004 5 | AFP_ENG_20030417_0307 6 | AFP_ENG_20030417_0764 7 | AFP_ENG_20030418_0556 8 | AFP_ENG_20030425_0408 9 | AFP_ENG_20030427_0118 10 | AFP_ENG_20030428_0720 11 | AFP_ENG_20030429_0007 12 | AFP_ENG_20030430_0075 13 | AFP_ENG_20030502_0614 14 | AFP_ENG_20030504_0248 15 | AFP_ENG_20030508_0118 16 | AFP_ENG_20030508_0357 17 | AFP_ENG_20030509_0345 18 | AFP_ENG_20030514_0706 19 | AFP_ENG_20030519_0049 20 | AFP_ENG_20030519_0372 21 | AFP_ENG_20030522_0878 22 | AFP_ENG_20030527_0616 23 | AFP_ENG_20030528_0561 24 | AFP_ENG_20030530_0132 25 | AFP_ENG_20030601_0262 26 | AFP_ENG_20030607_0030 27 | AFP_ENG_20030616_0715 28 | AFP_ENG_20030617_0846 29 | AFP_ENG_20030625_0057 30 | AFP_ENG_20030630_0271 31 | APW_ENG_20030304_0555 32 | APW_ENG_20030306_0191 33 | APW_ENG_20030308_0314 34 | APW_ENG_20030310_0719 35 | APW_ENG_20030311_0775 36 | APW_ENG_20030318_0689 37 | APW_ENG_20030319_0545 38 | APW_ENG_20030322_0119 39 | APW_ENG_20030324_0768 40 | APW_ENG_20030325_0786 41 | -------------------------------------------------------------------------------- /dataset/splits2/dev.doc.txt: 
-------------------------------------------------------------------------------- 1 | CNN_CF_20030303.1900.02 2 | CNN_IP_20030329.1600.00-2 3 | CNN_IP_20030402.1600.00-1 4 | CNN_IP_20030405.1600.01-1 5 | CNN_IP_20030409.1600.02 6 | marcellapr_20050228.2219 7 | rec.games.chess.politics_20041217.2111 8 | soc.org.nonprofit_20050218.1902 9 | FLOPPINGACES_20050217.1237.014 10 | AGGRESSIVEVOICEDAILY_20041116.1347 11 | FLOPPINGACES_20041117.2002.024 12 | FLOPPINGACES_20050203.1953.038 13 | TTRACY_20050223.1049 14 | CNNHL_ENG_20030304_142751.10 15 | CNNHL_ENG_20030424_123502.25 16 | CNNHL_ENG_20030513_220910.32 17 | CNN_ENG_20030304_173120.16 18 | CNN_ENG_20030328_150609.10 19 | CNN_ENG_20030424_070008.15 20 | CNN_ENG_20030512_170454.13 21 | CNN_ENG_20030620_085840.7 22 | AFP_ENG_20030305.0918 23 | AFP_ENG_20030311.0491 24 | AFP_ENG_20030314.0238 25 | AFP_ENG_20030319.0879 26 | AFP_ENG_20030320.0722 27 | AFP_ENG_20030327.0022 28 | AFP_ENG_20030327.0224 29 | -------------------------------------------------------------------------------- /dataset/splits2/test.doc.txt: -------------------------------------------------------------------------------- 1 | AFP_ENG_20030401.0476 2 | AFP_ENG_20030413.0098 3 | AFP_ENG_20030415.0734 4 | AFP_ENG_20030417.0004 5 | AFP_ENG_20030417.0307 6 | AFP_ENG_20030417.0764 7 | AFP_ENG_20030418.0556 8 | AFP_ENG_20030425.0408 9 | AFP_ENG_20030427.0118 10 | AFP_ENG_20030428.0720 11 | AFP_ENG_20030429.0007 12 | AFP_ENG_20030430.0075 13 | AFP_ENG_20030502.0614 14 | AFP_ENG_20030504.0248 15 | AFP_ENG_20030508.0118 16 | AFP_ENG_20030508.0357 17 | AFP_ENG_20030509.0345 18 | AFP_ENG_20030514.0706 19 | AFP_ENG_20030519.0049 20 | AFP_ENG_20030519.0372 21 | AFP_ENG_20030522.0878 22 | AFP_ENG_20030527.0616 23 | AFP_ENG_20030528.0561 24 | AFP_ENG_20030530.0132 25 | AFP_ENG_20030601.0262 26 | AFP_ENG_20030607.0030 27 | AFP_ENG_20030616.0715 28 | AFP_ENG_20030617.0846 29 | AFP_ENG_20030625.0057 30 | AFP_ENG_20030630.0271 31 | APW_ENG_20030304.0555 32 | 
APW_ENG_20030306.0191 33 | APW_ENG_20030308.0314 34 | APW_ENG_20030310.0719 35 | APW_ENG_20030311.0775 36 | APW_ENG_20030318.0689 37 | APW_ENG_20030319.0545 38 | APW_ENG_20030322.0119 39 | APW_ENG_20030324.0768 40 | APW_ENG_20030325.0786 41 | -------------------------------------------------------------------------------- /evaluation/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/evaluation/.DS_Store -------------------------------------------------------------------------------- /evaluation/evaluate_single_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | from evaluate import convert_event_to_tuple, trigger_f1, argument_f1, argument_span_f1 3 | import argparse 4 | 5 | 6 | def evaluate(gold_file, pred_file): 7 | 8 | with open(gold_file) as f: 9 | gold = json.load(f) 10 | 11 | with open(pred_file) as f: 12 | preds = json.load(f) 13 | 14 | tgold = dict([(s["sent_id"], convert_event_to_tuple(s)) for s in gold]) 15 | tpreds = dict([(s["sent_id"], convert_event_to_tuple(s)) for s in preds]) 16 | 17 | g = sorted(tgold.keys()) 18 | p = sorted(tpreds.keys()) 19 | 20 | if g != p: 21 | print("Missing some sentences!") 22 | return 0.0, 0.0, 0.0 23 | 24 | trigger_idf = trigger_f1(tgold, tpreds, classification=False) 25 | trigger_cls = trigger_f1(tgold, tpreds, classification=True) 26 | 27 | 28 | 29 | argument_idf = argument_f1(tgold, tpreds, classification=False) 30 | argument_cls = argument_f1(tgold, tpreds, classification=True) 31 | 32 | results = { 33 | 'trigger_identification': trigger_idf, 34 | 'trigger_classification': trigger_cls, 35 | 'argument_identification': argument_idf, 36 | 'argument_classification': argument_cls 37 | } 38 | 39 | return results 40 | 41 | 42 | def evaluate_span(gold_file, pred_file, overlap=0.75): 43 | 44 | with open(gold_file) as f: 45 | gold = 
json.load(f) 46 | 47 | with open(pred_file) as f: 48 | preds = json.load(f) 49 | 50 | tgold = dict([(s["sent_id"], convert_event_to_tuple(s)) for s in gold]) 51 | tpreds = dict([(s["sent_id"], convert_event_to_tuple(s)) for s in preds]) 52 | 53 | g = sorted(tgold.keys()) 54 | p = sorted(tpreds.keys()) 55 | 56 | if g != p: 57 | print("Missing some sentences!") 58 | return 0.0, 0.0, 0.0 59 | 60 | trigger_idf = trigger_f1(tgold, tpreds, classification=False) 61 | trigger_cls = trigger_f1(tgold, tpreds, classification=True) 62 | 63 | 64 | 65 | argument_idf = argument_span_f1(tgold, tpreds, classification=False, overlap=overlap) 66 | argument_cls = argument_span_f1(tgold, tpreds, classification=True, overlap=overlap) 67 | 68 | results = { 69 | 'trigger_identification': trigger_idf, 70 | 'trigger_classification': trigger_cls, 71 | 'argument_identification': argument_idf, 72 | 'argument_classification': argument_cls 73 | } 74 | 75 | return results 76 | 77 | def main(): 78 | parser = argparse.ArgumentParser() 79 | parser.add_argument("gold_file", help="gold json file") 80 | parser.add_argument("pred_file", help="prediction json file") 81 | parser.add_argument("--span_overlap", help="argument overlap ratio", default=1, type=float) 82 | 83 | args = parser.parse_args() 84 | 85 | if args.span_overlap < 1: 86 | results = evaluate_span(args.gold_file, args.pred_file, overlap=args.span_overlap) 87 | print(f"Evaluate arguments with span overlap ratio of: {args.span_overlap}\n") 88 | else: 89 | results = evaluate(args.gold_file, args.pred_file) 90 | 91 | print(json.dumps(results, indent=2)) 92 | print() 93 | print(list(results.values())) 94 | 95 | 96 | if __name__ == "__main__": 97 | main() 98 | -------------------------------------------------------------------------------- /mtool/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/.DS_Store 
-------------------------------------------------------------------------------- /mtool/.appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | PYTHON: C:\Python37-x64 3 | matrix: 4 | - TEST: "score dm.edm.json" 5 | - TEST: "score eds.edm.json" 6 | - TEST: "score eds.smatch.json" 7 | - TEST: "score eds.mrp.json" 8 | - TEST: "score dm.sdp.json" 9 | - TEST: "score ucca.ucca.json" 10 | - TEST: "score ucca.smatch.json" 11 | - TEST: "score ucca.mrp.json" 12 | - TEST: "score test.smatch.json" 13 | - TEST: "score coli.smatch.json" 14 | - TEST: "score coli.mrp.json" 15 | - TEST: "score unit" 16 | - TEST: "sample all" 17 | - TEST: "validate all" 18 | 19 | init: 20 | - cmd: choco install make 21 | - set PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% 22 | - cmd: copy %PYTHON%\python.exe %PYTHON%\python3.exe 23 | 24 | install: 25 | - pip install . 26 | 27 | build: off 28 | 29 | test_script: 30 | - make -C data/%TEST% 31 | -------------------------------------------------------------------------------- /mtool/.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: false 3 | group: edge 4 | language: python 5 | python: 3.6 6 | install: pip install . 
7 | env: 8 | - TEST="score dm.edm.json" 9 | - TEST="score eds.edm.json" 10 | - TEST="score eds.smatch.json" 11 | - TEST="score eds.mrp.json" 12 | - TEST="score dm.sdp.json" 13 | - TEST="score ucca.ucca.json" 14 | - TEST="score ucca.smatch.json" 15 | - TEST="score ucca.mrp.json" 16 | - TEST="score test.smatch.json" 17 | - TEST="score coli.smatch.json" 18 | - TEST="score coli.mrp.json" 19 | - TEST="score unit" 20 | - TEST="sample all" 21 | - TEST="validate all" 22 | script: 23 | - make -C data/$TEST 24 | -------------------------------------------------------------------------------- /mtool/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: history regression 2 | 3 | history: 4 | git log --pretty=tformat:"%H %ae %ai %s" -- score/mces.py 5 | 6 | regression: 7 | [ -d etc ] || mkdir etc; \ 8 | [ -d tmp ] || mkdir tmp; \ 9 | for i in $$(awk '{print $$1}' data/score/revisions.txt); do \ 10 | [ -d etc/$${i} ] || mkdir etc/$${i}; \ 11 | ( cd tmp; \ 12 | [ -d $${i} ] || git clone git@github.com:cfmrp/mtool.git $${i}; \ 13 | cd $${i}; git checkout $${i}; \ 14 | cd data/score; sbatch ../../../../data/score/test.slurm; ) \ 15 | done 16 | -------------------------------------------------------------------------------- /mtool/__pycache__/analyzer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/analyzer.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/analyzer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/analyzer.cpython-38.pyc -------------------------------------------------------------------------------- 
/mtool/__pycache__/analyzer.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/analyzer.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/graph.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/graph.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/graph.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/graph.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/graph.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/graph.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/inspector.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/inspector.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/inspector.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/inspector.cpython-38.pyc 
-------------------------------------------------------------------------------- /mtool/__pycache__/inspector.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/inspector.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/main.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/main.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/treewidth.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/treewidth.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/treewidth.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/treewidth.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/__pycache__/treewidth.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/treewidth.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__init__.py -------------------------------------------------------------------------------- /mtool/codec/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/ace.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ace.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/amr.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/amr.cpython-37.pyc -------------------------------------------------------------------------------- 
/mtool/codec/__pycache__/amr.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/amr.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/amr.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/amr.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/conllu.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/conllu.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/conllu.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/conllu.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/conllu.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/conllu.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/eds.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/eds.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/eds.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/eds.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/eds.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/eds.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/mrp.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/mrp.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/mrp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/mrp.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/mrp.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/mrp.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/norec.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/norec.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/norec.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/norec.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/norec.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/norec.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/pmb.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/pmb.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/pmb.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/pmb.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/pmb.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/pmb.cpython-39.pyc 
-------------------------------------------------------------------------------- /mtool/codec/__pycache__/sdp.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/sdp.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/sdp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/sdp.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/sdp.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/sdp.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/treex.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/treex.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/treex.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/treex.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/treex.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/treex.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/ucca.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ucca.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/ucca.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ucca.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/codec/__pycache__/ucca.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ucca.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/codec/ace.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | from graph import Graph 5 | 6 | def read(fp, text=None): 7 | def anchor(node): 8 | anchors = list() 9 | for string in node[1]: 10 | string = string.split(":") 11 | anchors.append({"from": int(string[0]), "to": int(string[1])}) 12 | return anchors 13 | 14 | for native in json.load(fp): 15 | map = dict() 16 | 17 | try: 18 | graph = Graph(native["sent_id"], flavor=1, framework="ace") 19 | graph.add_input(native["text"]) 20 | 21 | top = graph.add_node(top=True) 22 | 23 | for event in native['events']: 24 | 25 | trigger = event["trigger"] 26 | 27 | key = tuple(event['trigger'][1]) 28 | if 
key in map: 29 | trigger = map[key] 30 | else: 31 | trigger = graph.add_node( 32 | anchors=anchor(trigger) 33 | ) 34 | map[key] = trigger 35 | 36 | graph.add_edge(top.id, trigger.id, event["event_type"]) 37 | 38 | 39 | arguments = event["arguments"] 40 | 41 | if len(arguments): 42 | for argument in arguments: 43 | arg_role = argument[-1] 44 | key = tuple(argument[1]) 45 | if key in map: 46 | argument = map[key] 47 | else: 48 | argument = graph.add_node( 49 | anchors=anchor(argument) 50 | ) 51 | map[key] = argument 52 | 53 | graph.add_edge(trigger.id, argument.id, arg_role) 54 | yield graph, None 55 | 56 | except Exception as error: 57 | print( 58 | f"codec.ace.read(): ignoring {native}: {error}", 59 | file=sys.stderr 60 | ) 61 | 62 | 63 | def get_text_span(node, text): 64 | anchored_text = [text[anchor['from']:anchor['to']] for anchor in node.anchors] 65 | anchors = [f"{anchor['from']}:{anchor['to']}" for anchor in node.anchors] 66 | return anchored_text, anchors 67 | 68 | 69 | 70 | def write(graph, input): 71 | try: 72 | return write_labeled_edge(graph, input) 73 | except Exception as error: 74 | print(f"Problem with decoding sentence {graph.id}") 75 | raise error 76 | 77 | 78 | def write_labeled_edge(graph, input): 79 | 80 | nodes = {node.id: node for node in graph.nodes} 81 | 82 | # create events 83 | events = {} 84 | for edge in graph.edges: 85 | 86 | if edge.src == 0: 87 | node = nodes[edge.tgt] 88 | events[node.id] = { 89 | 'event_type': edge.lab, 90 | 'trigger': [*get_text_span(node, input)], 91 | 'arguments': [] 92 | } 93 | 94 | # add event arguments 95 | for edge in graph.edges: 96 | if edge.src != 0: 97 | 98 | node = nodes[edge.tgt] 99 | anchored_text, anchors = get_text_span(node, input) 100 | 101 | events[edge.src]['arguments'].append([anchored_text, anchors, edge.lab]) 102 | 103 | sentence = { 104 | "sent_id": graph.id, 105 | "text": input, 106 | "events": list(events.values()) 107 | } 108 | return sentence 109 | 110 | 
-------------------------------------------------------------------------------- /mtool/codec/eds.py: -------------------------------------------------------------------------------- 1 | import os.path; 2 | import re; 3 | 4 | from graph import Graph; 5 | 6 | EDS_MATCHER = re.compile(r'(.+?)(?$"); 10 | 11 | def read_instances(fp): 12 | top_handle, predicates = None, []; 13 | sentence_id = None; 14 | try: 15 | sentence_id = int(os.path.splitext(os.path.basename(fp.name))[0]); 16 | except: 17 | pass; 18 | first_curly = True 19 | for line in fp: 20 | line = line.strip() 21 | if len(line) == 0: 22 | pass 23 | elif line.startswith("#"): 24 | sentence_id = line[1:] 25 | first_curly = True 26 | elif line.startswith("{"): 27 | colon = line.index(":") 28 | assert colon >= 0 29 | top_handle = line[1:colon].strip() 30 | elif line.endswith("}"): 31 | assert len(line) == 1 32 | if first_curly: 33 | assert sentence_id is not None 34 | assert top_handle is not None 35 | assert len(predicates) > 0 36 | yield (sentence_id, top_handle, predicates) 37 | sentence_id, top_handle, predicates = None, None, [] 38 | first_curly = False 39 | else: 40 | match = EDS_MATCHER.match(line) 41 | assert match is not None 42 | node_id, label, arguments = match.groups() 43 | arguments = [tuple(arg.split()) for arg in arguments.split(',') if len(arg) > 0] 44 | predicates.append((node_id, label.strip(), arguments)) 45 | 46 | def instance2graph(instance, reify = False, text = None): 47 | sentence_id, top, predicates = instance; 48 | anchors = None; 49 | graph = Graph(sentence_id, flavor = 1, framework = "eds"); 50 | if text: graph.add_input(text); 51 | handle2node = {}; 52 | for handle, label, _ in predicates: 53 | assert handle not in handle2node 54 | properties = None; 55 | values = None; 56 | match = PROPERTIES_MATCHER.search(label); 57 | if match: 58 | label = label[:match.start()]; 59 | fields = match.group(1).replace(",", "").split(); 60 | properties, values = list(), list(); 61 | for i, field in 
enumerate(fields[1:]): 62 | if i % 2 == 0: properties.append(field); 63 | else: values.append(field); 64 | carg = None; 65 | match = CARG_MATCHER.search(label); 66 | if match: 67 | label = label[:match.start()]; 68 | if not reify: 69 | properties = ["CARG"] + properties; 70 | values = [match.group(1)] + values; 71 | else: 72 | carg = match.group(1); 73 | anchors = None; 74 | match = LNK_MATCHER.search(label); 75 | if match: 76 | label = label[:match.start()]; 77 | anchors = [{"from": int(match.group(1)), "to": int(match.group(2))}]; 78 | handle2node[handle] = \ 79 | graph.add_node(label = label, properties = properties, values = values, anchors = anchors); 80 | if carg and reify: 81 | carg = graph.add_node(label = carg, anchors = anchors); 82 | source = handle2node[handle].id; 83 | target = carg.id; 84 | graph.add_edge(source, target, "CARG"); 85 | handle2node[top].is_top = True 86 | for src_handle, _, arguments in predicates: 87 | src = handle2node[src_handle].id 88 | for relation, tgt_handle in arguments: 89 | tgt = handle2node[tgt_handle].id 90 | graph.add_edge(src, tgt, relation) 91 | return graph 92 | 93 | def read(fp, reify = False, text = None): 94 | for instance in read_instances(fp): 95 | yield instance2graph(instance, reify, text), None 96 | -------------------------------------------------------------------------------- /mtool/codec/mrp.py: -------------------------------------------------------------------------------- 1 | import json; 2 | import operator; 3 | import os; 4 | import sys; 5 | 6 | from graph import Graph 7 | 8 | def read(fp, text = None, robust = False): 9 | input, i = None, 0; 10 | def compute(form): 11 | nonlocal i; 12 | m = None; 13 | j = input.find(form, i); 14 | if j >= i: 15 | i, m = j, len(form); 16 | else: 17 | base = form; 18 | k, l = len(input), 0; 19 | for old, new in {("‘", "`"), ("‘", "'"), ("’", "'"), ("`", "'"), 20 | ("“", "\""), ("”", "\""), 21 | ("–", "--"), ("–", "---"), ("—", "---"), 22 | ("…", "..."), ("…", ". . 
.")}: 23 | form = base.replace(old, new); 24 | j = input.find(form, i); 25 | if j >= i and j < k: k, l = j, len(form); 26 | if k < len(input): i, m = k, l; 27 | if m: 28 | match = {"from": i, "to": i + m}; 29 | i += m; 30 | return match; 31 | else: 32 | raise Exception("failed to anchor |{}| in |{}|{}| ({})" 33 | "".format(form, input[:i], input[i:], i)); 34 | 35 | def anchor(graph, old, new): 36 | nonlocal input, i; 37 | strings = dict(); 38 | for node in graph.nodes: 39 | for j in range(len(node.anchors) if node.anchors else 0): 40 | start, end = node.anchors[j]["from"], node.anchors[j]["to"]; 41 | strings[(start, end)] = old[start:end]; 42 | input, i = new, 0; 43 | for key in sorted(strings.keys(), key = operator.itemgetter(0, 1)): 44 | strings[key] = compute(strings[key]); 45 | for node in graph.nodes: 46 | for j in range(len(node.anchors) if node.anchors else 0): 47 | node.anchors[j] \ 48 | = strings[(node.anchors[j]["from"], node.anchors[j]["to"])]; 49 | 50 | for j, line in enumerate(fp): 51 | try: 52 | graph = Graph.decode(json.loads(line.rstrip()), robust = robust); 53 | if text is not None: 54 | if graph.input in text: 55 | graph.id = text[graph.input]; 56 | else: 57 | old = graph.input; 58 | graph.add_input(text); 59 | anchor(graph, old, graph.input); 60 | yield graph, None; 61 | except Exception as error: 62 | print("codec.mrp.read(): ignoring line {}: {}" 63 | "".format(j, error), file = sys.stderr); 64 | -------------------------------------------------------------------------------- /mtool/codec/sdp.py: -------------------------------------------------------------------------------- 1 | from graph import Graph; 2 | 3 | def read_matrix(file): 4 | rows = []; 5 | for line in file: 6 | line = line.rstrip(); 7 | if len(line) == 0: 8 | return rows; 9 | else: 10 | rows.append(line.split("\t")); 11 | return rows or None 12 | 13 | def read_matrices(file): 14 | file.readline().rstrip(); 15 | matrix = read_matrix(file); 16 | while matrix: 17 | yield matrix; 18 | 
matrix = read_matrix(file); 19 | 20 | def matrix2graph(matrix, framework = None, text = None): 21 | graph = Graph(matrix[0][0][1:], flavor = 0, framework = framework); 22 | predicates = []; 23 | for id, row in enumerate(matrix[1:]): 24 | lemma, pos, frame, top = row[2], row[3], row[6], row[4] == '+'; 25 | if lemma == "_": lemma = row[1]; 26 | properties = {"pos": pos}; 27 | if frame != "_": properties["frame"] = frame; 28 | node = graph.add_node(id, label = lemma, 29 | properties = list(properties.keys()), 30 | values = list(properties.values()), 31 | top = top, anchors = [row[1]] if text else None); 32 | if row[5] == '+': 33 | predicates.append(id); 34 | for tgt, row in enumerate(matrix[1:]): 35 | for pred, label in enumerate(row[7:]): 36 | if label != '_': 37 | src = predicates[pred]; 38 | edge = graph.add_edge(src, tgt, label); 39 | if text: 40 | graph.add_input(text); 41 | graph.anchor(); 42 | # 43 | # finally, purge singleton (isolated) nodes 44 | # 45 | graph.nodes = [node for node in graph.nodes if not node.is_singleton()]; 46 | return graph; 47 | 48 | def read(fp, framework = None, text = None): 49 | for matrix in read_matrices(fp): 50 | yield matrix2graph(matrix, framework, text), None; 51 | -------------------------------------------------------------------------------- /mtool/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/.DS_Store -------------------------------------------------------------------------------- /mtool/data/sample/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: amr/pdf dm/pdf eds/pdf psd/pdf ucca/pdf \ 2 | clean release all 3 | 4 | amr/wsj.mrp: wsj.ids ../wsj.txt amr/wsj.amr 5 | for i in $$(cat wsj.ids); do \ 6 | ../../main.py --text ../wsj.txt --read amr \ 7 | --id $$i --write mrp ./amr/wsj.amr; \ 8 | done > $@; 9 | 10 | amr/pdf: 
11 | [ ! -d amr/dot ] && mkdir amr/dot; 12 | [ ! -d amr/pdf ] && mkdir amr/pdf; 13 | for i in $$(cat wsj.ids); do \ 14 | ../../main.py --text ../wsj.txt --read amr \ 15 | --id $$i --write dot \ 16 | ./amr/wsj.amr ./amr/dot/$$i.dot; \ 17 | done 18 | rm $$(find ./amr/dot -size 0); 19 | for i in ./amr/dot/*.dot; do \ 20 | j=$$(basename $$i .dot); \ 21 | dot -Tpdf $$i > ./amr/pdf/$${j}.pdf; \ 22 | done 23 | 24 | dm/wsj.mrp: wsj.ids ../wsj.txt dm/wsj.sdp 25 | for i in $$(cat wsj.ids); do \ 26 | ../../main.py --text ../wsj.txt --read dm \ 27 | --id $$i --write mrp ./dm/wsj.sdp; \ 28 | done > $@; 29 | 30 | dm/pdf: 31 | [ ! -d dm/dot ] && mkdir dm/dot; 32 | [ ! -d dm/pdf ] && mkdir dm/pdf; 33 | for i in $$(cat wsj.ids); do \ 34 | ../../main.py --text ../wsj.txt --read dm \ 35 | --id $$i --write dot \ 36 | ./dm/wsj.sdp ./dm/dot/$$i.dot; \ 37 | done 38 | for i in ./dm/dot/*.dot; do \ 39 | j=$$(basename $$i .dot); \ 40 | dot -Tpdf $$i > ./dm/pdf/$${j}.pdf; \ 41 | done 42 | 43 | eds/wsj.mrp: wsj.ids ../wsj.txt eds/wsj.eds 44 | for i in $$(cat wsj.ids); do \ 45 | ../../main.py --text ../wsj.txt --read eds \ 46 | --id $$i --write mrp ./eds/wsj.eds; \ 47 | done > $@; 48 | 49 | eds/pdf: 50 | [ ! -d eds/dot ] && mkdir eds/dot; 51 | [ ! -d eds/pdf ] && mkdir eds/pdf; 52 | for i in $$(cat wsj.ids); do \ 53 | ../../main.py --text ../wsj.txt --read eds \ 54 | --id $$i --write dot \ 55 | ./eds/wsj.eds ./eds/dot/$$i.dot; \ 56 | done 57 | for i in ./eds/dot/*.dot; do \ 58 | j=$$(basename $$i .dot); \ 59 | dot -Tpdf $$i > ./eds/pdf/$${j}.pdf; \ 60 | done 61 | 62 | psd/wsj.mrp: wsj.ids ../wsj.txt psd/wsj.sdp 63 | for i in $$(cat wsj.ids); do \ 64 | ../../main.py --text ../wsj.txt --read psd \ 65 | --id $$i --write mrp ./psd/wsj.sdp; \ 66 | done > $@; 67 | 68 | psd/pdf: 69 | [ ! -d psd/dot ] && mkdir psd/dot; 70 | [ ! 
-d psd/pdf ] && mkdir psd/pdf; 71 | for i in $$(cat wsj.ids); do \ 72 | ../../main.py --text ../wsj.txt --read dm \ 73 | --id $$i --write dot \ 74 | ./psd/wsj.sdp ./psd/dot/$$i.dot; \ 75 | done 76 | for i in ./psd/dot/*.dot; do \ 77 | j=$$(basename $$i .dot); \ 78 | dot -Tpdf $$i > ./psd/pdf/$${j}.pdf; \ 79 | done 80 | 81 | ucca/wsj.mrp: wsj.ids ../wsj.txt ucca/xml/files.txt ucca/xml/*.xml 82 | for i in $$(cat wsj.ids); do \ 83 | ../../main.py --text ../wsj.txt --read ucca \ 84 | --id $$i --write mrp ./ucca/xml/files.txt; \ 85 | done > $@; 86 | 87 | ucca/pdf: 88 | [ ! -d ucca/dot ] && mkdir ucca/dot; 89 | [ ! -d ucca/pdf ] && mkdir ucca/pdf; 90 | for i in $$(cat wsj.ids); do \ 91 | ../../main.py --text ../wsj.txt --read ucca \ 92 | --id $$i --write dot --strings \ 93 | ./ucca/xml/files.txt ./ucca/dot/$$i.dot; \ 94 | done 95 | rm $$(find ./ucca/dot -size 0); 96 | for i in ./ucca/dot/*.dot; do \ 97 | j=$$(basename $$i .dot); \ 98 | dot -Tpdf $$i > ./ucca/pdf/$${j}.pdf; \ 99 | done 100 | 101 | clean: 102 | rm */wsj.mrp */dot/*.dot */pdf/*pdf 103 | 104 | release: 105 | tar zpScvf ../public/sample.tgz --transform='s@^@mrp/2019/sample/@'\ 106 | README.txt Makefile \ 107 | amr/wsj.mrp dm/wsj.mrp eds/wsj.mrp psd/wsj.mrp ucca/wsj.mrp \ 108 | amr/dot amr/pdf dm/dot dm/pdf eds/dot eds/pdf \ 109 | psd/dot psd/pdf ucca/dot ucca/pdf 110 | 111 | all: amr/wsj.mrp dm/wsj.mrp eds/wsj.mrp psd/wsj.mrp ucca/wsj.mrp 112 | 113 | -------------------------------------------------------------------------------- /mtool/data/sample/README.txt: -------------------------------------------------------------------------------- 1 | 2 | CoNLL 2019 Shared Task: Meaning Representation Parsing --- Sample Graphs 3 | 4 | Version 0.9; April 9, 2019 5 | 6 | 7 | Overview 8 | ======== 9 | 10 | This directory contains a collection of 89 sample graphs in the five framworks 11 | represented in the task: AMR, DM, EDS, PSD, and UCCA. 
The sentences are drawn 12 | from Section 00 of (the Penn Treebank selection from) the venerable Wall Street 13 | Journal (WSJ) Corpus. We only include sentences for which all five graph banks 14 | provide annotations. 15 | 16 | The purpose of this sample data is twofold: (a) exemplify the uniform graph 17 | representation format (serialized in JSON) adopted for the task and (b) enable 18 | in-depth linguistic comparison across frameworks. 19 | 20 | For general information on the file format, please see: 21 | 22 | http://mrp.nlpl.eu/index.php?page=4#format 23 | 24 | 25 | Contents 26 | ======== 27 | 28 | The main contents in this release are the JSON files: 29 | 30 | $ ls -l */*.mrp 31 | -rw-r--r--. 1 oe oe 145935 Apr 8 00:11 amr/wsj.mrp 32 | -rw-r--r--. 1 oe oe 290495 Apr 8 00:12 dm/wsj.mrp 33 | -rw-r--r--. 1 oe oe 334885 Apr 8 00:13 eds/wsj.mrp 34 | -rw-r--r--. 1 oe oe 225669 Apr 8 00:14 psd/wsj.mrp 35 | -rw-r--r--. 1 oe oe 254101 Apr 9 16:07 ucca/wsj.mrp 36 | 37 | Each file contains the 89 graphs in the intersection of all frameworks (87 in 38 | the case for UCCA, for the time being). These graph serializations are in what 39 | is called the JSON Lines format, effectively a stream of JSON objects with line 40 | breaks as the separator character between objects. 41 | 42 | To ease human inspection of these graphs, this package also provides graphical 43 | renderings of all graphs, as separate files (one per sentence) in the ‘dot/’ 44 | and ‘pdf/’ sub-directories for each framework. These visualizations have been 45 | created using the MRP graph toolkit, which will be released by mid-May 2019. 46 | 47 | 48 | Known Limitations 49 | ================= 50 | 51 | None, for the time being. 52 | 53 | 54 | Release History 55 | =============== 56 | 57 | [Version 0.9; April 9, 2018] 58 | 59 | + First release of sample graphs in five frameworks: AMR, DM, EDS, UCCA, and PSD. 
60 | 61 | 62 | Contact 63 | ======= 64 | 65 | For questions or comments, please do not hesitate to email the task organizers 66 | at: ‘mrp-organizers@nlpl.eu’. 67 | 68 | Omri Abend 69 | Jan Hajič 70 | Daniel Hershcovich 71 | Marco Kuhlmann 72 | Stephan Oepen 73 | Tim O'Gorman 74 | Nianwen Xue 75 | -------------------------------------------------------------------------------- /mtool/data/sample/ucca/xml/files.txt: -------------------------------------------------------------------------------- 1 | wsj_0001.1.xml 2 | wsj_0001.2.xml 3 | wsj_0002.1.xml 4 | wsj_0003.1.xml 5 | wsj_0003.2.xml 6 | wsj_0003.3.xml 7 | wsj_0003.4.xml 8 | wsj_0003.5.xml 9 | wsj_0003.7.xml 10 | wsj_0003.8.xml 11 | wsj_0003.9.xml 12 | wsj_0003.10.xml 13 | wsj_0003.11.xml 14 | wsj_0003.12.xml 15 | wsj_0003.13.xml 16 | wsj_0003.14.xml 17 | wsj_0003.15.xml 18 | wsj_0003.16.xml 19 | wsj_0003.17.xml 20 | wsj_0003.18.xml 21 | wsj_0003.19.xml 22 | wsj_0003.20.xml 23 | wsj_0003.21.xml 24 | wsj_0003.22.xml 25 | wsj_0003.23.xml 26 | wsj_0003.24.xml 27 | wsj_0003.25.xml 28 | wsj_0003.26.xml 29 | wsj_0003.27.xml 30 | wsj_0003.28.xml 31 | wsj_0003.29.xml 32 | wsj_0003.30.xml 33 | wsj_0004.1.xml 34 | wsj_0004.2.xml 35 | wsj_0004.4.xml 36 | wsj_0004.5.xml 37 | wsj_0004.6.xml 38 | wsj_0004.7.xml 39 | wsj_0004.8.xml 40 | wsj_0004.9.xml 41 | wsj_0004.10.xml 42 | wsj_0004.11.xml 43 | wsj_0004.12.xml 44 | wsj_0004.14.xml 45 | wsj_0004.15.xml 46 | wsj_0004.16.xml 47 | wsj_0004.17.xml 48 | wsj_0005.1.xml 49 | wsj_0005.2.xml 50 | wsj_0005.3.xml 51 | wsj_0007.1.xml 52 | wsj_0007.2.xml 53 | wsj_0007.3.xml 54 | wsj_0007.4.xml 55 | wsj_0008.1.xml 56 | wsj_0008.2.xml 57 | wsj_0008.3.xml 58 | wsj_0008.4.xml 59 | wsj_0008.5.xml 60 | wsj_0008.6.xml 61 | wsj_0009.1.xml 62 | wsj_0009.2.xml 63 | wsj_0009.3.xml 64 | wsj_0009.4.xml 65 | wsj_0010.1.xml 66 | wsj_0010.2.xml 67 | wsj_0010.3.xml 68 | wsj_0010.6.xml 69 | wsj_0010.7.xml 70 | wsj_0010.8.xml 71 | wsj_0010.10.xml 72 | wsj_0010.11.xml 73 | wsj_0010.12.xml 74 | 
wsj_0010.13.xml 75 | wsj_0010.15.xml 76 | wsj_0010.16.xml 77 | wsj_0010.17.xml 78 | wsj_0010.18.xml 79 | wsj_0010.19.xml 80 | wsj_0010.20.xml 81 | wsj_0011.1.xml 82 | wsj_0011.2.xml 83 | wsj_0011.4.xml 84 | wsj_0011.5.xml 85 | wsj_0011.6.xml 86 | wsj_0011.7.xml 87 | wsj_0011.8.xml 88 | wsj_0012.1.xml 89 | wsj_0012.2.xml 90 | wsj_0012.3.xml 91 | wsj_0012.4.xml 92 | wsj_0012.5.xml -------------------------------------------------------------------------------- /mtool/data/sample/ucca/xml/wsj_0010.2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /mtool/data/sample/ucca/xml/wsj_0010.8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /mtool/data/sample/wsj.ids: -------------------------------------------------------------------------------- 1 | 20001001 2 | 20001002 3 | 
20003001 4 | 20003002 5 | 20003003 6 | 20003005 7 | 20003007 8 | 20003008 9 | 20003009 10 | 20003010 11 | 20003011 12 | 20003012 13 | 20003013 14 | 20003014 15 | 20003015 16 | 20003016 17 | 20003017 18 | 20003018 19 | 20003019 20 | 20003020 21 | 20003021 22 | 20003022 23 | 20003023 24 | 20003024 25 | 20003025 26 | 20003026 27 | 20003027 28 | 20003028 29 | 20003029 30 | 20003030 31 | 20004001 32 | 20004002 33 | 20004004 34 | 20004005 35 | 20004006 36 | 20004007 37 | 20004008 38 | 20004009 39 | 20004010 40 | 20004011 41 | 20004012 42 | 20004014 43 | 20004015 44 | 20004016 45 | 20004017 46 | 20005001 47 | 20005002 48 | 20005003 49 | 20006001 50 | 20006002 51 | 20007002 52 | 20007003 53 | 20007004 54 | 20008001 55 | 20008002 56 | 20008003 57 | 20008004 58 | 20008005 59 | 20008006 60 | 20009001 61 | 20009002 62 | 20009003 63 | 20009004 64 | 20010001 65 | 20010002 66 | 20010003 67 | 20010006 68 | 20010007 69 | 20010008 70 | 20010010 71 | 20010011 72 | 20010012 73 | 20010013 74 | 20010015 75 | 20010016 76 | 20010017 77 | 20010018 78 | 20010019 79 | 20010020 80 | 20011001 81 | 20011002 82 | 20011004 83 | 20011005 84 | 20011006 85 | 20011007 86 | 20011008 87 | 20012002 88 | 20012004 89 | 20012005 90 | -------------------------------------------------------------------------------- /mtool/data/score/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/.DS_Store -------------------------------------------------------------------------------- /mtool/data/score/amr/233.gold.amr: -------------------------------------------------------------------------------- 1 | (j / join-up-02 :ARG0 (c / country :name (n / name :op1 "U.S.") :mod (p2 / person :ARG0-of (o / observe-01))) :ARG1 (p / project)) 2 | 3 | -------------------------------------------------------------------------------- /mtool/data/score/amr/233.gold.dot: 
-------------------------------------------------------------------------------- 1 | digraph "233" { 2 | top [ style=invis ]; 3 | top -> 0; 4 | 0 [ label=<
join-up-02
> ]; 5 | 1 [ label=<
country
> ]; 6 | 2 [ label=<
name
op1U.S.
> ]; 7 | 3 [ label=<
person
> ]; 8 | 4 [ label=<
observe-01
> ]; 9 | 5 [ label=<
project
> ]; 10 | 0 -> 1 [ label="ARG0" ]; 11 | 3 -> 4 [ label="(ARG0)-of" ]; 12 | 1 -> 2 [ label="name" ]; 13 | 1 -> 3 [ label="mod (domain)" ]; 14 | 0 -> 5 [ label="ARG1" ]; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /mtool/data/score/amr/233.gold.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/amr/233.gold.pdf -------------------------------------------------------------------------------- /mtool/data/score/amr/233.system.amr: -------------------------------------------------------------------------------- 1 | (f / join-up-02 :ARG1 (e / project) :prep-as (u_1104 / observe-01 :ARG0 (c4 / country :name (n2 / name :op1 "U.S.") :ARG0-of f))) 2 | 3 | -------------------------------------------------------------------------------- /mtool/data/score/amr/233.system.dot: -------------------------------------------------------------------------------- 1 | digraph "233" { 2 | top [ style=invis ]; 3 | top -> 0; 4 | 0 [ label=<
join-up-02
> ]; 5 | 1 [ label=<
project
> ]; 6 | 2 [ label=<
observe-01
> ]; 7 | 3 [ label=<
country
> ]; 8 | 4 [ label=<
name
op1U.S.
> ]; 9 | 0 -> 1 [ label="ARG1" ]; 10 | 3 -> 4 [ label="name" ]; 11 | 0 -> 2 [ label="prep-as" ]; 12 | 2 -> 3 [ label="ARG0" ]; 13 | 3 -> 0 [ label="(ARG0)-of" ]; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /mtool/data/score/amr/233.system.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/amr/233.system.pdf -------------------------------------------------------------------------------- /mtool/data/score/amr/first.gold.amr: -------------------------------------------------------------------------------- 1 | (c / claim-01 :ARG0 (p / partisan :poss (p2 / person :name (n / name :op1 "Ronald" :op2 "Reagan"))) :ARG1 (w / win-01 :ARG0 p2 :ARG2 (w2 / war :name (n2 / name :op1 "Cold" :op2 "War"))) :time (c2 / collapse-01 :ARG1 (c3 / country :name (n3 / name :op1 "Soviet" :op2 "Union")) :time (d / date-entity :year 1991))) 2 | 3 | -------------------------------------------------------------------------------- /mtool/data/score/amr/first.system.amr: -------------------------------------------------------------------------------- 1 | (f / claim-01 :ARG0 (u_2 / person :ARG0-of (o / partisan :ARG1 (p / person :name (n / name :op1 (explicitanon3 / Ronald :year-of (d / date-entity :time-of (s3 / collapse-01 :ARG1 (c4 / country :name (n2 / name :op1 "Soviet" :op2 "Union")) :time-of f))) :op2 "Reagan")) :ARG0-of (a2 / win-01 :ARG2 (e / war-01 :mod (u_1 / cold)) :ARG1-of f))) 2 | 3 | -------------------------------------------------------------------------------- /mtool/data/score/amr/partial.gold.mrp: -------------------------------------------------------------------------------- 1 | 
{"edges":[{"label":"ARG1","source":1,"target":2},{"label":"op2","source":0,"target":3},{"label":"ARG1","source":3,"target":4},{"label":"op1","source":0,"target":1}],"flavor":2,"framework":"amr","id":"bolt-eng-DF-170-181103-8882762_0111.33","input":"Lowering wages/Breaking Unions.","nodes":[{"id":0,"label":"slash"},{"id":1,"label":"lower-05"},{"id":2,"label":"wage"},{"id":3,"label":"break-01"},{"id":4,"label":"union"}],"time":"2019-04-10 (20:10)","tops":[0],"version":"0.9"} 2 | -------------------------------------------------------------------------------- /mtool/data/score/amr/partial.system.mrp: -------------------------------------------------------------------------------- 1 | {"edges":[{"label":"ARG1","source":1,"target":2},{"label":"op2","source":0,"target":3},{"label":"ARG1","source":3,"target":4},{"label":"op1","source":0,"target":1}],"flavor":2,"framework":"amr","id":"bolt-eng-DF-170-181103-8882762_0111.33","input":"Lowering wages/Breaking Unions.","nodes":[{"id":0,"label":"slash"},{"id":1,"label":"lower-05"},{"id":2,"label":"wage"},{"id":3,"label":"break-01", "anchors" : []},{"id":4,"label":"union"}],"time":"2019-04-10 (20:10)","tops":[0],"version":"0.9"} 2 | -------------------------------------------------------------------------------- /mtool/data/score/amr/test1.amr: -------------------------------------------------------------------------------- 1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29 2 | # ::snt The boy wants the girl to believe him. 3 | (w / want-01 4 | :ARG0 (b / boy) 5 | :ARG1 (b2 / believe-01 6 | :ARG0 (g / girl) 7 | :ARG1 b)) 8 | 9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17 10 | # ::snt The boy is a hard worker. 11 | (p / person 12 | :domain (b / boy) 13 | :ARG0-of (w / work-01 14 | :manner (h / hard))) 15 | 16 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07 17 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon. 
18 | (b / bear-02 19 | :ARG1 (p / poet :name (n / name :op1 "William" :op2 "Shakespeare")) 20 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon"))) 21 | 22 | -------------------------------------------------------------------------------- /mtool/data/score/amr/test1.mrp: -------------------------------------------------------------------------------- 1 | {"id": "isi_0001.1", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The boy wants the girl to believe him.", "tops": [0], "nodes": [{"id": 0, "label": "want-01"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "believe-01"}, {"id": 3, "label": "girl"}], "edges": [{"source": 2, "target": 3, "label": "ARG0"}, {"source": 2, "target": 1, "label": "ARG1"}, {"source": 0, "target": 1, "label": "ARG0"}, {"source": 0, "target": 2, "label": "ARG1"}]} 2 | {"id": "isi_0001.25", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The boy is a hard worker.", "tops": [0], "nodes": [{"id": 0, "label": "person"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "work-01"}, {"id": 3, "label": "hard"}], "edges": [{"source": 0, "target": 1, "label": "domain"}, {"source": 2, "target": 3, "label": "manner"}, {"source": 0, "target": 2, "label": "ARG0-of", "normal": "ARG0"}]} 3 | {"id": "isi_0002.209", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The poet William Shakespeare was born in Stratford-upon-Avon.", "tops": [0], "nodes": [{"id": 0, "label": "bear-02"}, {"id": 1, "label": "poet"}, {"id": 2, "label": "name", "properties": ["op1", "op2"], "values": ["William", "Shakespeare"]}, {"id": 3, "label": "city"}, {"id": 4, "label": "name", "properties": ["op1"], "values": ["Stratford-upon-Avon"]}], "edges": [{"source": 0, "target": 3, "label": "location"}, {"source": 3, "target": 4, "label": "name"}, {"source": 1, "target": 2, "label": "name"}, {"source": 0, "target": 1, "label": "ARG1"}]} 4 | 
-------------------------------------------------------------------------------- /mtool/data/score/amr/test2.amr: -------------------------------------------------------------------------------- 1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29 2 | # ::snt The boy wants the girl to believe him. 3 | (w / want-01 4 | :ARG0 (b / boy) 5 | :ARG1 (b2 / believe-01 6 | :ARG0 (g / girl) 7 | :ARG1 (h / he))) 8 | 9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17 10 | # ::snt The boy is a hard worker. 11 | (w / worker 12 | :mod (h / hard) 13 | :domain (b / boy)) 14 | 15 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07 16 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon. 17 | (b / bear-02 18 | :ARG1 (p / poet :name (n / name :op1 william :op2 "shakespeare")) 19 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon"))) 20 | 21 | -------------------------------------------------------------------------------- /mtool/data/score/amr/test2.mrp: -------------------------------------------------------------------------------- 1 | {"id": "isi_0001.1", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The boy wants the girl to believe him.", "tops": [0], "nodes": [{"id": 0, "label": "want-01"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "believe-01"}, {"id": 3, "label": "girl"}, {"id": 4, "label": "he"}], "edges": [{"source": 0, "target": 1, "label": "ARG0"}, {"source": 2, "target": 3, "label": "ARG0"}, {"source": 2, "target": 4, "label": "ARG1"}, {"source": 0, "target": 2, "label": "ARG1"}]} 2 | {"id": "isi_0001.25", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The boy is a hard worker.", "tops": [0], "nodes": [{"id": 0, "label": "worker"}, {"id": 1, "label": "hard"}, {"id": 2, "label": "boy"}], "edges": [{"source": 0, "target": 2, "label": "domain"}, {"source": 0, "target": 1, "label": "mod", "normal": "domain"}]} 3 | {"id": "isi_0002.209", "flavor": 2, "framework": 
"amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The poet William Shakespeare was born in Stratford-upon-Avon.", "tops": [0], "nodes": [{"id": 0, "label": "bear-02"}, {"id": 1, "label": "poet"}, {"id": 2, "label": "name", "properties": ["op1", "op2"], "values": ["william", "shakespeare"]}, {"id": 3, "label": "city"}, {"id": 4, "label": "name", "properties": ["op1"], "values": ["Stratford-upon-Avon"]}], "edges": [{"source": 3, "target": 4, "label": "name"}, {"source": 1, "target": 2, "label": "name"}, {"source": 0, "target": 3, "label": "location"}, {"source": 0, "target": 1, "label": "ARG1"}]} 4 | -------------------------------------------------------------------------------- /mtool/data/score/dm/empty.gold.mrp: -------------------------------------------------------------------------------- 1 | {"id": "22100001", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Consumers may want to move their telephones a little closer to the TV set.", "nodes": [], "edges": []} 2 | {"id": "22100002", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Couch-potato jocks watching ABC's \"Monday Night Football\" can now vote during halftime for the greatest play in 20 years from among four or five filmed replays.", "tops": [], "nodes": null, "edges": null} 3 | {"id": "22100003", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Two weeks ago, viewers of several NBC daytime consumer segments started calling a 900 number for advice on various life-style issues.", "tops": [11], "nodes": [{"id": 0, "label": "two", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 0, "to": 3}]}, {"id": 1, "label": "week", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 4, "to": 9}]}, {"id": 2, "label": "ago", "properties": ["pos", "frame"], "values": ["RB", "p:e-i-u"], "anchors": [{"from": 10, "to": 13}]}, {"id": 4, "label": "viewer", 
"properties": ["pos", "frame"], "values": ["NNS", "n_of:x-i"], "anchors": [{"from": 15, "to": 22}]}, {"id": 6, "label": "several", "properties": ["pos", "frame"], "values": ["JJ", "a:e-p"], "anchors": [{"from": 26, "to": 33}]}, {"id": 7, "label": "NBC", "properties": ["pos", "frame"], "values": ["NNP", "named:x-c"], "anchors": [{"from": 34, "to": 37}]}, {"id": 8, "label": "daytime", "properties": ["pos", "frame"], "values": ["JJ", "n:x"], "anchors": [{"from": 38, "to": 45}]}, {"id": 9, "label": "consumer", "properties": ["pos", "frame"], "values": ["NN", "n_of:x-i"], "anchors": [{"from": 46, "to": 54}]}, {"id": 10, "label": "segment", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 55, "to": 63}]}, {"id": 11, "label": "start", "properties": ["pos", "frame"], "values": ["VBD", "v:e-h"], "anchors": [{"from": 64, "to": 71}]}, {"id": 12, "label": "call", "properties": ["pos", "frame"], "values": ["VBG", "v:e-i-p"], "anchors": [{"from": 72, "to": 79}]}, {"id": 13, "label": "a", "properties": ["pos", "frame"], "values": ["DT", "q:i-h-h"], "anchors": [{"from": 80, "to": 81}]}, {"id": 14, "label": "900", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 82, "to": 85}]}, {"id": 15, "label": "number", "properties": ["pos", "frame"], "values": ["NN", "n_of:x"], "anchors": [{"from": 86, "to": 92}]}, {"id": 16, "label": "for", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 93, "to": 96}]}, {"id": 17, "label": "advice", "properties": ["pos", "frame"], "values": ["NN", "n:x"], "anchors": [{"from": 97, "to": 103}]}, {"id": 18, "label": "on", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 104, "to": 106}]}, {"id": 19, "label": "various", "properties": ["pos", "frame"], "values": ["JJ", "a:e-p"], "anchors": [{"from": 107, "to": 114}]}, {"id": 20, "label": "style", "properties": ["pos", "frame"], "values": ["NN", "n_of:x"], "anchors": [{"from": 115, 
"to": 125}]}, {"id": 21, "label": "issue", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 126, "to": 132}]}], "edges": [{"source": 2, "target": 11, "label": "ARG1"}, {"source": 8, "target": 10, "label": "compound"}, {"source": 16, "target": 12, "label": "ARG1"}, {"source": 13, "target": 15, "label": "BV"}, {"source": 0, "target": 1, "label": "ARG1"}, {"source": 9, "target": 10, "label": "compound"}, {"source": 14, "target": 15, "label": "ARG1"}, {"source": 12, "target": 4, "label": "ARG1"}, {"source": 18, "target": 17, "label": "ARG1"}, {"source": 2, "target": 1, "label": "ARG2"}, {"source": 12, "target": 15, "label": "ARG2"}, {"source": 19, "target": 21, "label": "ARG1"}, {"source": 11, "target": 12, "label": "ARG1"}, {"source": 6, "target": 10, "label": "ARG1"}, {"source": 20, "target": 21, "label": "compound"}, {"source": 4, "target": 10, "label": "ARG1"}, {"source": 16, "target": 17, "label": "ARG2"}, {"source": 7, "target": 10, "label": "compound"}, {"source": 18, "target": 21, "label": "ARG2"}]} 4 | -------------------------------------------------------------------------------- /mtool/data/score/eds/lpps.102990.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/eds/lpps.102990.png -------------------------------------------------------------------------------- /mtool/data/score/psd/107480.foxik.mrp: -------------------------------------------------------------------------------- 1 | {"id": "107480", "flavor": 0, "framework": "psd", "version": 1.0, "time": "2019-08-01 (16:21)", "input": "I own three volcanoes, which I clean out every week (for I also clean out the one that is extinct; one never knows).", "tops": [1], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 1}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 1, "anchors": [{"from": 2, "to": 5}], 
"label": "own", "properties": ["pos", "frame"], "values": ["VBP", "ev-w2176f1"]}, {"id": 2, "anchors": [{"from": 6, "to": 11}], "label": "three", "properties": ["pos"], "values": ["CD"]}, {"id": 3, "anchors": [{"from": 12, "to": 21}], "label": "volcanoe", "properties": ["pos"], "values": ["NNS"]}, {"id": 4, "anchors": [{"from": 23, "to": 28}], "label": "which", "properties": ["pos"], "values": ["WDT"]}, {"id": 5, "anchors": [{"from": 29, "to": 30}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 6, "anchors": [{"from": 31, "to": 36}], "label": "clean_out", "properties": ["pos", "frame"], "values": ["VBP", "ev-w544f1"]}, {"id": 7, "anchors": [{"from": 41, "to": 46}], "label": "every", "properties": ["pos"], "values": ["DT"]}, {"id": 8, "anchors": [{"from": 47, "to": 51}], "label": "week", "properties": ["pos"], "values": ["NN"]}, {"id": 9, "anchors": [{"from": 57, "to": 58}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 10, "anchors": [{"from": 59, "to": 63}], "label": "also", "properties": ["pos"], "values": ["RB"]}, {"id": 11, "anchors": [{"from": 64, "to": 69}], "label": "clean_out", "properties": ["pos", "frame"], "values": ["VBP", "ev-w544f1"]}, {"id": 12, "anchors": [{"from": 78, "to": 81}], "label": "one", "properties": ["pos"], "values": ["NN"]}, {"id": 13, "anchors": [{"from": 82, "to": 86}], "label": "that", "properties": ["pos"], "values": ["WDT"]}, {"id": 14, "anchors": [{"from": 90, "to": 97}], "label": "extinct", "properties": ["pos"], "values": ["JJ"]}, {"id": 15, "anchors": [{"from": 97, "to": 98}], "label": "#Semicolon", "properties": ["pos"], "values": [":"]}, {"id": 16, "anchors": [{"from": 99, "to": 102}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 17, "anchors": [{"from": 103, "to": 108}], "label": "never", "properties": ["pos"], "values": ["RB"]}, {"id": 18, "anchors": [{"from": 109, "to": 114}], "label": "know", "properties": ["pos", "frame"], "values": ["VBZ", 
"ev-w1810f1"]}], "edges": [{"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 1, "target": 11, "label": "CAUS"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 3, "target": 6, "label": "DESCR"}, {"source": 6, "target": 4, "label": "PAT-arg"}, {"source": 6, "target": 5, "label": "ACT-arg"}, {"source": 6, "target": 8, "label": "THO"}, {"source": 8, "target": 7, "label": "RSTR"}, {"source": 11, "target": 4, "label": "PAT-arg"}, {"source": 11, "target": 5, "label": "ACT-arg"}, {"source": 11, "target": 9, "label": "ACT-arg"}, {"source": 11, "target": 10, "label": "RHEM"}, {"source": 11, "target": 12, "label": "PAT-arg"}, {"source": 15, "target": 11, "label": "CSQ.member"}, {"source": 15, "target": 18, "label": "CONJ.member"}, {"source": 18, "target": 16, "label": "ACT-arg"}, {"source": 18, "target": 17, "label": "TWHEN"}, {"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 6, "target": 4, "label": "PAT-arg"}, {"source": 11, "target": 4, "label": "PAT-arg"}, {"source": 6, "target": 5, "label": "ACT-arg"}, {"source": 11, "target": 5, "label": "ACT-arg"}, {"source": 3, "target": 6, "label": "DESCR"}, {"source": 8, "target": 7, "label": "RSTR"}, {"source": 6, "target": 8, "label": "THO"}, {"source": 11, "target": 9, "label": "ACT-arg"}, {"source": 11, "target": 10, "label": "RHEM"}, {"source": 1, "target": 11, "label": "CAUS"}, {"source": 15, "target": 11, "label": "CSQ.member"}, {"source": 11, "target": 12, "label": "PAT-arg"}, {"source": 18, "target": 16, "label": "ACT-arg"}, {"source": 18, "target": 17, "label": "TWHEN"}, {"source": 15, "target": 18, "label": "CONJ.member"}]} 2 | -------------------------------------------------------------------------------- /mtool/data/score/psd/107480.gold.mrp: -------------------------------------------------------------------------------- 1 | {"id": "107480", "flavor": 
0, "framework": "psd", "version": 1.0, "time": "2019-06-23", "input": "I own three volcanoes, which I clean out every week (for I also clean out the one that is extinct; one never knows).", "tops": [1], "nodes": [{"id": 0, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 0, "to": 1}]}, {"id": 1, "label": "own", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 2, "to": 5}]}, {"id": 2, "label": "three", "properties": ["pos"], "values": ["CD"], "anchors": [{"from": 6, "to": 11}]}, {"id": 3, "label": "volcano", "properties": ["pos"], "values": ["NNS"], "anchors": [{"from": 12, "to": 21}]}, {"id": 5, "label": "which", "properties": ["pos"], "values": ["WDT"], "anchors": [{"from": 23, "to": 28}]}, {"id": 6, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 29, "to": 30}]}, {"id": 7, "label": "clean_out", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 31, "to": 36}]}, {"id": 9, "label": "every", "properties": ["pos"], "values": ["DT"], "anchors": [{"from": 41, "to": 46}]}, {"id": 10, "label": "week", "properties": ["pos"], "values": ["NN"], "anchors": [{"from": 47, "to": 51}]}, {"id": 13, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 57, "to": 58}]}, {"id": 14, "label": "also", "properties": ["pos"], "values": ["RB"], "anchors": [{"from": 59, "to": 63}]}, {"id": 15, "label": "clean_out", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 64, "to": 69}]}, {"id": 18, "label": "one", "properties": ["pos"], "values": ["NN"], "anchors": [{"from": 78, "to": 81}]}, {"id": 19, "label": "that", "properties": ["pos"], "values": ["WDT"], "anchors": [{"from": 82, "to": 86}]}, {"id": 20, "label": "be", "properties": ["pos"], "values": ["VBZ"], "anchors": [{"from": 87, "to": 89}]}, {"id": 21, "label": "extinct", "properties": ["pos"], "values": ["JJ"], "anchors": [{"from": 90, "to": 97}]}, {"id": 23, "label": "one", "properties": 
["pos"], "values": ["CD"], "anchors": [{"from": 99, "to": 102}]}, {"id": 24, "label": "never", "properties": ["pos"], "values": ["RB"], "anchors": [{"from": 103, "to": 108}]}, {"id": 25, "label": "know", "properties": ["pos"], "values": ["VBZ"], "anchors": [{"from": 109, "to": 114}]}], "edges": [{"source": 7, "target": 6, "label": "ACT-arg"}, {"source": 18, "target": 20, "label": "RSTR"}, {"source": 15, "target": 18, "label": "PAT-arg"}, {"source": 15, "target": 13, "label": "ACT-arg"}, {"source": 25, "target": 24, "label": "TWHEN"}, {"source": 15, "target": 25, "label": "CAUS"}, {"source": 3, "target": 7, "label": "RSTR"}, {"source": 15, "target": 14, "label": "RHEM"}, {"source": 1, "target": 15, "label": "CAUS"}, {"source": 20, "target": 19, "label": "ACT-arg"}, {"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 25, "target": 23, "label": "ACT-arg"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 20, "target": 21, "label": "PAT-arg"}, {"source": 7, "target": 10, "label": "THO"}, {"source": 7, "target": 5, "label": "PAT-arg"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 10, "target": 9, "label": "RSTR"}]} 2 | -------------------------------------------------------------------------------- /mtool/data/score/psd/peking.brown.sdp: -------------------------------------------------------------------------------- 1 | Representation type: PSD 2 | # Evaluation 3 | 4 | Gold standard file: ../test/en.ood.psd.sdp 5 | System output file: Peking/en.ood.closed.psd.1.sdp 6 | 7 | ## Scores including virtual dependencies to top nodes 8 | 9 | ### Labeled scores 10 | 11 | Number of edges in gold standard: 21396 12 | Number of edges in system output: 19411 13 | Number of edges in common: 14877 14 | 15 | LP: 0.766421 16 | LR: 0.695317 17 | LF: 0.729140 18 | LM: 0.171444 19 | 20 | ### Unlabeled scores 21 | 22 | Number of unlabeled edges in gold standard: 21396 23 | Number of unlabeled edges in system output: 19411 24 | Number of unlabeled edges in 
common: 17432 25 | 26 | UP: 0.898047 27 | UR: 0.814732 28 | UF: 0.854363 29 | UM: 0.358031 30 | 31 | ### Complete predications 32 | 33 | Number of complete predications in gold standard: 3919 34 | Number of complete predications in system output: 3900 35 | Number of complete predications in common: 2048 36 | 37 | PP: 0.525128 38 | PR: 0.522582 39 | PF: 0.523852 40 | 41 | ### Semantic frames 42 | 43 | Number of semantic frames in gold standard: 3919 44 | Number of semantic frames in system output: 3900 45 | Number of semantic frames in common: 1322 46 | 47 | FP: 0.338974 48 | FR: 0.337331 49 | FF: 0.338151 50 | 51 | ### Senses 52 | 53 | Number of senses in gold standard: 3919 54 | Number of senses in system output: 3900 55 | Number of senses in common: 2171 56 | 57 | SP: 0.556667 58 | SR: 0.553968 59 | SF: 0.555314 60 | 61 | ## Scores excluding virtual dependencies to top nodes 62 | 63 | ### Labeled scores 64 | 65 | Number of edges in gold standard: 19058 66 | Number of edges in system output: 17181 67 | Number of edges in common: 12790 68 | 69 | LP: 0.744427 70 | LR: 0.671109 71 | LF: 0.705869 72 | LM: 0.173067 73 | 74 | ### Unlabeled scores 75 | 76 | Number of unlabeled edges in gold standard: 19058 77 | Number of unlabeled edges in system output: 17181 78 | Number of unlabeled edges in common: 15345 79 | 80 | UP: 0.893138 81 | UR: 0.805174 82 | UF: 0.846878 83 | UM: 0.362358 84 | 85 | ### Complete predications 86 | 87 | Number of complete predications in gold standard: 3919 88 | Number of complete predications in system output: 3900 89 | Number of complete predications in common: 2048 90 | 91 | PP: 0.525128 92 | PR: 0.522582 93 | PF: 0.523852 94 | 95 | ### Semantic frames 96 | 97 | Number of semantic frames in gold standard: 3919 98 | Number of semantic frames in system output: 3900 99 | Number of semantic frames in common: 1322 100 | 101 | FP: 0.338974 102 | FR: 0.337331 103 | FF: 0.338151 104 | 105 | ### Senses 106 | 107 | Number of senses in gold standard: 
3919 108 | Number of senses in system output: 3900 109 | Number of senses in common: 2171 110 | 111 | SP: 0.556667 112 | SR: 0.553968 113 | SF: 0.555314 114 | -------------------------------------------------------------------------------- /mtool/data/score/revisions.txt: -------------------------------------------------------------------------------- 1 | 54c0499f55874555c22827a7e61d79aeb8d29906 oe@ifi.uio.no 2019-07-05 23:49:38 +0200 cosmetics; so much for tonight ... 2 | f9ceb0a2090742a67ca89ed26b293fbdcfc292cb daniel.hershcovich@gmail.com 2019-07-05 21:57:08 +0200 Fix dominated dict lookup to be by node id rather than index 3 | 8df18be265c92c11a7fac788d727a2c879e142c4 milan@strakovi.com 2019-07-05 10:13:02 +0200 Another fix for evaluation of empty graphs. 4 | 15187440752dec7819093fa79849ff4b48d7a3d4 oe@ifi.uio.no 2019-07-05 00:55:58 +0200 fine-tuning default limits for MRP and SMATCH scorers; disable RRHC-based initialization for UCCA graphs; allow better control of RRHC and MCES limits from the command line 5 | 0d20656f47ad86352d6de86ce5b193295a3442bd oe@ifi.uio.no 2019-07-03 12:57:38 +0200 cosmetics 6 | 1e2fa352c1384ea6a1005c193ebf1d449a0de1dd oe@ifi.uio.no 2019-07-03 01:41:40 +0200 disable more assertions: is_injective() actually fails on the UCCA test (when initializing from SMATCH) 7 | 8aaa494d5794abc849965dda6fd70208a530c3db oe@ifi.uio.no 2019-07-02 21:33:43 +0200 bug fix: over-counting can apply on the same set of correspondences too 8 | 3cccda87794669573018f08a3717461b6deedfab oe@ifi.uio.no 2019-07-02 17:46:36 +0200 allow initialization from SMATCH hill-climbing; guard against over-counting (see my email to tim of june 30, 2019) 9 | 6c863c9e6233b8d3e81f39e0015333c4c75d5264 daniel.hershcovich@gmail.com 2019-07-01 14:22:24 +0200 Normalization: drop (attribute, value) pairs whose value is the default value 10 | b2145c4fc9ec79624fc84955f373b3387ca02d75 oe@ifi.uio.no 2019-06-30 01:33:24 +0200 give more weight to anchor overlap in UCCA initialization and 
rewards 11 | c31601c31b0e17639aa9557559d5655bfd55c371 oe@ifi.uio.no 2019-06-30 01:15:07 +0200 bug fix in sorted_splits(); streamlined smatch() interface; cosmetics 12 | 210da9b2e9eff2be7adf988d2865ab77c5ec3447 oe@ifi.uio.no 2019-06-27 22:38:06 +0200 close #20 (prior to scoring, normalize graphs according to the description on the web page) 13 | 1a61ea4484e77a458030a62a62e751e0668e7f11 oe@ifi.uio.no 2019-06-27 13:15:25 +0200 generalize anchor treatment in SMATCH wrapper 14 | b4db1996a894ad70dcb8bc83ba46ddfa354db44e daniel.hershcovich@gmail.com 2019-06-25 11:04:54 +0200 #26 Require leaf status of matched nodes to be the same in UCCA MCES 15 | 8696ffe1fa154acd03a4adbb1813354f198dfeb9 oe@ifi.uio.no 2019-06-20 10:34:00 +0200 fix copy-paste error (owing to a missing generalization) 16 | 274890bdccf3e3e502b755386b7af7fecf39284d oe@ifi.uio.no 2019-06-18 23:59:10 +0200 bug fix: edge attributes 17 | 09c48bd4a8ab8b72d05cea9571000a2e3524bb1b oe@ifi.uio.no 2019-06-18 00:59:52 +0200 activate improved estimate of edge potential 18 | 1c68aa39675291dc998a508e818e63723b0804c0 marco.kuhlmann@liu.se 2019-06-17 23:30:13 +0200 Treat edge attributes properly (closes: #13) 19 | 08e0d8a839b98a395c868cc1bd2e6ca859ef3e05 marco.kuhlmann@liu.se 2019-06-17 22:30:42 +0200 Respect node ordering in bi-lexical graphs (closes: #15) 20 | 7718d1ca50b250e154365e5846981564d7b635d5 oe@ifi.uio.no 2019-06-16 17:10:33 +0200 expose per-item result; rationalize --limit and --trace 21 | -------------------------------------------------------------------------------- /mtool/data/score/test.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --job-name=score 4 | #SBATCH --mail-type=FAIL 5 | #SBATCH --account=nn9447k 6 | #SBATCH --time=12:00:00 7 | #SBATCH --nodes=1 8 | #SBATCH --mem-per-cpu=4G 9 | #SBATCH --ntasks-per-node=8 10 | 11 | commit="$(git log --pretty=format:\%H -n 1)"; 12 | echo "directory: $(pwd)"; 13 | echo "git status: $(git status 
| head -1)"; 14 | echo "git commit: ${commit}"; 15 | echo; 16 | 17 | source /cluster/bin/jobsetup; 18 | 19 | module purge; 20 | module use -a /projects/nlpl/software/modulefiles; 21 | module load nlpl-python-candy/201902/3.7 nlpl-numpy/1.16.3/3.7; 22 | 23 | /bin/cp ${HOME}/lib/mrp/2019/mtool/data/score/Makefile ./Makefile; 24 | make -j ${SLURM_CPUS_ON_NODE:-4} $(egrep '^[a-z/.]*.json:' Makefile | grep -v all: | sed 's/://'); 25 | if [ -d ./../../../etc/ ]; then 26 | target=../../../../etc/${commit}; 27 | [ -d ${target} ] || mkdir ${target}; 28 | cp -va *.json *.log ${target}; 29 | fi 30 | -------------------------------------------------------------------------------- /mtool/data/score/ucca/anchors.gold.mrp: -------------------------------------------------------------------------------- 1 | {"id": "133601-0004", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (11:29)", "input": "Even though you are expensive.", "tops": [5], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 4}, {"from": 5, "to": 11}]}, {"id": 1, "anchors": [{"from": 12, "to": 15}]}, {"id": 2, "anchors": [{"from": 16, "to": 19}]}, {"id": 3, "anchors": [{"from": 20, "to": 29}]}, {"id": 4, "anchors": [{"from": 29, "to": 30}]}, {"id": 5}, {"id": 6}], "edges": [{"source": 5, "target": 0, "label": "L"}, {"source": 6, "target": 3, "label": "S"}, {"source": 6, "target": 4, "label": "U"}, {"source": 6, "target": 1, "label": "A"}, {"source": 5, "target": 6, "label": "H"}, {"source": 6, "target": 2, "label": "F"}]} 2 | -------------------------------------------------------------------------------- /mtool/data/score/ucca/anchors.tupa.mrp: -------------------------------------------------------------------------------- 1 | {"id": "133601-0004", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (11:31)", "input": "Even though you are expensive.", "tops": [5], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 11}]}, {"id": 1, "anchors": [{"from": 12, "to": 15}]}, {"id": 
2, "anchors": [{"from": 16, "to": 19}]}, {"id": 3, "anchors": [{"from": 20, "to": 29}]}, {"id": 4, "anchors": [{"from": 29, "to": 30}]}, {"id": 5}, {"id": 6}], "edges": [{"source": 6, "target": 4, "label": "U"}, {"source": 6, "target": 3, "label": "S"}, {"source": 6, "target": 2, "label": "F"}, {"source": 6, "target": 1, "label": "A"}, {"source": 5, "target": 0, "label": "L"}, {"source": 5, "target": 6, "label": "H"}]} 2 | -------------------------------------------------------------------------------- /mtool/data/score/ucca/koller.mrp: -------------------------------------------------------------------------------- 1 | {"id": "291046-0001", "framework": "ucca", "flavor": 1, "time": "2019-07-17 (10:43)", "version": "0.9", "input": "Hams on Friendly … RIP", "nodes": [{"anchors": [{"from": 0, "to": 4}], "id": 0, "label": "hams", "properties": [], "values": []}, {"anchors": [{"from": 5, "to": 7}], "id": 1, "label": "on", "properties": [], "values": []}, {"anchors": [{"from": 8, "to": 16}], "id": 2, "label": "friendly", "properties": [], "values": []}, {"anchors": [{"from": 17, "to": 20}], "id": 3, "label": "...", "properties": [], "values": []}, {"anchors": [{"from": 21, "to": 24}], "id": 4, "label": "rip", "properties": [], "values": []}, {"id": 4}, {"id": 5}, {"id": 6}], "edges": [{"source": 5, "target": 1, "label": "A"}, {"source": 5, "target": 2, "label": "S"}, {"source": 6, "target": 5, "label": "A"}, {"source": 5, "target": 3, "label": "A"}, {"source": 6, "target": 0, "label": "S"}, {"source": 6, "target": 4, "label": "U"}]} 2 | -------------------------------------------------------------------------------- /mtool/data/score/ucca/small.gold.mrp: -------------------------------------------------------------------------------- 1 | {"id": "001325-0001", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (17:11)", "input": "Highly recommended", "tops": [2], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 6}]}, {"id": 1, "anchors": [{"from": 7, 
"to": 18}]}, {"id": 2}, {"id": 3}], "edges": [{"source": 3, "target": 1, "label": "S"}, {"source": 2, "target": 3, "label": "H"}, {"source": 3, "target": 0, "label": "D"}]} 2 | -------------------------------------------------------------------------------- /mtool/data/score/ucca/small.gold.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/small.gold.pdf -------------------------------------------------------------------------------- /mtool/data/score/ucca/small.tupa.mrp: -------------------------------------------------------------------------------- 1 | {"id": "001325-0001", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (17:12)", "input": "Highly recommended", "tops": [2], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 6}]}, {"id": 1, "anchors": [{"from": 7, "to": 18}]}, {"id": 2}, {"id": 3}], "edges": [{"source": 2, "target": 3, "label": "H"}, {"source": 3, "target": 0, "label": "D"}, {"source": 3, "target": 1, "label": "P"}]} 2 | -------------------------------------------------------------------------------- /mtool/data/score/ucca/small.tupa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/small.tupa.pdf -------------------------------------------------------------------------------- /mtool/data/score/ucca/test.gold.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/test.gold.pdf -------------------------------------------------------------------------------- /mtool/data/score/ucca/test.tupa.mrp: -------------------------------------------------------------------------------- 1 | 
{"id": "001325-0002", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (15:48)", "input": "My 8 year old daughter loves this place.", "tops": [10], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 2}]}, {"id": 1, "anchors": [{"from": 3, "to": 4}]}, {"id": 2, "anchors": [{"from": 5, "to": 9}]}, {"id": 3, "anchors": [{"from": 10, "to": 13}]}, {"id": 4, "anchors": [{"from": 14, "to": 22}]}, {"id": 5, "anchors": [{"from": 23, "to": 28}]}, {"id": 6, "anchors": [{"from": 29, "to": 33}]}, {"id": 7, "anchors": [{"from": 34, "to": 39}]}, {"id": 8, "anchors": [{"from": 39, "to": 40}]}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}], "edges": [{"source": 11, "target": 13, "label": "A"}, {"source": 13, "target": 7, "label": "C"}, {"source": 9, "target": 3, "label": "E"}, {"source": 13, "target": 8, "label": "U"}, {"source": 11, "target": 5, "label": "P"}, {"source": 13, "target": 6, "label": "E"}, {"source": 9, "target": 0, "label": "E"}, {"source": 10, "target": 11, "label": "H"}, {"source": 12, "target": 2, "label": "C"}, {"source": 11, "target": 9, "label": "D"}, {"source": 9, "target": 4, "label": "C"}, {"source": 12, "target": 1, "label": "E"}, {"source": 9, "target": 12, "label": "E"}]} 2 | {"id": "20003013", "framework": "ucca", "version": 1.0, "time": "2019-07-05", "input": "Among 33 men who worked closely with the substance, 28 have died -- more than three times the expected number.", "nodes": [{"id": 23}, {"id": 0}, {"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}, {"id": 7}, {"id": 8}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}, {"id": 16}, {"id": 17}, {"id": 18}, {"id": 19}, {"id": 20}, {"id": 21}, {"id": 23}, {"id": 24}], "edges": [{"source": 23, "target": 24, "label": "U"}, {"source": 23, "target": 23, "label": "L"}]} 3 | -------------------------------------------------------------------------------- /mtool/data/score/ucca/test.tupa.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/test.tupa.pdf -------------------------------------------------------------------------------- /mtool/data/validate/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: 4 | time python3 -u ../../main.py --trace --trace --validate all \ 5 | --read mrp eds/wsj.mrp $@ 2>&1 | tee eds.wsj.log 6 | -------------------------------------------------------------------------------- /mtool/inspector.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | from graph import Graph; 4 | 5 | def summarize(graphs, golds): 6 | ids = None; 7 | if golds is not None: 8 | ids = dict(); 9 | for gold in golds: 10 | language = gold.language(); 11 | if language not in ids: ids[language] = dict(); 12 | targets = gold.targets(); 13 | if targets is None: targets = [gold.framework]; 14 | for target in targets: 15 | if target not in ids[language]: ids[language][target] = set(); 16 | ids[language][target].add(gold.id); 17 | 18 | counts = dict(); 19 | seen = dict(); 20 | targets = dict(); 21 | targets["eng"] = ["eds", "ptg", "ucca", "amr", "drg"]; 22 | targets["ces"] = ["ptg"]; 23 | targets["deu"] = ["ucca", "drg"]; 24 | targets["zho"] = ["amr"]; 25 | for language in ["eng", "ces", "deu", "zho"]: 26 | counts[language] = dict(); 27 | seen[language] = dict(); 28 | for key in targets[language]: 29 | counts[language][key] = 0; 30 | seen[language][key] = set(); 31 | 32 | for graph in graphs: 33 | language = graph.language(); 34 | if language is None: language = "eng"; 35 | framework = graph.framework; 36 | if golds is None or \ 37 | language in ids and framework in ids[language] and \ 38 | graph.id in ids[language][framework]: 39 | counts[language][framework] += 1; 40 | if graph.id in 
seen[language][framework]: 41 | print("inspector.summarize(): ignoring duplicate {} {} graph #{}." 42 | "".format(language, framework, graph.id), 43 | file = sys.stderr); 44 | else: 45 | seen[language][framework].add(graph.id); 46 | 47 | complete = True; 48 | for language in ["eng", "ces", "deu", "zho"]: 49 | for key in targets[language]: 50 | if len(ids[language][key]) != counts[language][key]: complete = False; 51 | counts["complete"] = complete; 52 | return counts; 53 | -------------------------------------------------------------------------------- /mtool/score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__init__.py -------------------------------------------------------------------------------- /mtool/score/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/core.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/core.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/core.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/core.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/core.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/core.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/edm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/edm.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/edm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/edm.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/edm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/edm.cpython-39.pyc 
-------------------------------------------------------------------------------- /mtool/score/__pycache__/mces.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/mces.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/mces.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/mces.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/mces.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/mces.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/sdp.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/sdp.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/sdp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/sdp.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/sdp.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/sdp.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/smatch.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/smatch.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/smatch.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/smatch.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/smatch.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/smatch.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/ucca.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/ucca.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/score/__pycache__/ucca.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/ucca.cpython-38.pyc -------------------------------------------------------------------------------- 
/mtool/score/__pycache__/ucca.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/ucca.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/score/core.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | # 4 | # _fix_me_ 5 | # maybe use Unicode character classes instead, even if it likely would mean 6 | # many calls to match one-character regular expressions? 7 | # 8 | PUNCTUATION = frozenset(".?!;,:“\"”‘'’()[]{} \t\n\f") 9 | SPACE = frozenset(" \t\n\f") 10 | 11 | def intersect(golds, systems, quiet = False): 12 | golds = {(graph.language(), graph.framework, graph.id): graph 13 | for graph in golds}; 14 | seen = set(); 15 | for graph in systems: 16 | language = graph.language(); 17 | key = (language, graph.framework, graph.id); 18 | if language is None and key not in golds: 19 | language = "eng"; 20 | key = (language, graph.framework, graph.id); 21 | if key in seen: 22 | if not quiet: 23 | print("score.intersect(): ignoring duplicate {} {} graph #{}" 24 | .format(language, graph.framework, graph.id), 25 | file=sys.stderr); 26 | else: 27 | seen.add(key); 28 | gold = golds.get(key); 29 | if gold is None: 30 | if not quiet: 31 | print("score.intersect(): ignoring {} {} graph #{} with no gold graph" 32 | .format(graph.language(), graph.framework, graph.id), 33 | file=sys.stderr); 34 | else: 35 | yield gold, graph; 36 | 37 | for key in golds.keys() - seen: 38 | gold = golds[key]; 39 | if not quiet: 40 | print("score.intersect(): missing system {} {} graph #{}" 41 | .format(gold.language(), gold.framework, gold.id), 42 | file=sys.stderr); 43 | # 44 | # manufacture an empty graph as the system graph 45 | # 46 | from graph import Graph; 47 | yield gold, Graph(gold.id, flavor = gold.flavor, 48 | framework = 
gold.framework); 49 | 50 | def anchor(node): 51 | result = list(); 52 | if node.anchors is not None: 53 | for span in node.anchors: 54 | if "from" in span and "to" in span: 55 | result.append((span["from"], span["to"])); 56 | return result; 57 | 58 | def explode(string, anchors, trim = PUNCTUATION): 59 | result = set(); 60 | for anchor in anchors: 61 | start = end = None; 62 | if isinstance(anchor, tuple): 63 | start, end = anchor; 64 | elif "from" in anchor and "to" in anchor: 65 | start = anchor["from"]; end = anchor["to"]; 66 | if start is not None and end is not None: 67 | while start < end and string[start] in trim: 68 | start += 1; 69 | while end > start and string[end - 1] in trim: 70 | end -= 1; 71 | for i in range(start, end): 72 | if string[i] not in SPACE: 73 | result.add(i); 74 | return frozenset(result); 75 | 76 | def fscore(gold, system, correct): 77 | p = correct / system if system else 0.0; 78 | r = correct / gold if gold else 0.0; 79 | f = 2 * p * r / (p + r) if p + r != 0 else 0.0; 80 | return p, r, f; 81 | 82 | 83 | -------------------------------------------------------------------------------- /mtool/score/edm.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | from graph import Graph; 4 | import score.core; 5 | 6 | def tuples(graph, explode = False): 7 | identities = dict(); 8 | names = set(); 9 | tops = set(); 10 | arguments = set(); 11 | properties = set(); 12 | for node in graph.nodes: 13 | if graph.input and explode: 14 | identity = score.core.explode(graph.input, 15 | score.core.anchor(node)); 16 | else: 17 | identity = tuple(score.core.anchor(node)); 18 | identities[node.id] = identity; 19 | if node.label is not None: names.add((identity, node.label)); 20 | if node.is_top: tops.add(identity); 21 | if node.properties and node.values: 22 | for property, value in zip(node.properties, node.values): 23 | properties.add((identity, property, value)) 24 | for edge in graph.edges: 25 | 
arguments.add((identities[edge.src], identities[edge.tgt], edge.lab)); 26 | return names, arguments, properties, tops; 27 | 28 | def evaluate(golds, systems, format = "json", trace = 0): 29 | tgn = tsn = tcn = 0; 30 | tga = tsa = tca = 0; 31 | tgt = tst = tct = 0; 32 | tgp = tsp = tcp = 0; 33 | scores = dict() if trace else None; 34 | result = {"n": 0}; 35 | for gold, system in score.core.intersect(golds, systems): 36 | explode = gold.input and system.input; 37 | gnames, garguments, gproperties, gtops = tuples(gold, explode = explode); 38 | snames, sarguments, sproperties, stops = tuples(system, explode = explode); 39 | if trace > 1: 40 | print("[{}] gold:\n{}\n{}\n{}\n{}\n\n" 41 | "".format(gold.id, gtops, 42 | gnames, garguments, gproperties)); 43 | print("[{}] system:\n{}\n{}\n{}\n{}\n\n" 44 | "".format(gold.id, stops, 45 | snames, sarguments, sproperties)); 46 | gn = len(gnames); sn = len(snames); 47 | cn = len(gnames & snames); 48 | ga = len(garguments); sa = len(sarguments); 49 | ca = len(garguments & sarguments); 50 | gt = len(gtops); st = len(stops); 51 | ct = len(gtops & stops); 52 | gp = len(gproperties); sp = len(sproperties); 53 | cp = len(gproperties & sproperties); 54 | tgn += gn; tsn += sn; tcn += cn; 55 | tga += ga; tsa += sa; tca += ca; 56 | tgt += gt; tst += st; tct += ct; 57 | tgp += gp; tsp += sp; tcp += cp; 58 | result["n"] += 1; 59 | if trace: 60 | if gold.id in scores: 61 | print("edm.evaluate(): duplicate graph identifier: {}" 62 | "".format(gold.id), file = sys.stderr); 63 | scores[gold.id] = {"names": {"g": gn, "s": sn, "c": cn}, 64 | "arguments": {"g": ga, "s": sa, "c": ca}, 65 | "tops": {"g": gt, "s": st, "c": ct}, 66 | "properties": {"g": gp, "s": sp, "c": cp}}; 67 | if scores is not None: result["scores"] = scores; 68 | p, r, f = score.core.fscore(tgn, tsn, tcn); 69 | result["names"] = {"g": tgn, "s": tsn, "c": tcn, "p": p, "r": r, "f": f}; 70 | p, r, f = score.core.fscore(tga, tsa, tca); 71 | result["arguments"] = {"g": tga, "s": tsa, 
"c": tca, "p": p, "r": r, "f": f}; 72 | p, r, f = score.core.fscore(tgt, tst, tct); 73 | result["tops"] = {"g": tgt, "s": tst, "c": tct, "p": p, "r": r, "f": f}; 74 | p, r, f = score.core.fscore(tgp, tsp, tcp); 75 | result["properties"] = {"g": tgp, "s": tsp, "c": tcp, "p": p, "r": r, "f": f}; 76 | tga = tgn + tga + tgt + tgp; 77 | tsa = tsn + tsa + tst + tsp; 78 | tca = tcn + tca + tct + tcp; 79 | p, r, f = score.core.fscore(tga, tsa, tca); 80 | result["all"] = {"g": tga, "s": tsa, "c": tca, "p": p, "r": r, "f": f}; 81 | return result; 82 | -------------------------------------------------------------------------------- /mtool/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | 7 | exec(open('version.py').read()) 8 | release = __version__ 9 | version = '.'.join(release.split('.')[:2]) 10 | 11 | 12 | setuptools.setup( 13 | name="mtool", 14 | version="0.0.1", 15 | author="Stephan Oepen , Marco Kuhlmann , " 16 | "Daniel Hershcovich , Tim O'Gorman ", 17 | author_email="mrp-organizers@nlpl.eu", 18 | description="The Swiss Army Knife of Meaning Representation", 19 | long_description=long_description, 20 | long_description_content_type="text/markdown", 21 | url="https://github.com/cfmrp/mtool", 22 | packages=setuptools.find_packages(), 23 | py_modules=["graph", "analyzer", "inspector", "treewidth", 'main', 'version'], 24 | license='LGPL-3.0', 25 | install_requires=[ 26 | 'numpy', 27 | ], 28 | entry_points = { 29 | 'console_scripts': ['mtool=main:main'], 30 | }, 31 | classifiers=[ 32 | "Environment :: Console", 33 | "Development Status :: 4 - Beta", 34 | "Intended Audience :: Developers", 35 | "Intended Audience :: Education", 36 | "Intended Audience :: Science/Research", 37 | "Operating System :: OS Independent", 38 | "Programming Language :: Python :: 3", 39 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 40 | 
"Topic :: Scientific/Engineering :: Information Analysis" 41 | ] 42 | ) 43 | -------------------------------------------------------------------------------- /mtool/smatch/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2015 Shu Cai and Kevin Knight 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /mtool/smatch/README.md: -------------------------------------------------------------------------------- 1 | # Smatch (semantic match) tool 2 | 3 | This is source code of [smatch](http://amr.isi.edu/evaluation.html), an evaluation tool for AMR (Abstract Meaning Representation). 4 | 5 | The code here is based on [Shu Cai](https://github.com/snowblink14)'s [smatch v1.0.2](https://github.com/danielhers/smatch/tree/1.0.2), with some changes to allow programmatic usage. 
6 | 7 | More details and updates about AMR and smatch can be found in USC/ISI's AMR site: http://amr.isi.edu/index.html 8 | -------------------------------------------------------------------------------- /mtool/smatch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__init__.py -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/amr.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/amr.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/amr.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/amr.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/amr.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/amr.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/smatch.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/smatch.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/smatch.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/smatch.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/smatch/__pycache__/smatch.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/smatch.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/ucca/README.md: -------------------------------------------------------------------------------- 1 | Universal Conceptual Cognitive Annotation 2 | ============================ 3 | UCCA is a linguistic framework for semantic annotation, whose details 4 | are available at [the 
following paper](http://www.cs.huji.ac.il/~oabend/papers/ucca_acl.pdf): 5 | 6 | @inproceedings{abend2013universal, 7 | author={Abend, Omri and Rappoport, Ari}, 8 | title={{U}niversal {C}onceptual {C}ognitive {A}nnotation ({UCCA})}, 9 | booktitle={Proc. of ACL}, 10 | month={August}, 11 | year={2013}, 12 | pages={228--238}, 13 | url={http://aclweb.org/anthology/P13-1023} 14 | } 15 | 16 | This Python 3 package provides an API to the UCCA annotation and tools to 17 | manipulate and process it. Its main features are conversion between different 18 | representations of UCCA annotations, and rich objects for all of the linguistic 19 | relations which appear in the theoretical framework (see `core`, `layer0`, `layer1` 20 | and `convert` modules under the `ucca` package). 21 | 22 | The `scripts` package contains various utilities for processing passage files. 23 | 24 | To parse text to UCCA graphs, use [TUPA, the UCCA parser](http://www.cs.huji.ac.il/~danielh/tupa). 25 | 26 | 27 | Authors 28 | ------ 29 | * Amit Beka: amit.beka@gmail.com 30 | * Daniel Hershcovich: danielh@cs.huji.ac.il 31 | 32 | 33 | License 34 | ------- 35 | This package is licensed under the GPLv3 or later license. 
36 | 37 | [![Build Status (Travis CI)](https://travis-ci.org/danielhers/ucca.svg?branch=master)](https://travis-ci.org/danielhers/ucca) 38 | [![Build Status (AppVeyor)](https://ci.appveyor.com/api/projects/status/github/danielhers/ucca?svg=true)](https://ci.appveyor.com/project/danielh/ucca) 39 | [![Build Status (Docs)](https://readthedocs.org/projects/ucca/badge/?version=latest)](http://ucca.readthedocs.io/en/latest/) 40 | [![PyPI version](https://badge.fury.io/py/UCCA.svg)](https://badge.fury.io/py/UCCA) 41 | -------------------------------------------------------------------------------- /mtool/ucca/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__init__.py -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- 
/mtool/ucca/__pycache__/convert.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/convert.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/convert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/convert.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/convert.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/convert.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/core.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/core.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/core.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/core.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/core.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/core.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/ioutil.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/ioutil.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/ioutil.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/ioutil.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/ioutil.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/ioutil.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/layer0.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer0.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/layer0.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer0.cpython-38.pyc -------------------------------------------------------------------------------- 
/mtool/ucca/__pycache__/layer0.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer0.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/layer1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer1.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/layer1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer1.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/layer1.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer1.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/normalization.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/normalization.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/normalization.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/normalization.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/normalization.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/normalization.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/textutil.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/textutil.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/textutil.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/textutil.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/ucca/__pycache__/textutil.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/textutil.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__init__.py -------------------------------------------------------------------------------- 
/mtool/validate/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/amr.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/amr.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/amr.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/amr.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/amr.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/amr.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/core.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/core.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/core.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/core.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/core.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/core.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/eds.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/eds.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/eds.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/eds.cpython-38.pyc -------------------------------------------------------------------------------- 
/mtool/validate/__pycache__/eds.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/eds.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/sdp.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/sdp.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/sdp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/sdp.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/sdp.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/sdp.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/ucca.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/ucca.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/ucca.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/ucca.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/ucca.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/ucca.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/utilities.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/utilities.cpython-37.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/utilities.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/utilities.cpython-38.pyc -------------------------------------------------------------------------------- /mtool/validate/__pycache__/utilities.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/utilities.cpython-39.pyc -------------------------------------------------------------------------------- /mtool/validate/amr.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | from graph import Graph; 4 | from validate.utilities import report; 5 | 6 | def test(graph, actions, stream = sys.stderr): 7 | n = 0; 8 | return n; 9 | 10 | 
-------------------------------------------------------------------------------- /mtool/validate/core.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | import validate.amr; 4 | import validate.eds; 5 | import validate.sdp; 6 | import validate.ucca; 7 | from validate.utilities import report; 8 | 9 | 10 | def test(graph, actions, stream = sys.stderr): 11 | n = 0; 12 | if not isinstance(graph.id, str) or len(graph.id) == 0: 13 | n += 1; 14 | report(graph, 15 | "missing or invalid ‘id’ property", 16 | stream = stream); 17 | if not isinstance(graph.flavor, int) or graph.flavor not in {0, 1, 2}: 18 | n += 1; 19 | report(graph, 20 | "missing or invalid ‘flavor’ property", 21 | stream = stream); 22 | if not isinstance(graph.framework, str) or \ 23 | graph.framework not in {"ccd", "dm", "pas", "psd", "ptg", "ud", 24 | "eds", "ucca", "amr", "drg"}: 25 | n += 1; 26 | report(graph, 27 | "missing or invalid ‘framework’ property", 28 | stream = stream); 29 | elif graph.flavor == 0 and \ 30 | graph.framework not in {"ccd", "dm", "pas", "psd", "ud"} or \ 31 | graph.flavor == 1 and graph.framework not in {"eds", "ptg", "ucca"} or \ 32 | graph.flavor == 2 and graph.framework not in {"amr", "drg"}: 33 | n += 1; 34 | report(graph, 35 | "invalid Flavor ({}) framework: ‘{}’" 36 | "".format(graph.flavor, graph.framework), stream = stream); 37 | 38 | if "input" in actions: 39 | if not isinstance(graph.input, str) or len(graph.input) == 0: 40 | n += 1; 41 | report(graph, 42 | "missing or invalid ‘input’ property", 43 | stream = stream); 44 | 45 | l = len(graph.input) if graph.input else 0; 46 | for node in graph.nodes: 47 | if not isinstance(node.id, int): 48 | n += 1; 49 | report(graph, 50 | "invalid identifier", 51 | node = node, stream = stream); 52 | if "anchors" in actions and node.anchors and l: 53 | for anchor in node.anchors: 54 | if anchor["from"] < 0 or anchor["from"] > l \ 55 | or anchor["to"] < 0 or anchor["to"] > l \ 
56 | or anchor["from"] > anchor["to"]: 57 | n += 1; 58 | report(graph, 59 | "invalid anchor: {}".format(anchor), 60 | node = node, stream = stream); 61 | 62 | if "edges" in actions: 63 | # 64 | # the following is most likely redundant: the MRP input codec already has 65 | # to make sure all source and target identifiers actually exist. maybe 66 | # add a type check (int), though? 67 | # 68 | nodes = {node.id: node for node in graph.nodes}; 69 | for edge in graph.edges: 70 | if not isinstance(edge.src, int) or edge.src not in nodes: 71 | n += 1; 72 | report(graph, 73 | "invalid source", 74 | edge = edge, stream = stream); 75 | if not isinstance(edge.tgt, int) or edge.tgt not in nodes: 76 | n += 1; 77 | report(graph, 78 | "invalid target", 79 | edge = edge, stream = stream); 80 | num_attrib = len(edge.attributes) if edge.attributes else 0; 81 | num_values = len(edge.values) if edge.values else 0; 82 | if num_attrib != num_values: 83 | n += 1; 84 | report(graph, 85 | "unaligned ‘attributes’ vs. 
‘values’", 86 | edge = edge, stream = stream); 87 | 88 | sdp = {"ccd", "dm", "pas", "psd"}; 89 | if graph.framework == "amr" and "amr" in actions: 90 | n += validate.amr.test(graph, actions, stream); 91 | elif graph.framework == "eds" and "eds" in actions: 92 | n += validate.eds.test(graph, actions, stream); 93 | elif graph.framework in sdp and (sdp & actions): 94 | n += validate.sdp.test(graph, actions, stream); 95 | elif graph.framework == "ucca" and "ucca" in actions: 96 | n += validate.ucca.test(graph, actions, stream); 97 | 98 | return n; 99 | -------------------------------------------------------------------------------- /mtool/validate/eds.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | from graph import Graph; 4 | from validate.utilities import report; 5 | 6 | def test(graph, actions, stream = sys.stderr): 7 | n = 0; 8 | for node in graph.nodes: 9 | if not isinstance(node.label, str) or len(node.label) == 0: 10 | n += 1; 11 | report(graph, 12 | "missing or invalid label", 13 | node = node, framework = "EDS", stream = stream); 14 | message = None; 15 | if "anchors" in actions: 16 | if not isinstance(node.anchors, list): 17 | message = "missing or invalid anchoring"; 18 | elif len(node.anchors) != 1 \ 19 | or ("from" not in node.anchors[0] or "to" not in node.anchors[0]): 20 | message = "invalid ‘anchors’ value: {}".format(node.anchors); 21 | if message is not None: 22 | n += 1; 23 | report(graph, message, 24 | node = node, framework = "EDS", stream = stream); 25 | return n; 26 | 27 | -------------------------------------------------------------------------------- /mtool/validate/sdp.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | from graph import Graph; 4 | from validate.utilities import report; 5 | 6 | def test(graph, actions, stream = sys.stderr): 7 | n = 0; 8 | return n; 9 | 10 | 
-------------------------------------------------------------------------------- /mtool/validate/ucca.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from validate.utilities import report 4 | 5 | CATEGORIES = {'H', 'A', 'P', 'S', 'D', 'G', 'C', 'E', 'F', 'N', 'R', 'T', 'Q', 'L', 'U'} 6 | 7 | 8 | def is_primary(edge): 9 | for attribute, value in zip(edge.attributes or (), edge.values or ()): 10 | if attribute == "remote" and value != "false": 11 | return False 12 | return True 13 | 14 | 15 | def is_implicit(node): 16 | for prop, value in zip(node.properties or (), node.values or ()): 17 | if prop == "implicit" and value != "false": 18 | return True 19 | return False 20 | 21 | 22 | def test(graph, actions, stream=sys.stderr): 23 | n = 0 24 | for edge in graph.edges: 25 | if not isinstance(edge.lab, str) or len(edge.lab) == 0: 26 | n += 1 27 | report(graph, 28 | "missing or invalid label", 29 | edge=edge, framework="UCCA", stream=stream) 30 | elif edge.lab.upper() not in CATEGORIES: 31 | n += 1 32 | report(graph, 33 | "edge label is not a UCCA category", 34 | edge=edge, framework="UCCA", stream=stream) 35 | if edge.is_loop(): 36 | n += 1 37 | report(graph, 38 | "loop edge", 39 | edge=edge, framework="UCCA", stream=stream) 40 | roots = [] 41 | for node in graph.nodes: 42 | primary = [edge for edge in node.incoming_edges if is_primary(edge)] 43 | primary_parents = {edge.src for edge in primary} 44 | if not primary: 45 | roots.append(node) 46 | elif len(primary_parents) > 1: 47 | n += 1 48 | report(graph, 49 | "multiple primary parents for node", 50 | node=node, edge=primary[0], framework="UCCA", stream=stream) 51 | if not roots: 52 | n += 1 53 | report(graph, 54 | "no roots in graph", 55 | framework="UCCA", stream=stream) 56 | elif len(roots) > 1: 57 | n += 1 58 | report(graph, 59 | "multiple roots in graph", 60 | node=roots[0], framework="UCCA", stream=stream) 61 | else: 62 | for node in roots: 63 | remotes = 
[edge for edge in node.incoming_edges if not is_primary(edge)] 64 | if remotes: 65 | n += 1 66 | report(graph, 67 | "root has remote parents", 68 | node=node, edge=remotes[0], framework="UCCA", stream=stream) 69 | for node in graph.nodes: 70 | if node.is_leaf() and not node.anchors and not is_implicit(node): 71 | n += 1 72 | report(graph, 73 | "unanchored non-implicit node", 74 | node=node, framework="UCCA", stream=stream) 75 | return n 76 | -------------------------------------------------------------------------------- /mtool/validate/utilities.py: -------------------------------------------------------------------------------- 1 | import sys; 2 | 3 | def report(graph, message, node = None, edge = None, 4 | framework = None, level = "E", stream = sys.stderr): 5 | if node is not None: 6 | node = "; node #{}".format(node.id); 7 | else: 8 | node = ""; 9 | if edge is not None: 10 | edge = "; edge {} -{}-> {}".format(edge.src, edge.tgt, 11 | edge.lab if edge.lab else ""); 12 | else: 13 | edge = ""; 14 | if framework is not None: 15 | framework = "{{{}}} ".format(framework); 16 | else: 17 | framework = ""; 18 | print("validate(): [{}] {}graph #{}{}{}: {}." 
19 | "".format(level, framework, graph.id, node, edge, message), 20 | file = stream); 21 | -------------------------------------------------------------------------------- /mtool/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1"; 2 | 3 | -------------------------------------------------------------------------------- /perin/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/.DS_Store -------------------------------------------------------------------------------- /perin/config/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/config/.DS_Store -------------------------------------------------------------------------------- /perin/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/config/__init__.py -------------------------------------------------------------------------------- /perin/config/edge_ace_e.yaml: -------------------------------------------------------------------------------- 1 | framework: ace 2 | language: en 3 | graph_mode: labeled-edge 4 | 5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr 6 | epochs: 200 7 | n_layers: 3 8 | query_length: 2 9 | decoder_learning_rate: 1.0e-4 10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate 11 | encoder_weight_decay: 0.1 12 | encoder_delay_steps: 500 13 | warmup_steps: 1000 14 | char_embedding: true 15 | dropout_word: 0.1 16 | focal: true 17 | hidden_size_edge_presence: 256 18 | hidden_size_anchor: 256 19 | dropout_anchor: 0.4 20 | dropout_edge_presence: 0.5 21 
| dropout_label: 0.85 22 | batch_size: 16 23 | dropout_transformer: 0.25 24 | beta_2: 0.98 25 | layerwise_lr_decay: 0.9 -------------------------------------------------------------------------------- /perin/config/edge_ace_e_p.yaml: -------------------------------------------------------------------------------- 1 | framework: ace_p 2 | language: en 3 | graph_mode: labeled-edge 4 | 5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr 6 | epochs: 200 7 | n_layers: 3 8 | query_length: 2 9 | decoder_learning_rate: 1.0e-4 10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate 11 | encoder_weight_decay: 0.1 12 | encoder_delay_steps: 500 13 | warmup_steps: 1000 14 | char_embedding: true 15 | dropout_word: 0.1 16 | focal: true 17 | hidden_size_edge_presence: 256 18 | hidden_size_anchor: 256 19 | dropout_anchor: 0.4 20 | dropout_edge_presence: 0.5 21 | dropout_label: 0.85 22 | batch_size: 16 23 | dropout_transformer: 0.25 24 | beta_2: 0.98 25 | layerwise_lr_decay: 0.9 -------------------------------------------------------------------------------- /perin/config/edge_ace_e_pp.yaml: -------------------------------------------------------------------------------- 1 | framework: ace_pp 2 | language: en 3 | graph_mode: labeled-edge 4 | 5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr 6 | epochs: 200 7 | n_layers: 3 8 | query_length: 2 9 | decoder_learning_rate: 1.0e-4 10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate 11 | encoder_weight_decay: 0.1 12 | encoder_delay_steps: 500 13 | warmup_steps: 1000 14 | char_embedding: true 15 | dropout_word: 0.1 16 | focal: true 17 | hidden_size_edge_presence: 256 18 | hidden_size_anchor: 256 19 | dropout_anchor: 0.4 20 | dropout_edge_presence: 0.5 21 | dropout_label: 0.85 22 | batch_size: 16 23 | dropout_transformer: 0.25 24 | beta_2: 0.98 25 | layerwise_lr_decay: 0.9 26 | 
-------------------------------------------------------------------------------- /perin/config/edge_ace_e_ppp.yaml: -------------------------------------------------------------------------------- 1 | framework: ace_ppp 2 | language: en 3 | graph_mode: labeled-edge 4 | 5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr 6 | epochs: 200 7 | n_layers: 3 8 | query_length: 2 9 | decoder_learning_rate: 1.0e-4 10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate 11 | encoder_weight_decay: 0.1 12 | encoder_delay_steps: 500 13 | warmup_steps: 1000 14 | char_embedding: true 15 | dropout_word: 0.1 16 | focal: true 17 | hidden_size_edge_presence: 256 18 | hidden_size_anchor: 256 19 | dropout_anchor: 0.4 20 | dropout_edge_presence: 0.5 21 | dropout_label: 0.85 22 | batch_size: 16 23 | dropout_transformer: 0.25 24 | beta_2: 0.98 25 | layerwise_lr_decay: 0.9 26 | -------------------------------------------------------------------------------- /perin/convert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 ../mtool/main.py $2--strings --ids --read mrp --write ace "$1" "$1_converted" -------------------------------------------------------------------------------- /perin/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/.DS_Store -------------------------------------------------------------------------------- /perin/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/__init__.py -------------------------------------------------------------------------------- /perin/data/batch.py: -------------------------------------------------------------------------------- 
1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | 8 | class Batch: 9 | @staticmethod 10 | def build(data): 11 | fields = list(data[0].keys()) 12 | transposed = {} 13 | for field in fields: 14 | if isinstance(data[0][field], tuple): 15 | transposed[field] = tuple(Batch._stack(field, [example[field][i] for example in data]) for i in range(len(data[0][field]))) 16 | else: 17 | transposed[field] = Batch._stack(field, [example[field] for example in data]) 18 | 19 | return transposed 20 | 21 | @staticmethod 22 | def _stack(field: str, examples): 23 | if field == "anchored_labels": 24 | return examples 25 | 26 | dim = examples[0].dim() 27 | 28 | if dim == 0: 29 | return torch.stack(examples) 30 | 31 | lengths = [max(example.size(i) for example in examples) for i in range(dim)] 32 | if any(length == 0 for length in lengths): 33 | return torch.LongTensor(len(examples), *lengths) 34 | 35 | examples = [F.pad(example, Batch._pad_size(example, lengths)) for example in examples] 36 | return torch.stack(examples) 37 | 38 | @staticmethod 39 | def _pad_size(example, total_size): 40 | return [p for i, l in enumerate(total_size[::-1]) for p in (0, l - example.size(-1 - i))] 41 | 42 | @staticmethod 43 | def index_select(batch, indices): 44 | filtered_batch = {} 45 | for key, examples in batch.items(): 46 | if isinstance(examples, list) or isinstance(examples, tuple): 47 | filtered_batch[key] = [example.index_select(0, indices) for example in examples] 48 | else: 49 | filtered_batch[key] = examples.index_select(0, indices) 50 | 51 | return filtered_batch 52 | 53 | @staticmethod 54 | def to_str(batch): 55 | string = "\n".join([f"\t{name}: {Batch._short_str(item)}" for name, item in batch.items()]) 56 | return string 57 | 58 | @staticmethod 59 | def to(batch, device): 60 | converted = {} 61 | for field in batch.keys(): 62 | converted[field] = Batch._to(batch[field], device) 63 | return converted 64 | 65 | @staticmethod 66 | def 
_short_str(tensor): 67 | # unwrap variable to tensor 68 | if not torch.is_tensor(tensor): 69 | # (1) unpack variable 70 | if hasattr(tensor, "data"): 71 | tensor = getattr(tensor, "data") 72 | # (2) handle include_lengths 73 | elif isinstance(tensor, tuple) or isinstance(tensor, list): 74 | return str(tuple(Batch._short_str(t) for t in tensor)) 75 | # (3) fallback to default str 76 | else: 77 | return str(tensor) 78 | 79 | # copied from torch _tensor_str 80 | size_str = "x".join(str(size) for size in tensor.size()) 81 | device_str = "" if not tensor.is_cuda else " (GPU {})".format(tensor.get_device()) 82 | strt = "[{} of size {}{}]".format(torch.typename(tensor), size_str, device_str) 83 | return strt 84 | 85 | @staticmethod 86 | def _to(tensor, device): 87 | if not torch.is_tensor(tensor): 88 | if isinstance(tensor, tuple): 89 | return tuple(Batch._to(t, device) for t in tensor) 90 | elif isinstance(tensor, list): 91 | return [Batch._to(t, device) for t in tensor] 92 | else: 93 | raise Exception(f"unsupported type of {tensor} to be casted to cuda") 94 | 95 | return tensor.to(device, non_blocking=True) 96 | -------------------------------------------------------------------------------- /perin/data/field/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/.DS_Store -------------------------------------------------------------------------------- /perin/data/field/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/__init__.py -------------------------------------------------------------------------------- /perin/data/field/anchor_field.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 
3 | 4 | import torch 5 | from data.field.mini_torchtext.field import RawField 6 | 7 | 8 | class AnchorField(RawField): 9 | def process(self, batch, device=None): 10 | tensors, masks = self.pad(batch, device) 11 | return tensors, masks 12 | 13 | def pad(self, anchors, device): 14 | tensor = torch.zeros(anchors[0], anchors[1], dtype=torch.long, device=device) 15 | for anchor in anchors[-1]: 16 | tensor[anchor[0], anchor[1]] = 1 17 | mask = tensor.sum(-1) == 0 18 | 19 | return tensor, mask 20 | -------------------------------------------------------------------------------- /perin/data/field/anchored_label_field.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from data.field.mini_torchtext.field import RawField 3 | 4 | 5 | class AnchoredLabelField(RawField): 6 | def __init__(self): 7 | super(AnchoredLabelField, self).__init__() 8 | self.vocab = None 9 | 10 | def process(self, example, device=None): 11 | example = self.numericalize(example) 12 | tensor = self.pad(example, device) 13 | return tensor 14 | 15 | def pad(self, example, device): 16 | n_labels = len(self.vocab) 17 | n_nodes, n_tokens = len(example[1]), example[0] 18 | 19 | tensor = torch.full([n_nodes, n_tokens, n_labels + 1], 0, dtype=torch.long, device=device) 20 | for i_node, node in enumerate(example[1]): 21 | for anchor, rule in node: 22 | tensor[i_node, anchor, rule + 1] = 1 23 | 24 | return tensor 25 | 26 | def numericalize(self, arr): 27 | def multi_map(array, function): 28 | if isinstance(array, tuple): 29 | return (array[0], function(array[1])) 30 | elif isinstance(array, list): 31 | return [multi_map(a, function) for a in array] 32 | else: 33 | return array 34 | 35 | if self.vocab is not None: 36 | arr = multi_map(arr, lambda x: self.vocab.stoi[x] if x in self.vocab.stoi else 0) 37 | 38 | return arr 39 | -------------------------------------------------------------------------------- /perin/data/field/basic_field.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | 4 | import torch 5 | from data.field.mini_torchtext.field import RawField 6 | 7 | 8 | class BasicField(RawField): 9 | def process(self, example, device=None): 10 | tensor = torch.tensor(example, dtype=torch.long, device=device) 11 | # tensor = example.clone().detach().to(device) 12 | 13 | return tensor 14 | 15 | # it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). -------------------------------------------------------------------------------- /perin/data/field/bert_field.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | 4 | import torch 5 | from data.field.mini_torchtext.field import RawField 6 | 7 | 8 | class BertField(RawField): 9 | def __init__(self): 10 | super(BertField, self).__init__() 11 | 12 | def process(self, example, device=None): 13 | attention_mask = [1] * len(example) 14 | 15 | example = torch.LongTensor(example, device=device) 16 | attention_mask = torch.ones_like(example) 17 | 18 | return example, attention_mask 19 | -------------------------------------------------------------------------------- /perin/data/field/edge_field.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | 4 | import torch 5 | from data.field.mini_torchtext.field import RawField 6 | from data.field.mini_torchtext.vocab import Vocab 7 | from collections import Counter 8 | import types 9 | 10 | 11 | class EdgeField(RawField): 12 | def __init__(self): 13 | super(EdgeField, self).__init__() 14 | self.vocab = None 15 | 16 | def process(self, edges, device=None): 17 | edges = self.numericalize(edges) 18 | tensor = self.pad(edges, device) 19 | return tensor 20 | 21 | def pad(self, edges, 
#!/usr/bin/env python3
# coding=utf-8

import torch
from data.field.mini_torchtext.field import RawField
from data.field.mini_torchtext.vocab import Vocab
from collections import Counter
import types


class EdgeField(RawField):
    """Field turning sparse (source, target, label) edges into a dense matrix."""

    def __init__(self):
        super(EdgeField, self).__init__()
        self.vocab = None

    def process(self, edges, device=None):
        """Numericalize edge labels, then pad into a dense adjacency matrix."""
        return self.pad(self.numericalize(edges), device)

    def pad(self, edges, device):
        """Return an (n, m) long tensor; `edges` is (n, m, [(i, j, label), ...])."""
        matrix = torch.zeros(edges[0], edges[1], dtype=torch.long, device=device)
        for edge in edges[-1]:
            matrix[edge[0], edge[1]] = edge[2]
        return matrix

    def numericalize(self, arr):
        """Map every edge label to its vocabulary id; a None label becomes 0."""
        if self.vocab is None:
            return arr

        def convert(item):
            if isinstance(item, tuple):
                return (item[0], item[1], self.vocab.stoi[item[2]] if item[2] is not None else 0)
            if isinstance(item, list):
                return [convert(element) for element in item]
            return item

        return convert(arr)

    def build_vocab(self, *args):
        """Collect every edge label reachable from `args` into `self.vocab`."""
        def labels(obj):
            if isinstance(obj, tuple):
                yield obj[2]
            elif isinstance(obj, (list, types.GeneratorType)):
                for element in obj:
                    yield from labels(element)

        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)

        counter = Counter(label for label in labels(sources) if label is not None)
        self.vocab = Vocab(counter, specials=[])
#!/usr/bin/env python3
# coding=utf-8

import torch
from data.field.mini_torchtext.field import RawField
from data.field.mini_torchtext.vocab import Vocab
from collections import Counter
import types


class EdgeLabelField(RawField):
    """Field producing one-hot edge-label tensors plus a mask of unlabeled edges."""

    def process(self, edges, device=None):
        """Numericalize and pad one example; returns (labels, mask)."""
        numericalized, masks = self.numericalize(edges)
        return self.pad(numericalized, masks, device)

    def pad(self, edges, masks, device):
        """Return ((n, m, n_labels) one-hot labels, (n, m) bool mask)."""
        n_labels = len(self.vocab)

        label_tensor = torch.zeros(edges[0], edges[1], n_labels, dtype=torch.long, device=device)
        mask_tensor = torch.zeros(edges[0], edges[1], dtype=torch.bool, device=device)

        for edge in edges[-1]:
            label_tensor[edge[0], edge[1], edge[2]] = 1
        for mask in masks[-1]:
            mask_tensor[mask[0], mask[1]] = mask[2]

        return label_tensor, mask_tensor

    def numericalize(self, arr):
        """Map labels to vocab ids (OOV -> 0) and record which labels were None."""
        def convert(array, function):
            if isinstance(array, tuple):
                return (array[0], array[1], function(array[2]))
            if isinstance(array, list):
                return [convert(element, function) for element in array]
            return array

        mask = convert(arr, lambda x: x is None)
        ids = convert(arr, lambda x: self.vocab.stoi[x] if x in self.vocab.stoi else 0)
        return ids, mask

    def build_vocab(self, *args):
        """Collect every edge label reachable from `args` into `self.vocab`."""
        def labels(obj):
            if isinstance(obj, tuple):
                yield obj[2]
            elif isinstance(obj, (list, types.GeneratorType)):
                for element in obj:
                    yield from labels(element)

        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)

        counter = Counter(label for label in labels(sources) if label is not None)
        self.vocab = Vocab(counter, specials=[])
import torch
from data.field.mini_torchtext.field import Field as TorchTextField
from collections import Counter, OrderedDict


# Same contract as the upstream torchtext Field, except vocab building knows
# how to pull examples out of our Dataset class.
class Field(TorchTextField):
    def build_vocab(self, *args, **kwargs):
        """Build `self.vocab` from datasets and/or raw iterables in `args`."""
        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)

        counter = Counter()
        for source in sources:
            for example in source:
                counter.update(example if self.sequential else [example])

        specials = list(
            OrderedDict.fromkeys(
                token
                for token in [self.unk_token, self.pad_token, self.init_token, self.eos_token] + kwargs.pop("specials", [])
                if token is not None
            )
        )
        self.vocab = self.vocab_cls(counter, specials=specials, **kwargs)

    def process(self, example, device=None):
        """Numericalize one example (paired with its length when requested)."""
        if self.include_lengths:
            example = example, len(example)
        return self.numericalize(example, device=device)

    def numericalize(self, ex, device=None):
        """Turn one (possibly length-paired) example into a tensor."""
        if self.include_lengths and not isinstance(ex, tuple):
            raise ValueError("Field has include_lengths set to True, but input data is not a tuple of (data batch, batch lengths).")

        lengths = None
        if isinstance(ex, tuple):
            ex, raw_lengths = ex
            lengths = torch.tensor(raw_lengths, dtype=self.dtype, device=device)

        if self.use_vocab:
            ex = [self.vocab.stoi[x] for x in ex] if self.sequential else self.vocab.stoi[ex]
            if self.postprocessing is not None:
                ex = self.postprocessing(ex, self.vocab)
        else:
            numericalization_func = self.dtypes[self.dtype]
            if not self.sequential:
                ex = numericalization_func(ex) if isinstance(ex, str) else ex
            if self.postprocessing is not None:
                ex = self.postprocessing(ex, None)

        var = torch.tensor(ex, dtype=self.dtype, device=device)

        if self.sequential:
            if not self.batch_first:
                var.t_()
            var = var.contiguous()

        if self.include_lengths:
            return var, lengths
        return var
import torch
from data.field.mini_torchtext.field import RawField
from data.field.mini_torchtext.vocab import Vocab
from collections import Counter


class LabelField(RawField):
    """Field mapping label sequences to 1-based vocabulary-id tensors."""

    # BUG FIX: the constructor was previously named `__self__`, so it was never
    # invoked and `self.vocab` was never set at construction time. The
    # `preprocessing` argument now defaults to None to stay call-compatible
    # with the no-argument construction that previously fell through to RawField.
    def __init__(self, preprocessing=None):
        super(LabelField, self).__init__(preprocessing=preprocessing)
        self.vocab = None

    def build_vocab(self, *args, **kwargs):
        """Count every label in the given datasets/iterables into `self.vocab`."""
        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)

        counter = Counter()
        for data in sources:
            for x in data:
                counter.update(x)

        self.vocab = Vocab(counter, specials=[])

    def process(self, example, device=None):
        """Return (label-id tensor, scalar length tensor) for one example."""
        return self.numericalize(example, device=device)

    def numericalize(self, example, device=None):
        """Map labels to vocabulary ids; ids are shifted so 0 means padding."""
        ids = [self.vocab.stoi[x] + 1 for x in example]
        # BUG FIX: torch.LongTensor(..., device=...) is a legacy constructor
        # that rejects non-CPU devices; torch.tensor places data correctly.
        length = torch.tensor(len(example), dtype=torch.long, device=device)
        tensor = torch.tensor(ids, dtype=torch.long, device=device)
        return tensor, length
class Pipeline(object):
    """A composable transformation applied to sequence data.

    A pipeline is a chain of `Pipeline` stages; calling it runs every
    stage's `convert_token` over the input, in order.

    Attributes:
        convert_token: the function this stage applies to its input.
        pipes: the ordered list of pipeline stages to run.
    """

    def __init__(self, convert_token=None):
        """Create a single-stage pipeline.

        Arguments:
            convert_token: callable applied to input data, or None for the
                identity function. Anything else raises ValueError.
        """
        if convert_token is None:
            convert_token = Pipeline.identity
        elif not callable(convert_token):
            raise ValueError("Pipeline input convert_token {} is not None "
                             "or callable".format(convert_token))
        self.convert_token = convert_token
        self.pipes = [self]

    def __call__(self, x, *args):
        """Run every stage of the pipeline over `x`, in order.

        Extra positional arguments are forwarded to each stage's `call`.
        """
        for stage in self.pipes:
            x = stage.call(x, *args)
        return x

    def call(self, x, *args):
        """Apply only this stage's `convert_token`, element-wise for lists."""
        if isinstance(x, list):
            return [self.convert_token(token, *args) for token in x]
        return self.convert_token(x, *args)

    def add_before(self, pipeline):
        """Prepend another pipeline (or callable) to this one; returns self."""
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)
        self.pipes = pipeline.pipes[:] + self.pipes[:]
        return self

    def add_after(self, pipeline):
        """Append another pipeline (or callable) to this one; returns self."""
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)
        self.pipes = self.pipes[:] + pipeline.pipes[:]
        return self

    @staticmethod
    def identity(x):
        """Identity; a named function so pipelines stay picklable."""
        return x
#!/usr/bin/env python3
# coding=utf-8

import torch
from data.field.mini_torchtext.field import NestedField as TorchTextNestedField


class NestedField(TorchTextNestedField):
    """NestedField adapted to single examples and to our Dataset class."""

    def pad(self, example):
        """Pad one example with the nesting field; add lengths when requested."""
        self.nesting_field.include_lengths = self.include_lengths
        if not self.include_lengths:
            return self.nesting_field.pad(example)

        sentence_length = len(example)
        padded, word_lengths = self.nesting_field.pad(example)
        return padded, sentence_length, word_lengths

    def numericalize(self, arr, device=None):
        """Numericalize with the nesting field; tensorize lengths if present."""
        self.nesting_field.include_lengths = False
        if self.include_lengths:
            arr, sentence_length, word_lengths = arr

        numericalized = self.nesting_field.numericalize(arr, device=device)
        self.nesting_field.include_lengths = True

        if not self.include_lengths:
            return numericalized

        sentence_length = torch.tensor(sentence_length, dtype=self.dtype, device=device)
        word_lengths = torch.tensor(word_lengths, dtype=self.dtype, device=device)
        return (numericalized, sentence_length, word_lengths)

    def build_vocab(self, *args, **kwargs):
        """Build one shared vocabulary for this field and its nesting field."""
        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)

        flattened = []
        for source in sources:
            flattened.extend(source)

        # build only the vocabulary; vectors are intentionally not loaded
        self.nesting_field.build_vocab(*flattened, **kwargs)
        super(TorchTextNestedField, self).build_vocab()
        self.vocab.extend(self.nesting_field.vocab)
        self.vocab.freqs = self.nesting_field.vocab.freqs.copy()
        self.nesting_field.vocab = self.vocab
class AbstractParser(torch.utils.data.Dataset):
    """Dataset base class: builds Example objects from parsed MRP-style dicts."""

    def __init__(self, fields, data, filter_pred=None):
        super(AbstractParser, self).__init__()

        self.examples = [example_from_json(d, fields) for _, d in sorted(data.items())]

        # a dict of fields may map one key to several (name, field) pairs
        if isinstance(fields, dict):
            field_dict, fields = fields, []
            for field in field_dict.values():
                if isinstance(field, list):
                    fields.extend(field)
                else:
                    fields.append(field)

        if filter_pred is not None:
            was_list = isinstance(self.examples, list)
            filtered = filter(filter_pred, self.examples)
            self.examples = list(filtered) if was_list else filtered

        self.fields = dict(fields)

        # unpack tuple keys: ("a", "b") -> separate "a" and "b" entries
        for names, nested_fields in list(self.fields.items()):
            if isinstance(names, tuple):
                self.fields.update(zip(names, nested_fields))
                del self.fields[names]

    def __getitem__(self, i):
        example = self.examples[i]
        return {
            name: field.process(getattr(example, name), device=None)
            for name, field in self.fields.items()
            if field is not None
        }

    def __len__(self):
        return len(self.examples)

    def get_examples(self, attr):
        """Yield the raw attribute `attr` of every example, if it is a field."""
        if attr in self.fields:
            for example in self.examples:
                yield getattr(example, attr)
class EvaluationParser(AbstractParser):
    """Parser for test-time MRP data: tokens and anchors only, no gold graphs."""

    def __init__(self, args, fields):
        self.data = utils.load_dataset(args.test_data)

        for sentence in self.data.values():
            sentence["token anchors"] = [[a["from"], a["to"]] for a in sentence["token anchors"]]

        utils.create_bert_tokens(self.data, args.encoder)

        super(EvaluationParser, self).__init__(fields, self.data)


class LabeledEdgeParser(AbstractParser):
    """Training/validation parser for the labeled-edge graph encoding."""

    def __init__(self, args, part: str, fields, filter_pred=None, **kwargs):
        assert part == "training" or part == "validation"
        path = args.training_data if part == "training" else args.validation_data

        self.data = utils.load_dataset(path)
        utils.anchor_ids_from_intervals(self.data)

        self.node_counter, self.edge_counter, self.no_edge_counter = 0, 0, 0
        anchor_count, n_node_token_pairs = 0, 0

        # drop every sentence that contains an unlabeled edge
        for sentence_id, sentence in list(self.data.items()):
            for edge in sentence["edges"]:
                if "label" not in edge:
                    del self.data[sentence_id]
                    break

        # all nodes share a dummy label; the structure lives in the edges
        for node, sentence in utils.node_generator(self.data):
            node["label"] = "Node"
            self.node_counter += 1

        utils.create_bert_tokens(self.data, args.encoder)

        # create edge vectors
        for sentence in self.data.values():
            assert sentence["tops"] == [0], sentence
            n_nodes = len(sentence["nodes"])

            edge_count = utils.create_edges(sentence)
            self.edge_counter += edge_count
            self.no_edge_counter += n_nodes * (n_nodes - 1) - edge_count

            # the artificial top node is only needed while creating edges
            sentence["nodes"] = sentence["nodes"][1:]
            n_nodes = len(sentence["nodes"])

            n_tokens = len(sentence["input"])
            sentence["anchor edges"] = [n_nodes, n_tokens, []]
            sentence["anchored labels"] = [n_tokens, []]
            for i, node in enumerate(sentence["nodes"]):
                labels_for_node = [(anchor, node["label"]) for anchor in node["anchors"]]
                for anchor in node["anchors"]:
                    sentence["anchor edges"][-1].append((i, anchor))
                sentence["anchored labels"][1].append(labels_for_node)

                anchor_count += len(node["anchors"])
                n_node_token_pairs += n_tokens

            sentence["id"] = [sentence["id"]]

        self.anchor_freq = anchor_count / n_node_token_pairs
        self.input_count = sum(len(sentence["input"]) for sentence in self.data.values())

        super(LabeledEdgeParser, self).__init__(fields, self.data, filter_pred)

    @staticmethod
    def node_similarity_key(node):
        return tuple([node["label"]] + node["anchors"])
import utility.parser_utils as utils
from data.parser.from_mrp.abstract_parser import AbstractParser


class RequestParser(AbstractParser):
    """Parser for raw sentences sent at inference time (no gold graphs)."""

    def __init__(self, sentences, args, language: str, fields):
        self.data = {i: {"id": str(i), "sentence": sentence} for i, sentence in enumerate(sentences)}

        # BUG FIX: this loop previously iterated `zip(self.data.values())`,
        # which yields 1-tuples (not dicts) and crashed on item assignment;
        # it also read the not-yet-created "input" key instead of "sentence".
        for example in self.data.values():
            example["input"] = example["sentence"].strip().split(' ')
            utils.create_token_anchors(example)

        for example in self.data.values():
            example["token anchors"] = [[a["from"], a["to"]] for a in example["token anchors"]]

        utils.create_bert_tokens(self.data, args.encoder)

        super(RequestParser, self).__init__(fields, self.data)
def example_from_json(obj, fields):
    """Build an Example from a JSON-like dict using a {key: (name, field)} spec.

    Keys may be dotted ("foo.bar") to descend into nested dicts; when an
    intermediate value is a list, the lookup is applied to every element.
    Raises ValueError when a key is missing from the input data.
    """
    def descend(node, key):
        # one step of the dotted-key lookup
        if isinstance(node, list):
            results = []
            for element in node:
                if key not in element:
                    raise ValueError("Specified key {} was not found in the input data".format(key))
                results.append(element[key])
            return results
        if key not in node:
            raise ValueError("Specified key {} was not found in the input data".format(key))
        return node[key]

    example = Example()
    for key, vals in fields.items():
        if vals is None:
            continue
        if not isinstance(vals, list):
            vals = [vals]
        for name, field in vals:
            value = reduce(descend, key.split("."), obj)
            setattr(example, name, field.preprocess(value))
    return example
#!/usr/bin/env python3
# coding=utf-8

class AbstractParser:
    """Base class converting raw model predictions back into MRP-style graphs."""

    def __init__(self, dataset):
        self.dataset = dataset

    def create_nodes(self, prediction):
        """Return one {"id", "label"} dict per predicted node."""
        return [
            {"id": i, "label": self.label_to_str(label, prediction["anchors"][i], prediction)}
            for i, label in enumerate(prediction["labels"])
        ]

    def label_to_str(self, label, anchors, prediction):
        # labels are stored 1-based (0 is reserved), hence the shift
        return self.dataset.label_field.vocab.itos[label - 1]

    def create_edges(self, prediction, nodes):
        """Greedily decode edges in order of decreasing presence score.

        Low-score edges (< 0.5) are admitted only while they still help join
        the graph into a single component (spanning-tree style).
        """
        N = len(nodes)
        # union-find-like connected components, one singleton per node
        components = [{"id": n, "set": set([n])} for n in range(N)]
        _, order = prediction["edge presence"][:N, :N].reshape(-1).sort(descending=True)
        sources, targets = order // N, order % N

        edges = []
        for i in range((N - 1) * N // 2):
            source, target = sources[i].item(), targets[i].item()
            score = prediction["edge presence"][source, target]

            if score < 0.5 and len(edges) >= N - 1:
                break
            # skip weak edges inside an already-connected component
            if components[source]["set"] is components[target]["set"] and score < 0.5:
                continue

            self.create_edge(source, target, prediction, edges, nodes)

            # merge the two components into one shared set
            if components[source]["set"] is not components[target]["set"]:
                merged = components[source]["set"]
                for n in components[target]["set"]:
                    merged.add(n)
                    components[n]["set"] = merged

        return edges

    def create_edge(self, source, target, prediction, edges, nodes):
        """Append one labeled edge dict to `edges`."""
        label = self.get_edge_label(prediction, source, target)
        edges.append({"source": source, "target": target, "label": label})

    def create_anchors(self, prediction, nodes, join_contiguous=True, at_least_one=False, single_anchor=False, mode="anchors"):
        """Attach token-interval anchors to every node.

        Optionally merges contiguous intervals, guarantees at least one anchor
        per node, or collapses everything into a single spanning interval.
        """
        for i, node in enumerate(nodes):
            # with at_least_one, lower the threshold so the best anchor survives
            threshold = 0.5 if not at_least_one else min(0.5, prediction[mode][i].max().item())
            selected = (prediction[mode][i] >= threshold).nonzero(as_tuple=False).squeeze(-1)
            node[mode] = prediction["token intervals"][selected, :]

            if single_anchor and len(node[mode]) > 1:
                start = min(interval[0].item() for interval in node[mode])
                end = max(interval[1].item() for interval in node[mode])
                node[mode] = [{"from": start, "to": end}]
                continue

            node[mode] = [{"from": f.item(), "to": t.item()} for f, t in node[mode]]
            node[mode] = sorted(node[mode], key=lambda anchor: anchor["from"])

            if join_contiguous and len(node[mode]) > 1:
                joined = []
                end, start = node[mode][0]["from"], node[mode][0]["from"]
                for anchor in node[mode]:
                    if end < anchor["from"]:
                        joined.append({"from": start, "to": end})
                        start = anchor["from"]
                    end = anchor["to"]
                joined.append({"from": start, "to": end})
                node[mode] = joined

        return nodes

    def get_edge_label(self, prediction, source, target):
        """Look up the string label of the predicted edge (source -> target)."""
        return self.dataset.edge_label_field.vocab.itos[prediction["edge labels"][source, target].item()]
#!/usr/bin/env python3
# coding=utf-8

import argparse
import torch
import os

from model.model import Model
from data.dataset import Dataset
from utility.initialize import initialize
from config.params import Params
from utility.predict import predict


if __name__ == "__main__":
    argument_parser = argparse.ArgumentParser()
    argument_parser.add_argument("--checkpoint_dir", type=str, required=True)
    argument_parser.add_argument("--data_directory", type=str, default="../dataset")
    arguments = argument_parser.parse_args()

    checkpoint_dir = arguments.checkpoint_dir

    # restore the training-time configuration from the checkpoint itself
    checkpoint = torch.load(f"{checkpoint_dir}/best_checkpoint.h5", map_location=torch.device('cpu'))
    args = Params().load_state_dict(checkpoint["args"]).init_data_paths()
    args.log_wandb = False

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    dataset = Dataset(args, verbose=False)

    model = Model(dataset, args).to(device)
    model.load_state_dict(checkpoint["model"])

    os.makedirs(f"{checkpoint_dir}/inference", exist_ok=True)

    print("inference of test data", flush=True)
    predict(model, dataset.test, args.test_data, args.raw_testing_data, args, None, f"{checkpoint_dir}/inference", device, mode="test")
class LabeledEdgeHead(AbstractHead):
    """Prediction head for the labeled-edge graph encoding."""

    def __init__(self, dataset, args, initialize):
        config = {
            "label": True,
            "edge presence": True,
            "edge label": True,
            "anchor": True
        }
        super(LabeledEdgeHead, self).__init__(dataset, args, config, initialize)

        # learned embedding for the artificial top node prepended to each graph
        self.top_node = nn.Parameter(torch.randn(1, 1, args.hidden_size), requires_grad=True)
        self.parser = LabeledEdgeParser(dataset)

    def init_label_classifier(self, dataset, args, config, initialize: bool):
        """Binary node-existence classifier; bias optionally set to the label prior."""
        classifier = nn.Sequential(
            nn.Dropout(args.dropout_label),
            nn.Linear(args.hidden_size, 1, bias=True)
        )
        if initialize:
            prior = torch.tensor([dataset.label_freqs[1]])
            # log-odds of the empirical label frequency
            classifier[1].bias.data = (prior / (1.0 - prior)).log()
        return classifier

    def forward_label(self, decoder_output):
        return self.label_classifier(decoder_output)

    def forward_edge(self, decoder_output):
        # prepend the shared top-node embedding before scoring edges
        batch_top = self.top_node.expand(decoder_output.size(0), -1, -1)
        return self.edge_classifier(torch.cat([batch_top, decoder_output], dim=1))

    def loss_label(self, prediction, target, mask, matching):
        logits = prediction["label"]
        matched_target = match_label(
            target["labels"][0], matching, logits.shape[:-1], logits.device, self.query_length
        )
        return {"label": binary_cross_entropy(logits.squeeze(-1), matched_target.float(), mask, focal=self.focal)}

    def inference_label(self, prediction):
        return (prediction.squeeze(-1) > 0.0).long()

    def label_cost_matrix(self, output, batch, decoder_lens, b: int):
        if output["label"] is None:
            return 1.0

        target_labels = batch["anchored_labels"][b]  # shape: (num_nodes, num_inputs, 2)
        label_prob = output["label"][b, : decoder_lens[b], :].sigmoid().unsqueeze(0)  # shape: (1, num_queries, 1)
        label_prob = torch.cat([1.0 - label_prob, label_prob], dim=-1)  # shape: (1, num_queries, 2)
        tgt_label = target_labels.repeat_interleave(self.query_length, dim=1)  # shape: (num_nodes, num_queries, 2)
        cost_matrix = ((tgt_label * label_prob).sum(-1) * label_prob[:, :, 1:].sum(-1)).t().sqrt()  # shape: (num_queries, num_nodes)
        return cost_matrix
class Model(nn.Module):
    """Top-level event-graph parser: contextual Encoder, an optional Transformer
    Decoder over per-word label queries, and a graph head producing nodes/edges.

    NOTE(review): `dataset` and `args` are project objects; the attribute
    expectations documented below are inferred only from their use in this class.
    """

    def __init__(self, dataset, args, initialize=True):
        super(Model, self).__init__()
        self.encoder = Encoder(args, dataset)
        if args.n_layers > 0:
            self.decoder = Decoder(args)
        else:
            self.decoder = lambda x, *args: x  # identity function, which ignores all arguments except the first one

        # Only the "labeled-edge" mode is handled here; any other graph_mode
        # leaves self.head unset and forward() would fail — presumably other
        # modes are configured elsewhere (TODO confirm).
        if args.graph_mode == "labeled-edge":
            self.head = LabeledEdgeHead(dataset, args, initialize)
        self.query_length = args.query_length
        self.dataset = dataset
        self.args = args

    def forward(self, batch, inference=False, **kwargs):
        # Each input word expands into `query_length` decoder queries, so the
        # decoder sequence lengths are a fixed multiple of the word lengths.
        every_input, word_lens = batch["every_input"]
        decoder_lens = self.query_length * word_lens
        batch_size, input_len = every_input.size(0), every_input.size(1)
        device = every_input.device

        # Masks are True at padded positions (see utility.utils.create_padding_mask).
        encoder_mask = create_padding_mask(batch_size, input_len, word_lens, device)
        decoder_mask = create_padding_mask(batch_size, self.query_length * input_len, decoder_lens, device)

        encoder_output, decoder_input = self.encoder(batch["input"], batch["char_form_input"], batch["input_scatter"], input_len)

        decoder_output = self.decoder(decoder_input, encoder_output, decoder_mask, encoder_mask)

        # Inference decodes graphs via head.predict; training returns the head's losses.
        if inference:
            return self.head.predict(encoder_output, decoder_output, encoder_mask, decoder_mask, batch)
        else:
            return self.head(encoder_output, decoder_output, encoder_mask, decoder_mask, batch)

    def get_params_for_optimizer(self, args):
        """Build optimizer param groups: one decay + one no-decay group per BERT
        layer (for layer-wise LR decay), then two groups for everything else."""
        encoder_decay, encoder_no_decay = self.get_encoder_parameters(args.n_encoder_layers)
        decoder_decay, decoder_no_decay = self.get_decoder_parameters()

        parameters = [{"params": p, "weight_decay": args.encoder_weight_decay} for p in encoder_decay]
        parameters += [{"params": p, "weight_decay": 0.0} for p in encoder_no_decay]
        parameters += [
            {"params": decoder_decay, "weight_decay": args.decoder_weight_decay},
            {"params": decoder_no_decay, "weight_decay": 0.0},
        ]
        return parameters

    def get_decoder_parameters(self):
        # "Decoder" here means every trainable parameter outside the pretrained
        # BERT encoder; biases and normalization weights get no weight decay.
        no_decay = ["bias", "LayerNorm.weight", "_norm.weight"]
        decay_params = (p for name, p in self.named_parameters() if not any(nd in name for nd in no_decay) and not name.startswith("encoder.bert") and p.requires_grad)
        no_decay_params = (p for name, p in self.named_parameters() if any(nd in name for nd in no_decay) and not name.startswith("encoder.bert") and p.requires_grad)

        return decay_params, no_decay_params

    def get_encoder_parameters(self, n_layers):
        # Grouped top-down (index 0 = last BERT layer) so that scheduler index i
        # receives encoder_lr * layerwise_lr_decay**i (see multi_scheduler_wrapper).
        no_decay = ["bias", "LayerNorm.weight", "_norm.weight"]
        decay_params = [
            [p for name, p in self.named_parameters() if not any(nd in name for nd in no_decay) and name.startswith(f"encoder.bert.encoder.layer.{n_layers - 1 - i}.") and p.requires_grad] for i in range(n_layers)
        ]
        no_decay_params = [
            [p for name, p in self.named_parameters() if any(nd in name for nd in no_decay) and name.startswith(f"encoder.bert.encoder.layer.{n_layers - 1 - i}.") and p.requires_grad] for i in range(n_layers)
        ]

        return decay_params, no_decay_params
class AnchorClassifier(nn.Module):
    """Biaffine scorer between label queries and input tokens.

    Produces a (B, T_label, T_word) score matrix; padded token positions are
    filled with -inf so a subsequent softmax cannot select them.
    """

    def __init__(self, dataset, args, initialize: bool, bias=True, mode="anchor"):
        super(AnchorClassifier, self).__init__()

        self.token_f = nn.Linear(args.hidden_size, args.hidden_size_anchor)
        self.label_f = nn.Linear(args.hidden_size, args.hidden_size_anchor)
        self.dropout = nn.Dropout(args.dropout_anchor)

        bias_init = None
        if bias and initialize:
            # Start the output bias at the log-odds of the empirical frequency.
            frequency = torch.tensor([getattr(dataset, f"{mode}_freq")])
            bias_init = (frequency / (1.0 - frequency)).log()

        self.output = Biaffine(args.hidden_size_anchor, 1, bias=bias, bias_init=bias_init)

    def forward(self, label, tokens, encoder_mask):
        token_repr = self.dropout(F.elu(self.token_f(tokens)))  # shape: (B, T_w, H)
        label_repr = self.dropout(F.elu(self.label_f(label)))  # shape: (B, T_l, H)
        scores = self.output(label_repr, token_repr).squeeze(-1)  # shape: (B, T_l, T_w)
        # Padded tokens must never be chosen as anchors.
        return scores.masked_fill(encoder_mask.unsqueeze(1), float("-inf"))
class Biaffine(nn.Module):
    """Biaffine transform: bilinear(x, y) plus separate linear terms in x and y.

    result[b, i, j] = bilinear(x_i, y_j) + W1 x_i + W2 y_j (+ shared bias).
    """

    def __init__(self, input_dim, output_dim, bias=True, bias_init=None):
        super(Biaffine, self).__init__()

        self.linear_1 = nn.Linear(input_dim, output_dim, bias=False)
        self.linear_2 = nn.Linear(input_dim, output_dim, bias=False)

        self.bilinear = Bilinear(input_dim, input_dim, output_dim, bias=bias)
        if bias_init is not None:
            # Allows callers to seed the output bias (e.g. with empirical log-odds).
            self.bilinear.bias.data = bias_init

    def forward(self, x, y):
        # Broadcast each unary term over the opposite sequence dimension.
        unary_x = self.linear_1(x).unsqueeze(2)
        unary_y = self.linear_2(y).unsqueeze(1)
        return self.bilinear(x, y) + unary_x + unary_y
class Bilinear(nn.Module):
    """A bilinear module that deals with broadcasting for efficient memory usage.
    Input: tensors of sizes (N x L1 x D1) and (N x L2 x D2)
    Output: tensor of size (N x L1 x L2 x O)

    (adapted from https://github.com/NLPInBLCU/BiaffineDependencyParsing)
    """

    def __init__(self, input1_size, input2_size, output_size, bias=True):
        super(Bilinear, self).__init__()

        self.input1_size = input1_size
        self.input2_size = input2_size
        self.output_size = output_size

        self.weight = nn.Parameter(torch.Tensor(input1_size, input2_size, output_size))
        self.bias = nn.Parameter(torch.Tensor(output_size)) if bias else None

        self.reset_parameters()

    def reset_parameters(self):
        # Zero-init so the bilinear term starts as a no-op; callers may overwrite
        # `bias` afterwards (see Biaffine's bias_init).
        nn.init.zeros_(self.weight)
        # BUG FIX: the bias was previously never initialized, so it contained
        # whatever uninitialized memory torch.Tensor(output_size) allocated.
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, input1, input2):
        input1_size = list(input1.size())
        input2_size = list(input2.size())

        # (N*L1, D1) @ (D1, D2*O) -> (N*L1, D2*O)
        intermediate = torch.mm(input1.view(-1, input1_size[-1]), self.weight.view(-1, self.input2_size * self.output_size),)

        # (N, D2, L2) for the batched matmul below.
        input2 = input2.transpose(1, 2)
        output = intermediate.view(input1_size[0], input1_size[1] * self.output_size, input2_size[2]).bmm(input2)

        # (N, L1, O, L2) -> (N, L1, L2, O)
        output = output.view(input1_size[0], input1_size[1], self.output_size, input2_size[1]).transpose(2, 3)

        if self.bias is not None:
            output = output + self.bias

        return output
class CharEmbedding(nn.Module):
    """Character-level word embeddings: embed characters, run a single-layer
    bidirectional GRU over each word, and project the two final hidden states
    down to `output_size`."""

    def __init__(self, vocab_size: int, embedding_size: int, output_size: int):
        super(CharEmbedding, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_size, sparse=False)
        self.layer_norm = nn.LayerNorm(embedding_size)
        self.gru = nn.GRU(embedding_size, embedding_size, num_layers=1, bidirectional=True)
        self.out_linear = nn.Linear(2*embedding_size, output_size)
        self.layer_norm_2 = nn.LayerNorm(output_size)

    def forward(self, words, sentence_lens, word_lens):
        # input shape: (B, W, C)
        n_words = words.size(1)
        sentence_lens = sentence_lens.cpu()
        # Pack twice: first flatten sentences into words, then words into
        # characters, so the GRU runs over one flat batch of words.
        sentence_packed = pack_padded_sequence(words, sentence_lens, batch_first=True)  # shape: (B*W, C)
        lens_packed = pack_padded_sequence(word_lens, sentence_lens, batch_first=True)  # shape: (B*W)
        word_packed = pack_padded_sequence(sentence_packed.data, lens_packed.data.cpu(), batch_first=True, enforce_sorted=False)  # shape: (B*W*C)

        embedded = self.embedding(word_packed.data)  # shape: (B*W*C, D)
        embedded = self.layer_norm(embedded)  # shape: (B*W*C, D)

        # Re-wrap with the word-level batch_sizes / sort indices so the GRU sees
        # exactly the same packing as word_packed.
        embedded_packed = PackedSequence(embedded, word_packed[1], word_packed[2], word_packed[3])
        _, embedded = self.gru(embedded_packed)  # shape: (layers * 2, B*W, D)

        # Concatenate the final forward and backward hidden states per word.
        embedded = embedded[-2:, :, :].transpose(0, 1).flatten(1, 2)  # shape: (B*W, 2*D)
        embedded = F.relu(embedded)
        embedded = self.out_linear(embedded)
        embedded = self.layer_norm_2(embedded)

        # Unpack back to (B, W, ...) using the sentence-level packing metadata.
        embedded, _ = pad_packed_sequence(
            PackedSequence(embedded, sentence_packed[1], sentence_packed[2], sentence_packed[3]), batch_first=True, total_length=n_words,
        )  # shape: (B, W, 2*D)

        return embedded  # shape: (B, W, 2*D)
class EdgeClassifier(nn.Module):
    """Predicts edge presence (binary) and/or edge labels for every ordered pair
    of decoder queries, using biaffine scorers whose output biases can be seeded
    with empirical log-odds from the training data."""

    def __init__(self, dataset, args, initialize: bool, presence: bool, label: bool):
        super(EdgeClassifier, self).__init__()

        self.presence = presence
        if self.presence:
            if initialize:
                # Log-odds of the empirical edge frequency.
                presence_init = torch.tensor([dataset.edge_presence_freq])
                presence_init = (presence_init / (1.0 - presence_init)).log()
            else:
                presence_init = None

            self.edge_presence = EdgeBiaffine(
                args.hidden_size, args.hidden_size_edge_presence, 1, args.dropout_edge_presence, bias_init=presence_init
            )

        self.label = label
        if self.label:
            # Per-label log-odds; dataset.edge_label_freqs is assumed to be a
            # tensor of frequencies in (0, 1) — TODO confirm against dataset code.
            label_init = (dataset.edge_label_freqs / (1.0 - dataset.edge_label_freqs)).log() if initialize else None
            n_labels = len(dataset.edge_label_field.vocab)
            self.edge_label = EdgeBiaffine(
                args.hidden_size, args.hidden_size_edge_label, n_labels, args.dropout_edge_label, bias_init=label_init
            )

    def forward(self, x):
        # Either head may be disabled, in which case its output stays None.
        presence, label = None, None

        if self.presence:
            presence = self.edge_presence(x).squeeze(-1)  # shape: (B, T, T)
        if self.label:
            label = self.edge_label(x)  # shape: (B, T, T, O_1)

        return presence, label


class EdgeBiaffine(nn.Module):
    """Biaffine edge scorer: project each state into two role-specific views
    (as predecessor / as current node), then score all pairs biaffinely."""

    def __init__(self, hidden_dim, bottleneck_dim, output_dim, dropout, bias_init=None):
        super(EdgeBiaffine, self).__init__()
        self.hidden = nn.Linear(hidden_dim, 2 * bottleneck_dim)
        self.output = Biaffine(bottleneck_dim, output_dim, bias_init=bias_init)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.dropout(F.elu(self.hidden(x)))  # shape: (B, T, 2H)
        predecessors, current = x.chunk(2, dim=-1)  # shape: (B, T, H), (B, T, H)
        edge = self.output(current, predecessors)  # shape: (B, T, T, O)
        return edge
class FeedForward(nn.Module):
    """Position-wise feed-forward block:
    Linear -> activation -> Dropout -> Linear -> Dropout."""

    def __init__(self, args):
        super().__init__()
        layers = [
            nn.Linear(args.hidden_size, args.hidden_size_ff),
            self._get_activation_f(args.activation),
            nn.Dropout(args.dropout_transformer),
            nn.Linear(args.hidden_size_ff, args.hidden_size),
            nn.Dropout(args.dropout_transformer),
        ]
        self.f = nn.Sequential(*layers)

    def forward(self, x):
        return self.f(x)

    def _get_activation_f(self, activation: str):
        # Only "relu" and "gelu" are supported; anything else raises KeyError.
        supported = {"relu": nn.ReLU, "gelu": nn.GELU}
        return supported[activation]()
class Decoder(nn.Module):
    """Stack of DecoderLayer modules run over the label queries."""

    def __init__(self, args):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList([DecoderLayer(args) for _ in range(args.n_layers)])

    def forward(self, target, encoder, target_mask, encoder_mask):
        # nn.MultiheadAttention (inside DecoderLayer) expects sequence-first tensors.
        target = target.transpose(0, 1)  # shape: (T, B, D)
        encoder = encoder.transpose(0, 1)  # shape: (T, B, D)

        # Gradient-checkpoint all but the last layer to trade compute for memory.
        for layer in self.layers[:-1]:
            target = checkpoint(layer, target, encoder, target_mask, encoder_mask)
        target = self.layers[-1](target, encoder, target_mask, encoder_mask)  # don't checkpoint due to grad_norm
        target = target.transpose(0, 1)  # shape: (B, T, D)

        return target
#!/bin/bash

# Slurm batch script: run inference with a trained checkpoint.
# Usage: sbatch run_infer.sh <checkpoint_dir>

#SBATCH --job-name=ACE_EVAL
#SBATCH --account=ec30
#SBATCH --time=02-00:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --ntasks-per-node=1
#SBATCH --mem-per-cpu=8G
#SBATCH --partition=accel
#SBATCH --gpus=1


# sanity: exit on all errors and disallow unset environment variables
set -o errexit
set -o nounset

# the important bit: unload all current modules (just in case) and load only the necessary ones

module purge

module use -a /fp/projects01/ec30/software/easybuild/modules/all/
module load nlpl-pytorch/1.7.1-foss-2019b-cuda-11.1.1-Python-3.7.4
module load nlpl-transformers/4.14.1-foss-2019b-Python-3.7.4
module load nlpl-nlptools/2021.01-foss-2019b-Python-3.7.4
module load nlpl-scipy-ecosystem/2021.01-foss-2019b-Python-3.7.4
module load sentencepiece/0.1.96-foss-2019b-Python-3.7.4
module load nlpl-nltk/3.5-foss-2019b-Python-3.7.4
module load nlpl-wandb/0.12.6-foss-2019b-Python-3.7.4

# $1: checkpoint directory produced by a previous training run
python3 inference.py --checkpoint_dir "$1"
class AutoClip:
    """Adaptive per-parameter gradient clipping.

    Keeps a rolling history of each parameter's gradient L2 norm and clips to
    the configured percentile of that history (cf. "AutoClip: Adaptive Gradient
    Clipping", Seetharaman et al., 2020).
    """

    def __init__(self, parameters, initial_clipping=0.1, percentile=50, history_len=1000):
        self.parameters = list(parameters)
        # One circular history buffer per parameter, pre-filled with the initial clip value.
        self.grad_history = [torch.full([history_len], initial_clipping) for _ in self.parameters]

        self.index = 0  # next write position in the circular buffers
        self.history_len = history_len
        self.percentile = percentile

    @torch.no_grad()
    def __call__(self):
        """Clip all gradients in place and return the mean pre-clip gradient norm.

        Returns 0.0 when no parameter has a usable (non-zero) gradient —
        previously this case raised ZeroDivisionError.
        """
        self._add_to_history(self.parameters)

        grad_norms = []
        for parameter, history in zip(self.parameters, self.grad_history):
            # Skip parameters without a gradient or with an all-zero gradient.
            if parameter.grad is None or not parameter.grad.abs().sum().is_nonzero():
                continue

            clip_value = self._get_percentile(history, self.percentile)
            grad_norms.append(nn.utils.clip_grad_norm_(parameter, clip_value).item())

        # BUG FIX: guard against division by zero when nothing was clipped
        # (e.g. the very first step of a frozen or zero-gradient model).
        if not grad_norms:
            return 0.0
        return sum(grad_norms) / len(grad_norms)

    def _add_to_history(self, parameters):
        # Record the current L2 gradient norm of every active parameter.
        for i, param in enumerate(parameters):
            if param.grad is None or not param.grad.abs().sum().is_nonzero():
                continue

            self.grad_history[i][self.index] = param.grad.data.norm(2)

        self.index = (self.index + 1) % self.history_len

    def _get_percentile(self, tensor, percentile):
        # k-th smallest element corresponding to the requested percentile.
        k = 1 + round(0.01 * percentile * (tensor.numel() - 1))
        return tensor.kthvalue(k).values.item()
def masked_sum(loss, mask, label_weight=1, eps=1e-8, reduction=True):
    """Reduce `loss`, ignoring positions where `mask` is True.

    With a mask, normalizes by the (weighted) number of unmasked positions;
    without one, falls back to a plain mean. `reduction=False` returns the
    (possibly masked) element-wise loss.
    """
    if mask is None:
        return loss.mean() if reduction else loss

    masked = loss.masked_fill(mask, 0.0)
    if not reduction:
        return masked
    denominator = ((1 - mask.long()) * label_weight).sum() + eps
    return masked.sum() / denominator


def cross_entropy(log_prob, target, mask, focal=False, label_weight=None, reduction=True):
    """Negative log-likelihood over `log_prob` with optional focal weighting
    ((1 - p)^2) and optional per-position label weights."""
    gathered = log_prob.gather(-1, target.unsqueeze(-1)).squeeze(-1)
    if focal:
        # Down-weight easy examples.
        focal_coeff = (1.0 - gathered.exp()) ** 2
    else:
        focal_coeff = 1.0

    loss = -focal_coeff * gathered
    if label_weight is None:
        return masked_sum(loss, mask, reduction=reduction)
    return masked_sum(loss * label_weight, mask, label_weight=label_weight, reduction=reduction)


def binary_cross_entropy(logits, target, mask, focal=False, reduction=True):
    """BCE-with-logits, optionally focal-weighted, reduced via masked_sum."""
    if focal:
        prob = logits.sigmoid()
        # Probability assigned to the correct binary class.
        p_correct = target * prob + (1.0 - target) * (1.0 - prob)
        focal_coeff = (1.0 - p_correct) ** 2
    else:
        focal_coeff = 1.0

    raw = F.binary_cross_entropy_with_logits(logits, target, reduction="none")
    return masked_sum(focal_coeff * raw, mask, reduction=reduction)
@torch.no_grad()
def match_label(target, matching, shape, device, compute_mask=True):
    """Scatter matched target labels into a dense zero tensor of `shape`."""
    idx = _get_src_permutation_idx(matching)

    permuted = [t[J] for t, (_, J) in zip(target, matching)]
    target_classes = torch.zeros(shape, dtype=torch.long, device=device)
    target_classes[idx] = torch.cat(permuted)
    return target_classes


@torch.no_grad()
def match_anchor(anchor, matching, shape, device):
    """Scatter matched anchor rows and return a mask that is True for unmatched queries."""
    target, _ = anchor
    idx = _get_src_permutation_idx(matching)

    target_classes = torch.zeros(shape, dtype=torch.long, device=device)
    target_classes[idx] = torch.cat([t[J, :] for t, (_, J) in zip(target, matching)])

    matched_mask = torch.ones(shape[:2], dtype=torch.bool, device=device)
    matched_mask[idx] = False
    return target_classes, matched_mask


def _get_src_permutation_idx(indices):
    # Flatten the per-example matchings into (batch indices, query indices).
    batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
    src_idx = torch.cat([src for src, _ in indices])
    return batch_idx, src_idx


@torch.no_grad()
def get_matching(cost_matrices):
    """Hungarian matching (maximization) per cost matrix; returns (rows, cols) tensors."""
    output = []
    for cost_matrix in cost_matrices:
        rows, cols = linear_sum_assignment(cost_matrix, maximize=True)
        output.append((torch.tensor(rows, dtype=torch.long), torch.tensor(cols, dtype=torch.long)))
    return output


def sort_by_target(matchings):
    """Reorder each (source, target) pair so target indices are ascending."""
    result = []
    for source, target in matchings:
        target, order = target.sort()
        result.append((source[order], target))
    return result


def reorder(hidden, matchings, max_length):
    """Gather matched hidden states into a zero-padded (B, max_length, H) tensor,
    ordered by target index."""
    batch_size, _, hidden_dim = hidden.shape
    matchings = sort_by_target(matchings)

    result = torch.zeros(batch_size, max_length, hidden_dim, device=hidden.device)
    for b in range(batch_size):
        indices = matchings[b][0]
        result[b, : len(indices), :] = hidden[b, indices, :]
    return result
def seed_everything(seed_value=42):
    """Seed Python hashing, the `random` module and PyTorch (CPU + all CUDA
    devices), and force deterministic cuDNN kernels."""
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

    # Deterministic convolutions at the cost of autotuning.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def initialize(args, init_wandb: bool):
    """Seed all RNGs and, when requested, start a Weights & Biases run."""
    seed_everything(args.seed)

    if not init_wandb:
        return

    import wandb
    run_tags = (args.framework, args.language)
    wandb.init(
        name=f"{args.framework}_{args.language}_{args.graph_mode}_{args.name}",
        config=args,
        project="eGraph",
        tags=list(run_tags),
    )
    print("Connection to Weights & Biases initialized.", flush=True)
def load_dataset(path):
    """Read a JSON-lines dataset keyed by sentence id.

    Splits each "input" string on single spaces, keeps the original string
    under "sentence", and records character offsets of every token under
    "token anchors". Missing "nodes"/"edges" default to empty lists.
    """
    data = {}
    with open(path, encoding="utf8") as f:
        for raw_line in f.readlines():
            record = json.loads(raw_line)
            data[record["id"]] = record

            record.setdefault("nodes", [])
            record.setdefault("edges", [])

    for sample in list(data.values()):
        sample["sentence"] = sample["input"]
        sample["input"] = sample["sentence"].split(' ')

        # Character span of each token, assuming single-space separation.
        anchors, offset = [], 0
        for token in sample["input"]:
            anchors.append({"from": offset, "to": offset + len(token)})
            offset += len(token) + 1
        sample["token anchors"] = anchors
    return data
def create_edges(sentence, label_f=None):
    """Populate sparse "edge presence" / "edge labels" entries for a sentence.

    Both entries have the form [N, N, [(source, target, value), ...]] where N is
    the node count. Edges without a "label" get the label "none"; `label_f`, if
    given, post-processes every label. Returns the number of edges.
    """
    n_nodes = len(sentence["nodes"])

    sentence["edge presence"] = [n_nodes, n_nodes, []]
    sentence["edge labels"] = [n_nodes, n_nodes, []]

    for edge in sentence["edges"]:
        src, tgt = edge["source"], edge["target"]
        label = edge["label"] if "label" in edge else "none"
        if label_f is not None:
            label = label_f(label)

        sentence["edge presence"][-1].append((src, tgt, 1))
        sentence["edge labels"][-1].append((src, tgt, label))

    return len(sentence["edge presence"][-1])
def predict(model, data, input_path, raw_input_path, args, logger, output_directory, device, mode="validation", epoch=None):
    """Run inference over `data`, write predictions as JSON lines, convert them
    via ./convert.sh, and score them when a gold `raw_input_path` is given.

    Returns the evaluation results dict, or implicitly None when
    `raw_input_path` is falsy (callers relying on the return value must pass it).
    """
    model.eval()

    framework, language = args.framework, args.language
    # Start from the raw input sentences with empty graphs, keyed by id.
    sentences = {}
    with open(input_path, encoding="utf8") as f:
        for line in f.readlines():
            line = json.loads(line)
            line["nodes"], line["edges"], line["tops"] = [], [], []
            line["framework"], line["language"] = framework, language
            sentences[line["id"]] = line

    # Merge each batch's predictions into the matching sentence record.
    for i, batch in enumerate(data):
        with torch.no_grad():
            predictions = model(Batch.to(batch, device), inference=True)
        for prediction in predictions:
            for key, value in prediction.items():
                sentences[prediction["id"]][key] = value

    if epoch is not None:
        output_path = f"{output_directory}/prediction_{mode}_{epoch}_{framework}_{language}.json"
    else:
        output_path = f"{output_directory}/prediction.json"

    with open(output_path, "w", encoding="utf8") as f:
        for sentence in sentences.values():
            json.dump(sentence, f, ensure_ascii=False)
            f.write("\n")
            f.flush()

    # Convert to the evaluation format; evaluate() below reads
    # f"{output_path}_converted", presumably produced by convert.sh — verify.
    run(["./convert.sh", output_path])

    if raw_input_path:
        results = evaluate(raw_input_path, f"{output_path}_converted")
        print(mode, results, flush=True)

        if logger is not None:
            logger.log_evaluation(results, mode, epoch)

        return results
class MultiScheduler:
    """Fans one step call out to several per-parameter-group schedulers."""

    def __init__(self, schedulers):
        self.schedulers = schedulers

    def __call__(self, epoch):
        # Advance every underlying scheduler by one step.
        for scheduler in self.schedulers:
            scheduler(epoch)

    def lr(self) -> float:
        # One learning rate per scheduler, in construction order.
        return [scheduler.lr() for scheduler in self.schedulers]
def create_padding_mask(batch_size, total_length, lengths, device):
    """Boolean mask of shape (B, T) that is True at padded positions
    (i.e. where the position index is >= the sequence length)."""
    positions = torch.arange(total_length, device=device)
    positions = positions.expand(batch_size, total_length)
    return positions >= lengths.unsqueeze(1)  # shape: (B, T)