├── .DS_Store
├── README.md
├── convert_to_mrp.sh
├── dataset
├── .DS_Store
├── labeled_edge_mrp
│ ├── .DS_Store
│ ├── ace_en
│ │ └── .DS_Store
│ ├── ace_p_en
│ │ └── .DS_Store
│ ├── ace_pp_en
│ │ └── .DS_Store
│ └── ace_ppp_en
│ │ └── .DS_Store
├── raw
│ ├── .DS_Store
│ ├── ace_en
│ │ └── .DS_Store
│ ├── ace_p_en
│ │ └── .DS_Store
│ └── ace_ppp_en
│ │ └── .DS_Store
├── splits
│ ├── dev.txt
│ ├── test.txt
│ └── train.txt
└── splits2
│ ├── dev.doc.txt
│ ├── test.doc.txt
│ └── train.doc.txt
├── evaluation
├── .DS_Store
├── evaluate.py
└── evaluate_single_dataset.py
├── mtool
├── .DS_Store
├── .appveyor.yml
├── .travis.yml
├── LICENSE
├── Makefile
├── README.md
├── __pycache__
│ ├── analyzer.cpython-37.pyc
│ ├── analyzer.cpython-38.pyc
│ ├── analyzer.cpython-39.pyc
│ ├── graph.cpython-37.pyc
│ ├── graph.cpython-38.pyc
│ ├── graph.cpython-39.pyc
│ ├── inspector.cpython-37.pyc
│ ├── inspector.cpython-38.pyc
│ ├── inspector.cpython-39.pyc
│ ├── main.cpython-37.pyc
│ ├── treewidth.cpython-37.pyc
│ ├── treewidth.cpython-38.pyc
│ └── treewidth.cpython-39.pyc
├── analyzer.py
├── codec
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── __init__.cpython-38.pyc
│ │ ├── __init__.cpython-39.pyc
│ │ ├── ace.cpython-39.pyc
│ │ ├── amr.cpython-37.pyc
│ │ ├── amr.cpython-38.pyc
│ │ ├── amr.cpython-39.pyc
│ │ ├── conllu.cpython-37.pyc
│ │ ├── conllu.cpython-38.pyc
│ │ ├── conllu.cpython-39.pyc
│ │ ├── eds.cpython-37.pyc
│ │ ├── eds.cpython-38.pyc
│ │ ├── eds.cpython-39.pyc
│ │ ├── mrp.cpython-37.pyc
│ │ ├── mrp.cpython-38.pyc
│ │ ├── mrp.cpython-39.pyc
│ │ ├── norec.cpython-37.pyc
│ │ ├── norec.cpython-38.pyc
│ │ ├── norec.cpython-39.pyc
│ │ ├── pmb.cpython-37.pyc
│ │ ├── pmb.cpython-38.pyc
│ │ ├── pmb.cpython-39.pyc
│ │ ├── sdp.cpython-37.pyc
│ │ ├── sdp.cpython-38.pyc
│ │ ├── sdp.cpython-39.pyc
│ │ ├── treex.cpython-37.pyc
│ │ ├── treex.cpython-38.pyc
│ │ ├── treex.cpython-39.pyc
│ │ ├── ucca.cpython-37.pyc
│ │ ├── ucca.cpython-38.pyc
│ │ └── ucca.cpython-39.pyc
│ ├── ace.py
│ ├── amr.py
│ ├── conllu.py
│ ├── eds.py
│ ├── mrp.py
│ ├── norec.py
│ ├── pmb.py
│ ├── sdp.py
│ ├── treex.py
│ └── ucca.py
├── data
│ ├── .DS_Store
│ ├── sample
│ │ ├── Makefile
│ │ ├── README.txt
│ │ ├── amr
│ │ │ ├── wsj.amr
│ │ │ └── wsj.mrp
│ │ ├── dm
│ │ │ ├── wsj.mrp
│ │ │ └── wsj.sdp
│ │ ├── eds
│ │ │ ├── wsj.eds
│ │ │ └── wsj.mrp
│ │ ├── norec
│ │ │ └── train.json
│ │ ├── psd
│ │ │ ├── wsj.mrp
│ │ │ └── wsj.sdp
│ │ ├── ucca
│ │ │ ├── wsj.mrp
│ │ │ └── xml
│ │ │ │ ├── files.txt
│ │ │ │ ├── wsj_0001.1.xml
│ │ │ │ ├── wsj_0001.2.xml
│ │ │ │ ├── wsj_0002.1.xml
│ │ │ │ ├── wsj_0003.1.xml
│ │ │ │ ├── wsj_0003.10.xml
│ │ │ │ ├── wsj_0003.11.xml
│ │ │ │ ├── wsj_0003.12.xml
│ │ │ │ ├── wsj_0003.13.xml
│ │ │ │ ├── wsj_0003.14.xml
│ │ │ │ ├── wsj_0003.15.xml
│ │ │ │ ├── wsj_0003.16.xml
│ │ │ │ ├── wsj_0003.17.xml
│ │ │ │ ├── wsj_0003.18.xml
│ │ │ │ ├── wsj_0003.19.xml
│ │ │ │ ├── wsj_0003.2.xml
│ │ │ │ ├── wsj_0003.20.xml
│ │ │ │ ├── wsj_0003.21.xml
│ │ │ │ ├── wsj_0003.22.xml
│ │ │ │ ├── wsj_0003.23.xml
│ │ │ │ ├── wsj_0003.24.xml
│ │ │ │ ├── wsj_0003.25.xml
│ │ │ │ ├── wsj_0003.26.xml
│ │ │ │ ├── wsj_0003.27.xml
│ │ │ │ ├── wsj_0003.28.xml
│ │ │ │ ├── wsj_0003.29.xml
│ │ │ │ ├── wsj_0003.3.xml
│ │ │ │ ├── wsj_0003.30.xml
│ │ │ │ ├── wsj_0003.4.xml
│ │ │ │ ├── wsj_0003.5.xml
│ │ │ │ ├── wsj_0003.7.xml
│ │ │ │ ├── wsj_0003.8.xml
│ │ │ │ ├── wsj_0003.9.xml
│ │ │ │ ├── wsj_0004.1.xml
│ │ │ │ ├── wsj_0004.10.xml
│ │ │ │ ├── wsj_0004.11.xml
│ │ │ │ ├── wsj_0004.12.xml
│ │ │ │ ├── wsj_0004.14.xml
│ │ │ │ ├── wsj_0004.15.xml
│ │ │ │ ├── wsj_0004.16.xml
│ │ │ │ ├── wsj_0004.17.xml
│ │ │ │ ├── wsj_0004.2.xml
│ │ │ │ ├── wsj_0004.4.xml
│ │ │ │ ├── wsj_0004.5.xml
│ │ │ │ ├── wsj_0004.6.xml
│ │ │ │ ├── wsj_0004.7.xml
│ │ │ │ ├── wsj_0004.8.xml
│ │ │ │ ├── wsj_0004.9.xml
│ │ │ │ ├── wsj_0005.1.xml
│ │ │ │ ├── wsj_0005.2.xml
│ │ │ │ ├── wsj_0005.3.xml
│ │ │ │ ├── wsj_0007.1.xml
│ │ │ │ ├── wsj_0007.2.xml
│ │ │ │ ├── wsj_0007.3.xml
│ │ │ │ ├── wsj_0007.4.xml
│ │ │ │ ├── wsj_0008.1.xml
│ │ │ │ ├── wsj_0008.2.xml
│ │ │ │ ├── wsj_0008.3.xml
│ │ │ │ ├── wsj_0008.4.xml
│ │ │ │ ├── wsj_0008.5.xml
│ │ │ │ ├── wsj_0008.6.xml
│ │ │ │ ├── wsj_0009.1.xml
│ │ │ │ ├── wsj_0009.2.xml
│ │ │ │ ├── wsj_0009.3.xml
│ │ │ │ ├── wsj_0009.4.xml
│ │ │ │ ├── wsj_0010.1.xml
│ │ │ │ ├── wsj_0010.10.xml
│ │ │ │ ├── wsj_0010.11.xml
│ │ │ │ ├── wsj_0010.12.xml
│ │ │ │ ├── wsj_0010.13.xml
│ │ │ │ ├── wsj_0010.15.xml
│ │ │ │ ├── wsj_0010.16.xml
│ │ │ │ ├── wsj_0010.17.xml
│ │ │ │ ├── wsj_0010.18.xml
│ │ │ │ ├── wsj_0010.19.xml
│ │ │ │ ├── wsj_0010.2.xml
│ │ │ │ ├── wsj_0010.20.xml
│ │ │ │ ├── wsj_0010.3.xml
│ │ │ │ ├── wsj_0010.6.xml
│ │ │ │ ├── wsj_0010.7.xml
│ │ │ │ ├── wsj_0010.8.xml
│ │ │ │ ├── wsj_0011.1.xml
│ │ │ │ ├── wsj_0011.2.xml
│ │ │ │ ├── wsj_0011.4.xml
│ │ │ │ ├── wsj_0011.5.xml
│ │ │ │ ├── wsj_0011.6.xml
│ │ │ │ ├── wsj_0011.7.xml
│ │ │ │ ├── wsj_0011.8.xml
│ │ │ │ ├── wsj_0012.1.xml
│ │ │ │ ├── wsj_0012.2.xml
│ │ │ │ ├── wsj_0012.3.xml
│ │ │ │ ├── wsj_0012.4.xml
│ │ │ │ └── wsj_0012.5.xml
│ │ ├── wsj.ids
│ │ └── wsj.txt
│ ├── score
│ │ ├── .DS_Store
│ │ ├── Makefile
│ │ ├── amr
│ │ │ ├── 233.gold.amr
│ │ │ ├── 233.gold.dot
│ │ │ ├── 233.gold.pdf
│ │ │ ├── 233.system.amr
│ │ │ ├── 233.system.dot
│ │ │ ├── 233.system.pdf
│ │ │ ├── coli.gold.amr
│ │ │ ├── coli.system.amr
│ │ │ ├── first.gold.amr
│ │ │ ├── first.system.amr
│ │ │ ├── partial.gold.mrp
│ │ │ ├── partial.system.mrp
│ │ │ ├── test1.amr
│ │ │ ├── test1.mrp
│ │ │ ├── test2.amr
│ │ │ └── test2.mrp
│ │ ├── dm
│ │ │ ├── empty.gold.mrp
│ │ │ ├── empty.peking.mrp
│ │ │ └── peking.wsj.sdp
│ │ ├── eds
│ │ │ ├── lpps.102990.png
│ │ │ ├── lpps.peking.mrp
│ │ │ ├── wsj.pet.eds
│ │ │ └── wsj.pet.mrp
│ │ ├── lpps.mrp
│ │ ├── psd
│ │ │ ├── 107480.foxik.mrp
│ │ │ ├── 107480.gold.mrp
│ │ │ └── peking.brown.sdp
│ │ ├── revisions.txt
│ │ ├── test.slurm
│ │ └── ucca
│ │ │ ├── anchors.gold.mrp
│ │ │ ├── anchors.tupa.mrp
│ │ │ ├── ewt.gold.mrp
│ │ │ ├── ewt.tupa.mrp
│ │ │ ├── id.mrp
│ │ │ ├── koller.mrp
│ │ │ ├── small.gold.mrp
│ │ │ ├── small.gold.pdf
│ │ │ ├── small.tupa.mrp
│ │ │ ├── small.tupa.pdf
│ │ │ ├── test.gold.mrp
│ │ │ ├── test.gold.pdf
│ │ │ ├── test.tupa.mrp
│ │ │ └── test.tupa.pdf
│ ├── validate
│ │ ├── Makefile
│ │ └── eds
│ │ │ └── wsj.mrp
│ └── wsj.txt
├── graph.py
├── inspector.py
├── main.py
├── score
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── __init__.cpython-38.pyc
│ │ ├── __init__.cpython-39.pyc
│ │ ├── core.cpython-37.pyc
│ │ ├── core.cpython-38.pyc
│ │ ├── core.cpython-39.pyc
│ │ ├── edm.cpython-37.pyc
│ │ ├── edm.cpython-38.pyc
│ │ ├── edm.cpython-39.pyc
│ │ ├── mces.cpython-37.pyc
│ │ ├── mces.cpython-38.pyc
│ │ ├── mces.cpython-39.pyc
│ │ ├── sdp.cpython-37.pyc
│ │ ├── sdp.cpython-38.pyc
│ │ ├── sdp.cpython-39.pyc
│ │ ├── smatch.cpython-37.pyc
│ │ ├── smatch.cpython-38.pyc
│ │ ├── smatch.cpython-39.pyc
│ │ ├── ucca.cpython-37.pyc
│ │ ├── ucca.cpython-38.pyc
│ │ └── ucca.cpython-39.pyc
│ ├── core.py
│ ├── edm.py
│ ├── mces.py
│ ├── rrhc.py
│ ├── sdp.py
│ ├── smatch.py
│ └── ucca.py
├── setup.py
├── smatch
│ ├── LICENSE.txt
│ ├── README.md
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── __init__.cpython-38.pyc
│ │ ├── __init__.cpython-39.pyc
│ │ ├── amr.cpython-37.pyc
│ │ ├── amr.cpython-38.pyc
│ │ ├── amr.cpython-39.pyc
│ │ ├── smatch.cpython-37.pyc
│ │ ├── smatch.cpython-38.pyc
│ │ └── smatch.cpython-39.pyc
│ ├── amr.py
│ └── smatch.py
├── treewidth.py
├── ucca
│ ├── README.md
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── __init__.cpython-38.pyc
│ │ ├── __init__.cpython-39.pyc
│ │ ├── convert.cpython-37.pyc
│ │ ├── convert.cpython-38.pyc
│ │ ├── convert.cpython-39.pyc
│ │ ├── core.cpython-37.pyc
│ │ ├── core.cpython-38.pyc
│ │ ├── core.cpython-39.pyc
│ │ ├── ioutil.cpython-37.pyc
│ │ ├── ioutil.cpython-38.pyc
│ │ ├── ioutil.cpython-39.pyc
│ │ ├── layer0.cpython-37.pyc
│ │ ├── layer0.cpython-38.pyc
│ │ ├── layer0.cpython-39.pyc
│ │ ├── layer1.cpython-37.pyc
│ │ ├── layer1.cpython-38.pyc
│ │ ├── layer1.cpython-39.pyc
│ │ ├── normalization.cpython-37.pyc
│ │ ├── normalization.cpython-38.pyc
│ │ ├── normalization.cpython-39.pyc
│ │ ├── textutil.cpython-37.pyc
│ │ ├── textutil.cpython-38.pyc
│ │ └── textutil.cpython-39.pyc
│ ├── convert.py
│ ├── core.py
│ ├── ioutil.py
│ ├── layer0.py
│ ├── layer1.py
│ ├── normalization.py
│ └── textutil.py
├── validate
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── __init__.cpython-38.pyc
│ │ ├── __init__.cpython-39.pyc
│ │ ├── amr.cpython-37.pyc
│ │ ├── amr.cpython-38.pyc
│ │ ├── amr.cpython-39.pyc
│ │ ├── core.cpython-37.pyc
│ │ ├── core.cpython-38.pyc
│ │ ├── core.cpython-39.pyc
│ │ ├── eds.cpython-37.pyc
│ │ ├── eds.cpython-38.pyc
│ │ ├── eds.cpython-39.pyc
│ │ ├── sdp.cpython-37.pyc
│ │ ├── sdp.cpython-38.pyc
│ │ ├── sdp.cpython-39.pyc
│ │ ├── ucca.cpython-37.pyc
│ │ ├── ucca.cpython-38.pyc
│ │ ├── ucca.cpython-39.pyc
│ │ ├── utilities.cpython-37.pyc
│ │ ├── utilities.cpython-38.pyc
│ │ └── utilities.cpython-39.pyc
│ ├── amr.py
│ ├── core.py
│ ├── eds.py
│ ├── sdp.py
│ ├── ucca.py
│ └── utilities.py
└── version.py
├── perin
├── .DS_Store
├── config
│ ├── .DS_Store
│ ├── __init__.py
│ ├── edge_ace_e.yaml
│ ├── edge_ace_e_p.yaml
│ ├── edge_ace_e_pp.yaml
│ ├── edge_ace_e_ppp.yaml
│ └── params.py
├── convert.sh
├── data
│ ├── .DS_Store
│ ├── __init__.py
│ ├── batch.py
│ ├── dataset.py
│ ├── field
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── anchor_field.py
│ │ ├── anchored_label_field.py
│ │ ├── basic_field.py
│ │ ├── bert_field.py
│ │ ├── edge_field.py
│ │ ├── edge_label_field.py
│ │ ├── field.py
│ │ ├── label_field.py
│ │ ├── mini_torchtext
│ │ │ ├── __pycache__
│ │ │ │ ├── example.cpython-39.pyc
│ │ │ │ ├── field.cpython-39.pyc
│ │ │ │ ├── pipeline.cpython-39.pyc
│ │ │ │ ├── utils.cpython-39.pyc
│ │ │ │ └── vocab.cpython-39.pyc
│ │ │ ├── example.py
│ │ │ ├── field.py
│ │ │ ├── pipeline.py
│ │ │ ├── utils.py
│ │ │ └── vocab.py
│ │ └── nested_field.py
│ └── parser
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── from_mrp
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── abstract_parser.py
│ │ ├── evaluation_parser.py
│ │ ├── labeled_edge_parser.py
│ │ └── request_parser.py
│ │ ├── json_parser.py
│ │ └── to_mrp
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── abstract_parser.py
│ │ └── labeled_edge_parser.py
├── inference.py
├── model
│ ├── .DS_Store
│ ├── __init__.py
│ ├── head
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── abstract_head.py
│ │ └── labeled_edge_head.py
│ ├── model.py
│ └── module
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── anchor_classifier.py
│ │ ├── biaffine.py
│ │ ├── bilinear.py
│ │ ├── char_embedding.py
│ │ ├── edge_classifier.py
│ │ ├── encoder.py
│ │ └── transformer.py
├── run.sh
├── run_infer.sh
├── train.py
└── utility
│ ├── .DS_Store
│ ├── __init__.py
│ ├── autoclip.py
│ ├── cross_entropy.py
│ ├── hungarian_matching.py
│ ├── initialize.py
│ ├── loading_bar.py
│ ├── log.py
│ ├── parser_utils.py
│ ├── predict.py
│ ├── schedule
│ ├── __init__.py
│ ├── linear_lr.py
│ └── multi_scheduler.py
│ ├── subtokenize.py
│ └── utils.py
└── preprocess
├── .DS_Store
├── convert_dygie.py
├── convert_oneie.py
├── extract_ace_events.py
└── extractor.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/.DS_Store
--------------------------------------------------------------------------------
/convert_to_mrp.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Convert raw ACE event json files to MRP graphs using mtool.
#
#   $1      -> dataset: ace_en, ace_p_en, ace_pp_en, ace_ppp_en
#   indata  -> input json file to be converted
#   outdata -> output mrp file of converted graphs
#
# Fail fast: abort on any conversion error, unset variable, or pipe failure
# (previously a failing split was silently skipped and the loop continued).
set -euo pipefail

if [ $# -lt 1 ]; then
    echo "usage: $0 <dataset>  (ace_en | ace_p_en | ace_pp_en | ace_ppp_en)" >&2
    exit 1
fi

for split in train test dev; do
    indata=dataset/raw/"$1"/"$split".json
    outdata=dataset/labeled_edge_mrp/"$1"/"$split".mrp

    python mtool/main.py --strings --ids --read ace --write mrp "$indata" "$outdata"
done
--------------------------------------------------------------------------------
/dataset/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/.DS_Store
--------------------------------------------------------------------------------
/dataset/labeled_edge_mrp/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/.DS_Store
--------------------------------------------------------------------------------
/dataset/labeled_edge_mrp/ace_en/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_en/.DS_Store
--------------------------------------------------------------------------------
/dataset/labeled_edge_mrp/ace_p_en/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_p_en/.DS_Store
--------------------------------------------------------------------------------
/dataset/labeled_edge_mrp/ace_pp_en/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_pp_en/.DS_Store
--------------------------------------------------------------------------------
/dataset/labeled_edge_mrp/ace_ppp_en/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/labeled_edge_mrp/ace_ppp_en/.DS_Store
--------------------------------------------------------------------------------
/dataset/raw/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/.DS_Store
--------------------------------------------------------------------------------
/dataset/raw/ace_en/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/ace_en/.DS_Store
--------------------------------------------------------------------------------
/dataset/raw/ace_p_en/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/ace_p_en/.DS_Store
--------------------------------------------------------------------------------
/dataset/raw/ace_ppp_en/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/dataset/raw/ace_ppp_en/.DS_Store
--------------------------------------------------------------------------------
/dataset/splits/dev.txt:
--------------------------------------------------------------------------------
1 | CNN_CF_20030303_1900_02
2 | CNN_IP_20030329_1600_00_2
3 | CNN_IP_20030402_1600_00_1
4 | CNN_IP_20030405_1600_01_1
5 | CNN_IP_20030409_1600_02
6 | marcellapr_20050228_2219
7 | rec_games_chess_politics_20041216_1047
8 | rec_games_chess_politics_20041217_2111
9 | soc_org_nonprofit_20050218_1902
10 | FLOPPINGACES_20050217_1237_014
11 | AGGRESSIVEVOICEDAILY_20041116_1347
12 | FLOPPINGACES_20041117_2002_024
13 | FLOPPINGACES_20050203_1953_038
14 | TTRACY_20050223_1049
15 | CNNHL_ENG_20030304_142751_10
16 | CNNHL_ENG_20030424_123502_25
17 | CNNHL_ENG_20030513_220910_32
18 | CNN_ENG_20030304_173120_16
19 | CNN_ENG_20030328_150609_10
20 | CNN_ENG_20030424_070008_15
21 | CNN_ENG_20030512_170454_13
22 | CNN_ENG_20030620_085840_7
23 | AFP_ENG_20030304_0250
24 | AFP_ENG_20030305_0918
25 | AFP_ENG_20030311_0491
26 | AFP_ENG_20030314_0238
27 | AFP_ENG_20030319_0879
28 | AFP_ENG_20030320_0722
29 | AFP_ENG_20030327_0022
30 | AFP_ENG_20030327_0224
31 |
--------------------------------------------------------------------------------
/dataset/splits/test.txt:
--------------------------------------------------------------------------------
1 | AFP_ENG_20030401_0476
2 | AFP_ENG_20030413_0098
3 | AFP_ENG_20030415_0734
4 | AFP_ENG_20030417_0004
5 | AFP_ENG_20030417_0307
6 | AFP_ENG_20030417_0764
7 | AFP_ENG_20030418_0556
8 | AFP_ENG_20030425_0408
9 | AFP_ENG_20030427_0118
10 | AFP_ENG_20030428_0720
11 | AFP_ENG_20030429_0007
12 | AFP_ENG_20030430_0075
13 | AFP_ENG_20030502_0614
14 | AFP_ENG_20030504_0248
15 | AFP_ENG_20030508_0118
16 | AFP_ENG_20030508_0357
17 | AFP_ENG_20030509_0345
18 | AFP_ENG_20030514_0706
19 | AFP_ENG_20030519_0049
20 | AFP_ENG_20030519_0372
21 | AFP_ENG_20030522_0878
22 | AFP_ENG_20030527_0616
23 | AFP_ENG_20030528_0561
24 | AFP_ENG_20030530_0132
25 | AFP_ENG_20030601_0262
26 | AFP_ENG_20030607_0030
27 | AFP_ENG_20030616_0715
28 | AFP_ENG_20030617_0846
29 | AFP_ENG_20030625_0057
30 | AFP_ENG_20030630_0271
31 | APW_ENG_20030304_0555
32 | APW_ENG_20030306_0191
33 | APW_ENG_20030308_0314
34 | APW_ENG_20030310_0719
35 | APW_ENG_20030311_0775
36 | APW_ENG_20030318_0689
37 | APW_ENG_20030319_0545
38 | APW_ENG_20030322_0119
39 | APW_ENG_20030324_0768
40 | APW_ENG_20030325_0786
41 |
--------------------------------------------------------------------------------
/dataset/splits2/dev.doc.txt:
--------------------------------------------------------------------------------
1 | CNN_CF_20030303.1900.02
2 | CNN_IP_20030329.1600.00-2
3 | CNN_IP_20030402.1600.00-1
4 | CNN_IP_20030405.1600.01-1
5 | CNN_IP_20030409.1600.02
6 | marcellapr_20050228.2219
7 | rec.games.chess.politics_20041217.2111
8 | soc.org.nonprofit_20050218.1902
9 | FLOPPINGACES_20050217.1237.014
10 | AGGRESSIVEVOICEDAILY_20041116.1347
11 | FLOPPINGACES_20041117.2002.024
12 | FLOPPINGACES_20050203.1953.038
13 | TTRACY_20050223.1049
14 | CNNHL_ENG_20030304_142751.10
15 | CNNHL_ENG_20030424_123502.25
16 | CNNHL_ENG_20030513_220910.32
17 | CNN_ENG_20030304_173120.16
18 | CNN_ENG_20030328_150609.10
19 | CNN_ENG_20030424_070008.15
20 | CNN_ENG_20030512_170454.13
21 | CNN_ENG_20030620_085840.7
22 | AFP_ENG_20030305.0918
23 | AFP_ENG_20030311.0491
24 | AFP_ENG_20030314.0238
25 | AFP_ENG_20030319.0879
26 | AFP_ENG_20030320.0722
27 | AFP_ENG_20030327.0022
28 | AFP_ENG_20030327.0224
29 |
--------------------------------------------------------------------------------
/dataset/splits2/test.doc.txt:
--------------------------------------------------------------------------------
1 | AFP_ENG_20030401.0476
2 | AFP_ENG_20030413.0098
3 | AFP_ENG_20030415.0734
4 | AFP_ENG_20030417.0004
5 | AFP_ENG_20030417.0307
6 | AFP_ENG_20030417.0764
7 | AFP_ENG_20030418.0556
8 | AFP_ENG_20030425.0408
9 | AFP_ENG_20030427.0118
10 | AFP_ENG_20030428.0720
11 | AFP_ENG_20030429.0007
12 | AFP_ENG_20030430.0075
13 | AFP_ENG_20030502.0614
14 | AFP_ENG_20030504.0248
15 | AFP_ENG_20030508.0118
16 | AFP_ENG_20030508.0357
17 | AFP_ENG_20030509.0345
18 | AFP_ENG_20030514.0706
19 | AFP_ENG_20030519.0049
20 | AFP_ENG_20030519.0372
21 | AFP_ENG_20030522.0878
22 | AFP_ENG_20030527.0616
23 | AFP_ENG_20030528.0561
24 | AFP_ENG_20030530.0132
25 | AFP_ENG_20030601.0262
26 | AFP_ENG_20030607.0030
27 | AFP_ENG_20030616.0715
28 | AFP_ENG_20030617.0846
29 | AFP_ENG_20030625.0057
30 | AFP_ENG_20030630.0271
31 | APW_ENG_20030304.0555
32 | APW_ENG_20030306.0191
33 | APW_ENG_20030308.0314
34 | APW_ENG_20030310.0719
35 | APW_ENG_20030311.0775
36 | APW_ENG_20030318.0689
37 | APW_ENG_20030319.0545
38 | APW_ENG_20030322.0119
39 | APW_ENG_20030324.0768
40 | APW_ENG_20030325.0786
41 |
--------------------------------------------------------------------------------
/evaluation/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/evaluation/.DS_Store
--------------------------------------------------------------------------------
/evaluation/evaluate_single_dataset.py:
--------------------------------------------------------------------------------
1 | import json
2 | from evaluate import convert_event_to_tuple, trigger_f1, argument_f1, argument_span_f1
3 | import argparse
4 |
5 |
def evaluate(gold_file, pred_file):
    """Score predicted events against gold using exact-match argument spans.

    Args:
        gold_file: path to the gold JSON file — a list of sentence dicts,
            each carrying a "sent_id" key.
        pred_file: path to the prediction JSON file, same structure.

    Returns:
        dict with four F1 scores: trigger_identification,
        trigger_classification, argument_identification,
        argument_classification. All four are 0.0 when the two files do
        not cover exactly the same sentence ids.
    """
    with open(gold_file) as f:
        gold = json.load(f)

    with open(pred_file) as f:
        preds = json.load(f)

    # Index event tuples by sentence id so gold and predictions align.
    tgold = {s["sent_id"]: convert_event_to_tuple(s) for s in gold}
    tpreds = {s["sent_id"]: convert_event_to_tuple(s) for s in preds}

    if sorted(tgold) != sorted(tpreds):
        # BUG FIX: this used to `return 0.0, 0.0, 0.0` (a tuple) while the
        # success path returns a dict; main() then crashed on
        # `results.values()`. Return the same dict shape, zeroed.
        print("Missing some sentences!")
        return {
            'trigger_identification': 0.0,
            'trigger_classification': 0.0,
            'argument_identification': 0.0,
            'argument_classification': 0.0
        }

    results = {
        'trigger_identification': trigger_f1(tgold, tpreds, classification=False),
        'trigger_classification': trigger_f1(tgold, tpreds, classification=True),
        'argument_identification': argument_f1(tgold, tpreds, classification=False),
        'argument_classification': argument_f1(tgold, tpreds, classification=True)
    }

    return results
40 |
41 |
def evaluate_span(gold_file, pred_file, overlap=0.75):
    """Score predicted events against gold with partial argument-span overlap.

    Triggers are scored exactly as in `evaluate`; arguments are scored with
    `argument_span_f1`, which credits a predicted span overlapping a gold
    span by at least `overlap`.

    Args:
        gold_file: path to the gold JSON file — a list of sentence dicts,
            each carrying a "sent_id" key.
        pred_file: path to the prediction JSON file, same structure.
        overlap: minimum span overlap ratio for an argument match.

    Returns:
        dict with four F1 scores: trigger_identification,
        trigger_classification, argument_identification,
        argument_classification. All four are 0.0 when the two files do
        not cover exactly the same sentence ids.
    """
    with open(gold_file) as f:
        gold = json.load(f)

    with open(pred_file) as f:
        preds = json.load(f)

    # Index event tuples by sentence id so gold and predictions align.
    tgold = {s["sent_id"]: convert_event_to_tuple(s) for s in gold}
    tpreds = {s["sent_id"]: convert_event_to_tuple(s) for s in preds}

    if sorted(tgold) != sorted(tpreds):
        # BUG FIX: this used to `return 0.0, 0.0, 0.0` (a tuple) while the
        # success path returns a dict; main() then crashed on
        # `results.values()`. Return the same dict shape, zeroed.
        print("Missing some sentences!")
        return {
            'trigger_identification': 0.0,
            'trigger_classification': 0.0,
            'argument_identification': 0.0,
            'argument_classification': 0.0
        }

    results = {
        'trigger_identification': trigger_f1(tgold, tpreds, classification=False),
        'trigger_classification': trigger_f1(tgold, tpreds, classification=True),
        'argument_identification': argument_span_f1(tgold, tpreds, classification=False, overlap=overlap),
        'argument_classification': argument_span_f1(tgold, tpreds, classification=True, overlap=overlap)
    }

    return results
76 |
def main():
    """CLI entry point: score a prediction file against gold and print results.

    With --span_overlap below 1, arguments are matched by partial span
    overlap; otherwise exact-match scoring is used.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("gold_file", help="gold json file")
    arg_parser.add_argument("pred_file", help="prediction json file")
    arg_parser.add_argument("--span_overlap", help="argument overlap ratio", default=1, type=float)
    args = arg_parser.parse_args()

    use_span_matching = args.span_overlap < 1
    if use_span_matching:
        results = evaluate_span(args.gold_file, args.pred_file, overlap=args.span_overlap)
        print(f"Evaluate arguments with span overlap ratio of: {args.span_overlap}\n")
    else:
        results = evaluate(args.gold_file, args.pred_file)

    # Pretty-printed dict followed by the bare score list.
    print(json.dumps(results, indent=2))
    print()
    print(list(results.values()))
94 |
95 |
96 | if __name__ == "__main__":
97 | main()
98 |
--------------------------------------------------------------------------------
/mtool/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/.DS_Store
--------------------------------------------------------------------------------
/mtool/.appveyor.yml:
--------------------------------------------------------------------------------
1 | environment:
2 | PYTHON: C:\Python37-x64
3 | matrix:
4 | - TEST: "score dm.edm.json"
5 | - TEST: "score eds.edm.json"
6 | - TEST: "score eds.smatch.json"
7 | - TEST: "score eds.mrp.json"
8 | - TEST: "score dm.sdp.json"
9 | - TEST: "score ucca.ucca.json"
10 | - TEST: "score ucca.smatch.json"
11 | - TEST: "score ucca.mrp.json"
12 | - TEST: "score test.smatch.json"
13 | - TEST: "score coli.smatch.json"
14 | - TEST: "score coli.mrp.json"
15 | - TEST: "score unit"
16 | - TEST: "sample all"
17 | - TEST: "validate all"
18 |
19 | init:
20 | - cmd: choco install make
21 | - set PATH=%PYTHON%;%PYTHON%\Scripts;%PATH%
22 | - cmd: copy %PYTHON%\python.exe %PYTHON%\python3.exe
23 |
24 | install:
25 | - pip install .
26 |
27 | build: off
28 |
29 | test_script:
30 | - make -C data/%TEST%
31 |
--------------------------------------------------------------------------------
/mtool/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | sudo: false
3 | group: edge
4 | language: python
5 | python: 3.6
6 | install: pip install .
7 | env:
8 | - TEST="score dm.edm.json"
9 | - TEST="score eds.edm.json"
10 | - TEST="score eds.smatch.json"
11 | - TEST="score eds.mrp.json"
12 | - TEST="score dm.sdp.json"
13 | - TEST="score ucca.ucca.json"
14 | - TEST="score ucca.smatch.json"
15 | - TEST="score ucca.mrp.json"
16 | - TEST="score test.smatch.json"
17 | - TEST="score coli.smatch.json"
18 | - TEST="score coli.mrp.json"
19 | - TEST="score unit"
20 | - TEST="sample all"
21 | - TEST="validate all"
22 | script:
23 | - make -C data/$TEST
24 |
--------------------------------------------------------------------------------
/mtool/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: history regression
2 |
3 | history:
4 | git log --pretty=tformat:"%H %ae %ai %s" -- score/mces.py
5 |
6 | regression:
7 | [ -d etc ] || mkdir etc; \
8 | [ -d tmp ] || mkdir tmp; \
9 | for i in $$(awk '{print $$1}' data/score/revisions.txt); do \
10 | [ -d etc/$${i} ] || mkdir etc/$${i}; \
11 | ( cd tmp; \
12 | [ -d $${i} ] || git clone git@github.com:cfmrp/mtool.git $${i}; \
13 | cd $${i}; git checkout $${i}; \
14 | cd data/score; sbatch ../../../../data/score/test.slurm; ) \
15 | done
16 |
--------------------------------------------------------------------------------
/mtool/__pycache__/analyzer.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/analyzer.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/analyzer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/analyzer.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/analyzer.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/analyzer.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/graph.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/graph.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/graph.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/graph.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/graph.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/graph.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/inspector.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/inspector.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/inspector.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/inspector.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/inspector.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/inspector.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/main.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/main.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/treewidth.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/treewidth.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/treewidth.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/treewidth.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/__pycache__/treewidth.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/__pycache__/treewidth.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__init__.py
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/ace.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ace.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/amr.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/amr.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/amr.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/amr.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/amr.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/amr.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/conllu.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/conllu.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/conllu.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/conllu.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/conllu.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/conllu.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/eds.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/eds.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/eds.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/eds.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/eds.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/eds.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/mrp.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/mrp.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/mrp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/mrp.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/mrp.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/mrp.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/norec.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/norec.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/norec.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/norec.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/norec.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/norec.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/pmb.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/pmb.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/pmb.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/pmb.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/pmb.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/pmb.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/sdp.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/sdp.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/sdp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/sdp.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/sdp.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/sdp.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/treex.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/treex.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/treex.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/treex.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/treex.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/treex.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/ucca.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ucca.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/ucca.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ucca.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/codec/__pycache__/ucca.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/codec/__pycache__/ucca.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/codec/ace.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 |
4 | from graph import Graph
5 |
def read(fp, text=None):
    """Read native ACE event annotations (JSON) from *fp*.

    Yields one (graph, None) pair per sentence record.  An artificial
    top node connects to every event trigger via an edge labeled with
    the event type, and each trigger connects to its argument nodes via
    edges labeled with the argument roles.  Nodes covering the same
    character span are shared between events.

    *text* is accepted for codec-interface uniformity but unused here:
    each native record carries its own "text" field.

    Malformed records are reported on stderr and skipped (best-effort).
    """
    def anchor(node):
        # node[1] is a list of "from:to" character-offset strings.
        anchors = list()
        for string in node[1]:
            offsets = string.split(":")
            anchors.append({"from": int(offsets[0]), "to": int(offsets[1])})
        return anchors

    for native in json.load(fp):
        # span key -> graph node, so identical spans share one node.
        # (renamed from `map`, which shadowed the builtin)
        span2node = dict()

        try:
            graph = Graph(native["sent_id"], flavor=1, framework="ace")
            graph.add_input(native["text"])

            top = graph.add_node(top=True)

            for event in native['events']:

                trigger = event["trigger"]

                key = tuple(trigger[1])
                if key in span2node:
                    trigger = span2node[key]
                else:
                    trigger = graph.add_node(
                        anchors=anchor(trigger)
                    )
                    span2node[key] = trigger

                graph.add_edge(top.id, trigger.id, event["event_type"])

                # Iterating an empty argument list is a no-op, so no
                # explicit length guard is needed.
                for argument in event["arguments"]:
                    arg_role = argument[-1]
                    key = tuple(argument[1])
                    if key in span2node:
                        argument = span2node[key]
                    else:
                        argument = graph.add_node(
                            anchors=anchor(argument)
                        )
                        span2node[key] = argument

                    graph.add_edge(trigger.id, argument.id, arg_role)
            yield graph, None

        except Exception as error:
            print(
                f"codec.ace.read(): ignoring {native}: {error}",
                file=sys.stderr
            )
61 |
62 |
def get_text_span(node, text):
    """Return (substrings, "from:to" strings) for every anchor of *node*."""
    spans = [(a["from"], a["to"]) for a in node.anchors]
    anchored_text = [text[start:end] for start, end in spans]
    anchors = [f"{start}:{end}" for start, end in spans]
    return anchored_text, anchors
67 |
68 |
69 |
def write(graph, input):
    """Serialize *graph* back into the native ACE dictionary format.

    Delegates to write_labeled_edge(); on failure, reports the
    offending sentence id and re-raises for the caller.
    """
    try:
        return write_labeled_edge(graph, input)
    except Exception:
        # Diagnostics go to stderr (consistent with read() above);
        # a bare `raise` preserves the original traceback.
        print(f"Problem with decoding sentence {graph.id}", file=sys.stderr)
        raise
76 |
77 |
def write_labeled_edge(graph, input):
    """Convert an MRP event graph back to the native ACE dictionary.

    Edges leaving the artificial top node (id 0) define events; all
    other edges attach role-labeled arguments to their trigger's event.
    """
    nodes = {node.id: node for node in graph.nodes}

    # Pass 1: one event per top-node edge, keyed by trigger node id.
    events = {}
    for edge in graph.edges:
        if edge.src != 0:
            continue
        trigger = nodes[edge.tgt]
        events[trigger.id] = {
            'event_type': edge.lab,
            'trigger': [*get_text_span(trigger, input)],
            'arguments': [],
        }

    # Pass 2: every non-top edge is an argument of its source event.
    for edge in graph.edges:
        if edge.src == 0:
            continue
        anchored_text, anchors = get_text_span(nodes[edge.tgt], input)
        events[edge.src]['arguments'].append([anchored_text, anchors, edge.lab])

    return {
        "sent_id": graph.id,
        "text": input,
        "events": list(events.values()),
    }
109 |
110 |
--------------------------------------------------------------------------------
/mtool/codec/eds.py:
--------------------------------------------------------------------------------
1 | import os.path;
2 | import re;
3 |
4 | from graph import Graph;
5 |
6 | EDS_MATCHER = re.compile(r'(.+?)(?$");
10 |
def read_instances(fp):
  """Parse EDS graphs from their native textual serialization in *fp*.

  Yields one (sentence_id, top_handle, predicates) triple per graph,
  where predicates is a list of (node_id, label, arguments) and each
  argument is a tuple of whitespace-separated fields (unpacked later
  as (role, target_handle) by instance2graph).
  """
  top_handle, predicates = None, [];
  sentence_id = None;
  try:
    # Default the sentence id to the file's numeric base name, if any.
    sentence_id = int(os.path.splitext(os.path.basename(fp.name))[0]);
  except:
    pass;
  # True while the next closing brace still terminates a pending graph
  # (as opposed to trailing braces after one was already emitted).
  first_curly = True
  for line in fp:
    line = line.strip()
    if len(line) == 0:
      pass
    elif line.startswith("#"):
      # "#<id>" comment line: sets the id for the following graph.
      sentence_id = line[1:]
      first_curly = True
    elif line.startswith("{"):
      # "{<handle>:" opens a graph and names its top node.
      colon = line.index(":")
      assert colon >= 0
      top_handle = line[1:colon].strip()
    elif line.endswith("}"):
      assert len(line) == 1
      if first_curly:
        assert sentence_id is not None
        assert top_handle is not None
        assert len(predicates) > 0
        yield (sentence_id, top_handle, predicates)
        sentence_id, top_handle, predicates = None, None, []
      first_curly = False
    else:
      # One predicate (node) line.  NOTE(review): EDS_MATCHER is a
      # module-level pattern whose definition is garbled in this
      # excerpt; it evidently captures (handle, label, arguments).
      match = EDS_MATCHER.match(line)
      assert match is not None
      node_id, label, arguments = match.groups()
      arguments = [tuple(arg.split()) for arg in arguments.split(',') if len(arg) > 0]
      predicates.append((node_id, label.strip(), arguments))
45 |
def instance2graph(instance, reify = False, text = None):
  """Convert one parsed EDS instance into a Graph.

  *instance* is a (sentence_id, top_handle, predicates) triple as
  produced by read_instances().  With reify=True, constant arguments
  (CARG) become separate nodes linked via a "CARG" edge; otherwise
  they are prepended to the node's property list.
  """
  sentence_id, top, predicates = instance;
  anchors = None;
  graph = Graph(sentence_id, flavor = 1, framework = "eds");
  if text: graph.add_input(text);
  # handle (native node id) -> Graph node
  handle2node = {};
  for handle, label, _ in predicates:
    assert handle not in handle2node
    properties = None;
    values = None;
    # Strip an optional trailing property block off the label; its
    # fields alternate property-name / value after the first token.
    # NOTE(review): PROPERTIES_MATCHER, CARG_MATCHER, and LNK_MATCHER
    # are module-level patterns not legible in this excerpt.
    match = PROPERTIES_MATCHER.search(label);
    if match:
      label = label[:match.start()];
      fields = match.group(1).replace(",", "").split();
      properties, values = list(), list();
      for i, field in enumerate(fields[1:]):
        if i % 2 == 0: properties.append(field);
        else: values.append(field);
    carg = None;
    match = CARG_MATCHER.search(label);
    if match:
      label = label[:match.start()];
      if not reify:
        # NOTE(review): if no property block preceded, `properties` is
        # still None here and this concatenation would raise — confirm
        # CARG always co-occurs with a property block in the data.
        properties = ["CARG"] + properties;
        values = [match.group(1)] + values;
      else:
        carg = match.group(1);
    anchors = None;
    # Optional character anchoring: two integer offsets (from, to).
    match = LNK_MATCHER.search(label);
    if match:
      label = label[:match.start()];
      anchors = [{"from": int(match.group(1)), "to": int(match.group(2))}];
    handle2node[handle] = \
      graph.add_node(label = label, properties = properties, values = values, anchors = anchors);
    if carg and reify:
      # Reified constant: its own node, anchored like its predicate.
      carg = graph.add_node(label = carg, anchors = anchors);
      source = handle2node[handle].id;
      target = carg.id;
      graph.add_edge(source, target, "CARG");
  handle2node[top].is_top = True
  # Second pass: every node now exists, so edges resolve safely.
  for src_handle, _, arguments in predicates:
    src = handle2node[src_handle].id
    for relation, tgt_handle in arguments:
      tgt = handle2node[tgt_handle].id
      graph.add_edge(src, tgt, relation)
  return graph
92 |
def read(fp, reify = False, text = None):
  """Yield (graph, None) for every EDS instance parsed from *fp*."""
  for parsed in read_instances(fp):
    graph = instance2graph(parsed, reify, text)
    yield graph, None
96 |
--------------------------------------------------------------------------------
/mtool/codec/mrp.py:
--------------------------------------------------------------------------------
1 | import json;
2 | import operator;
3 | import os;
4 | import sys;
5 |
6 | from graph import Graph
7 |
def read(fp, text = None, robust = False):
  """Read MRP graphs from *fp*, one JSON object per line.

  Yields (graph, None) pairs; lines that fail to decode or re-anchor
  are reported on stderr and skipped.  When *text* is provided it is
  used to normalize each graph's input string and to re-compute all
  node anchors against the normalized string.  NOTE(review): *text*
  appears to act both as a mapping (membership / id lookup) and as an
  argument to Graph.add_input() — confirm its exact type with callers.
  """
  # State shared with the nested helpers: the current input string and
  # the forward scan position within it.  NOTE: `input` shadows the
  # builtin of the same name (left as-is; documentation-only change).
  input, i = None, 0;
  def compute(form):
    # Locate *form* in `input` at or after position `i`; return its
    # {"from", "to"} anchor and advance `i` past the match.  If the
    # literal form is absent, retry with common punctuation
    # substitutions (quote, dash, and ellipsis variants), keeping the
    # earliest match.  Raises when the form cannot be anchored.
    nonlocal i;
    m = None;
    j = input.find(form, i);
    if j >= i:
      i, m = j, len(form);
    else:
      base = form;
      # k: earliest match position found so far; l: its length.
      k, l = len(input), 0;
      for old, new in {("‘", "`"), ("‘", "'"), ("’", "'"), ("`", "'"),
                       ("“", "\""), ("”", "\""),
                       ("–", "--"), ("–", "---"), ("—", "---"),
                       ("…", "..."), ("…", ". . .")}:
        form = base.replace(old, new);
        j = input.find(form, i);
        if j >= i and j < k: k, l = j, len(form);
      if k < len(input): i, m = k, l;
    if m:
      match = {"from": i, "to": i + m};
      i += m;
      return match;
    else:
      raise Exception("failed to anchor |{}| in |{}|{}| ({})"
                      "".format(form, input[:i], input[i:], i));

  def anchor(graph, old, new):
    # Re-map every node anchor from offsets into *old* to offsets into
    # *new*, by re-locating each anchored substring in *new*.
    nonlocal input, i;
    strings = dict();
    for node in graph.nodes:
      for j in range(len(node.anchors) if node.anchors else 0):
        start, end = node.anchors[j]["from"], node.anchors[j]["to"];
        strings[(start, end)] = old[start:end];
    input, i = new, 0;
    # Process spans in ascending order so the scan position `i` only
    # ever moves forward through the new string.
    for key in sorted(strings.keys(), key = operator.itemgetter(0, 1)):
      strings[key] = compute(strings[key]);
    for node in graph.nodes:
      for j in range(len(node.anchors) if node.anchors else 0):
        node.anchors[j] \
          = strings[(node.anchors[j]["from"], node.anchors[j]["to"])];

  for j, line in enumerate(fp):
    try:
      graph = Graph.decode(json.loads(line.rstrip()), robust = robust);
      if text is not None:
        if graph.input in text:
          # Known input string: adopt the id recorded for it.
          graph.id = text[graph.input];
        else:
          # Unknown input: install the normalized text, then re-anchor
          # all nodes against it.
          old = graph.input;
          graph.add_input(text);
          anchor(graph, old, graph.input);
      yield graph, None;
    except Exception as error:
      print("codec.mrp.read(): ignoring line {}: {}"
            "".format(j, error), file = sys.stderr);
64 |
--------------------------------------------------------------------------------
/mtool/codec/sdp.py:
--------------------------------------------------------------------------------
1 | from graph import Graph;
2 |
def read_matrix(file):
  """Collect tab-separated rows from *file* up to the next blank line.

  Returns the rows accumulated so far when a blank line is reached
  (possibly an empty list); at end of file, returns the remaining
  rows, or None when nothing was read.
  """
  collected = []
  for raw in file:
    stripped = raw.rstrip()
    if not stripped:
      return collected
    collected.append(stripped.split("\t"))
  return collected or None
12 |
def read_matrices(file):
  """Generate token matrices from *file*, one block per sentence.

  The first line is consumed and discarded (presumably a format
  header — confirm against the SDP inputs); blank-line-separated
  blocks are then yielded until an empty block ends the stream.
  """
  file.readline().rstrip()
  while True:
    block = read_matrix(file)
    if not block:
      break
    yield block
19 |
def matrix2graph(matrix, framework = None, text = None):
  """Convert one SDP token matrix into a Graph.

  matrix[0][0] holds "#<sentence id>"; each subsequent row is one
  token whose columns are used as: row[1] surface form, row[2] lemma,
  row[3] part of speech, row[4] top marker ('+'), row[5] predicate
  marker ('+'), row[6] frame, and row[7:] one argument label per
  predicate in the sentence.  Isolated (singleton) nodes are purged
  from the resulting graph.
  """
  graph = Graph(matrix[0][0][1:], flavor = 0, framework = framework)
  predicates = []
  # (loop variable renamed from `id`, which shadowed the builtin)
  for node_id, row in enumerate(matrix[1:]):
    lemma, pos, frame, top = row[2], row[3], row[6], row[4] == '+'
    # An underscore lemma falls back to the surface form.
    if lemma == "_": lemma = row[1]
    properties = {"pos": pos}
    if frame != "_": properties["frame"] = frame
    graph.add_node(node_id, label = lemma,
                   properties = list(properties.keys()),
                   values = list(properties.values()),
                   top = top, anchors = [row[1]] if text else None)
    if row[5] == '+':
      predicates.append(node_id)
  # Argument columns are indexed by predicate order, not token order.
  for tgt, row in enumerate(matrix[1:]):
    for pred, label in enumerate(row[7:]):
      if label != '_':
        graph.add_edge(predicates[pred], tgt, label)
  if text:
    graph.add_input(text)
    graph.anchor()
  #
  # finally, purge singleton (isolated) nodes
  #
  graph.nodes = [node for node in graph.nodes if not node.is_singleton()]
  return graph
47 |
def read(fp, framework = None, text = None):
  """Yield (graph, None) for each SDP sentence block in *fp*."""
  for rows in read_matrices(fp):
    graph = matrix2graph(rows, framework, text)
    yield graph, None
51 |
--------------------------------------------------------------------------------
/mtool/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/.DS_Store
--------------------------------------------------------------------------------
/mtool/data/sample/Makefile:
--------------------------------------------------------------------------------
# Build MRP serializations (*/wsj.mrp) and per-sentence PDF renderings
# (*/pdf) of the CoNLL 2019 sample graphs, one framework per directory.
#
# Fixes: the psd/pdf rule previously invoked the `dm` reader on
# ./psd/wsj.sdp (copy-paste from dm/pdf); the mkdir guards failed on
# re-runs when the directories already existed; rm without -f failed
# when there was nothing to remove.
.PHONY: amr/pdf dm/pdf eds/pdf psd/pdf ucca/pdf \
        clean release all

amr/wsj.mrp: wsj.ids ../wsj.txt amr/wsj.amr
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read amr \
	                --id $$i --write mrp ./amr/wsj.amr; \
	done > $@;

amr/pdf:
	[ -d amr/dot ] || mkdir amr/dot;
	[ -d amr/pdf ] || mkdir amr/pdf;
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read amr \
	                --id $$i --write dot \
	                ./amr/wsj.amr ./amr/dot/$$i.dot; \
	done
	rm -f $$(find ./amr/dot -size 0);
	for i in ./amr/dot/*.dot; do \
	  j=$$(basename $$i .dot); \
	  dot -Tpdf $$i > ./amr/pdf/$${j}.pdf; \
	done

dm/wsj.mrp: wsj.ids ../wsj.txt dm/wsj.sdp
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read dm \
	                --id $$i --write mrp ./dm/wsj.sdp; \
	done > $@;

dm/pdf:
	[ -d dm/dot ] || mkdir dm/dot;
	[ -d dm/pdf ] || mkdir dm/pdf;
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read dm \
	                --id $$i --write dot \
	                ./dm/wsj.sdp ./dm/dot/$$i.dot; \
	done
	for i in ./dm/dot/*.dot; do \
	  j=$$(basename $$i .dot); \
	  dot -Tpdf $$i > ./dm/pdf/$${j}.pdf; \
	done

eds/wsj.mrp: wsj.ids ../wsj.txt eds/wsj.eds
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read eds \
	                --id $$i --write mrp ./eds/wsj.eds; \
	done > $@;

eds/pdf:
	[ -d eds/dot ] || mkdir eds/dot;
	[ -d eds/pdf ] || mkdir eds/pdf;
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read eds \
	                --id $$i --write dot \
	                ./eds/wsj.eds ./eds/dot/$$i.dot; \
	done
	for i in ./eds/dot/*.dot; do \
	  j=$$(basename $$i .dot); \
	  dot -Tpdf $$i > ./eds/pdf/$${j}.pdf; \
	done

psd/wsj.mrp: wsj.ids ../wsj.txt psd/wsj.sdp
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read psd \
	                --id $$i --write mrp ./psd/wsj.sdp; \
	done > $@;

psd/pdf:
	[ -d psd/dot ] || mkdir psd/dot;
	[ -d psd/pdf ] || mkdir psd/pdf;
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read psd \
	                --id $$i --write dot \
	                ./psd/wsj.sdp ./psd/dot/$$i.dot; \
	done
	for i in ./psd/dot/*.dot; do \
	  j=$$(basename $$i .dot); \
	  dot -Tpdf $$i > ./psd/pdf/$${j}.pdf; \
	done

ucca/wsj.mrp: wsj.ids ../wsj.txt ucca/xml/files.txt ucca/xml/*.xml
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read ucca \
	                --id $$i --write mrp ./ucca/xml/files.txt; \
	done > $@;

ucca/pdf:
	[ -d ucca/dot ] || mkdir ucca/dot;
	[ -d ucca/pdf ] || mkdir ucca/pdf;
	for i in $$(cat wsj.ids); do \
	  ../../main.py --text ../wsj.txt --read ucca \
	                --id $$i --write dot --strings \
	                ./ucca/xml/files.txt ./ucca/dot/$$i.dot; \
	done
	rm -f $$(find ./ucca/dot -size 0);
	for i in ./ucca/dot/*.dot; do \
	  j=$$(basename $$i .dot); \
	  dot -Tpdf $$i > ./ucca/pdf/$${j}.pdf; \
	done

clean:
	rm -f */wsj.mrp */dot/*.dot */pdf/*pdf

release:
	tar zpScvf ../public/sample.tgz --transform='s@^@mrp/2019/sample/@'\
	    README.txt Makefile \
	    amr/wsj.mrp dm/wsj.mrp eds/wsj.mrp psd/wsj.mrp ucca/wsj.mrp \
	    amr/dot amr/pdf dm/dot dm/pdf eds/dot eds/pdf \
	    psd/dot psd/pdf ucca/dot ucca/pdf

all: amr/wsj.mrp dm/wsj.mrp eds/wsj.mrp psd/wsj.mrp ucca/wsj.mrp
113 |
--------------------------------------------------------------------------------
/mtool/data/sample/README.txt:
--------------------------------------------------------------------------------
1 |
2 | CoNLL 2019 Shared Task: Meaning Representation Parsing --- Sample Graphs
3 |
4 | Version 0.9; April 9, 2019
5 |
6 |
7 | Overview
8 | ========
9 |
 10 | This directory contains a collection of 89 sample graphs in the five frameworks
11 | represented in the task: AMR, DM, EDS, PSD, and UCCA. The sentences are drawn
12 | from Section 00 of (the Penn Treebank selection from) the venerable Wall Street
13 | Journal (WSJ) Corpus. We only include sentences for which all five graph banks
14 | provide annotations.
15 |
16 | The purpose of this sample data is twofold: (a) exemplify the uniform graph
17 | representation format (serialized in JSON) adopted for the task and (b) enable
18 | in-depth linguistic comparison across frameworks.
19 |
20 | For general information on the file format, please see:
21 |
22 | http://mrp.nlpl.eu/index.php?page=4#format
23 |
24 |
25 | Contents
26 | ========
27 |
28 | The main contents in this release are the JSON files:
29 |
30 | $ ls -l */*.mrp
31 | -rw-r--r--. 1 oe oe 145935 Apr 8 00:11 amr/wsj.mrp
32 | -rw-r--r--. 1 oe oe 290495 Apr 8 00:12 dm/wsj.mrp
33 | -rw-r--r--. 1 oe oe 334885 Apr 8 00:13 eds/wsj.mrp
34 | -rw-r--r--. 1 oe oe 225669 Apr 8 00:14 psd/wsj.mrp
35 | -rw-r--r--. 1 oe oe 254101 Apr 9 16:07 ucca/wsj.mrp
36 |
37 | Each file contains the 89 graphs in the intersection of all frameworks (87 in
 38 | the case of UCCA, for the time being). These graph serializations are in what
39 | is called the JSON Lines format, effectively a stream of JSON objects with line
40 | breaks as the separator character between objects.
41 |
42 | To ease human inspection of these graphs, this package also provides graphical
43 | renderings of all graphs, as separate files (one per sentence) in the ‘dot/’
44 | and ‘pdf/’ sub-directories for each framework. These visualizations have been
45 | created using the MRP graph toolkit, which will be released by mid-May 2019.
46 |
47 |
48 | Known Limitations
49 | =================
50 |
51 | None, for the time being.
52 |
53 |
54 | Release History
55 | ===============
56 |
 57 | [Version 0.9; April 9, 2019]
58 |
59 | + First release of sample graphs in five frameworks: AMR, DM, EDS, UCCA, and PSD.
60 |
61 |
62 | Contact
63 | =======
64 |
65 | For questions or comments, please do not hesitate to email the task organizers
66 | at: ‘mrp-organizers@nlpl.eu’.
67 |
68 | Omri Abend
69 | Jan Hajič
70 | Daniel Hershcovich
71 | Marco Kuhlmann
72 | Stephan Oepen
73 | Tim O'Gorman
74 | Nianwen Xue
75 |
--------------------------------------------------------------------------------
/mtool/data/sample/ucca/xml/files.txt:
--------------------------------------------------------------------------------
1 | wsj_0001.1.xml
2 | wsj_0001.2.xml
3 | wsj_0002.1.xml
4 | wsj_0003.1.xml
5 | wsj_0003.2.xml
6 | wsj_0003.3.xml
7 | wsj_0003.4.xml
8 | wsj_0003.5.xml
9 | wsj_0003.7.xml
10 | wsj_0003.8.xml
11 | wsj_0003.9.xml
12 | wsj_0003.10.xml
13 | wsj_0003.11.xml
14 | wsj_0003.12.xml
15 | wsj_0003.13.xml
16 | wsj_0003.14.xml
17 | wsj_0003.15.xml
18 | wsj_0003.16.xml
19 | wsj_0003.17.xml
20 | wsj_0003.18.xml
21 | wsj_0003.19.xml
22 | wsj_0003.20.xml
23 | wsj_0003.21.xml
24 | wsj_0003.22.xml
25 | wsj_0003.23.xml
26 | wsj_0003.24.xml
27 | wsj_0003.25.xml
28 | wsj_0003.26.xml
29 | wsj_0003.27.xml
30 | wsj_0003.28.xml
31 | wsj_0003.29.xml
32 | wsj_0003.30.xml
33 | wsj_0004.1.xml
34 | wsj_0004.2.xml
35 | wsj_0004.4.xml
36 | wsj_0004.5.xml
37 | wsj_0004.6.xml
38 | wsj_0004.7.xml
39 | wsj_0004.8.xml
40 | wsj_0004.9.xml
41 | wsj_0004.10.xml
42 | wsj_0004.11.xml
43 | wsj_0004.12.xml
44 | wsj_0004.14.xml
45 | wsj_0004.15.xml
46 | wsj_0004.16.xml
47 | wsj_0004.17.xml
48 | wsj_0005.1.xml
49 | wsj_0005.2.xml
50 | wsj_0005.3.xml
51 | wsj_0007.1.xml
52 | wsj_0007.2.xml
53 | wsj_0007.3.xml
54 | wsj_0007.4.xml
55 | wsj_0008.1.xml
56 | wsj_0008.2.xml
57 | wsj_0008.3.xml
58 | wsj_0008.4.xml
59 | wsj_0008.5.xml
60 | wsj_0008.6.xml
61 | wsj_0009.1.xml
62 | wsj_0009.2.xml
63 | wsj_0009.3.xml
64 | wsj_0009.4.xml
65 | wsj_0010.1.xml
66 | wsj_0010.2.xml
67 | wsj_0010.3.xml
68 | wsj_0010.6.xml
69 | wsj_0010.7.xml
70 | wsj_0010.8.xml
71 | wsj_0010.10.xml
72 | wsj_0010.11.xml
73 | wsj_0010.12.xml
74 | wsj_0010.13.xml
75 | wsj_0010.15.xml
76 | wsj_0010.16.xml
77 | wsj_0010.17.xml
78 | wsj_0010.18.xml
79 | wsj_0010.19.xml
80 | wsj_0010.20.xml
81 | wsj_0011.1.xml
82 | wsj_0011.2.xml
83 | wsj_0011.4.xml
84 | wsj_0011.5.xml
85 | wsj_0011.6.xml
86 | wsj_0011.7.xml
87 | wsj_0011.8.xml
88 | wsj_0012.1.xml
89 | wsj_0012.2.xml
90 | wsj_0012.3.xml
91 | wsj_0012.4.xml
92 | wsj_0012.5.xml
--------------------------------------------------------------------------------
/mtool/data/sample/ucca/xml/wsj_0010.2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
--------------------------------------------------------------------------------
/mtool/data/sample/ucca/xml/wsj_0010.8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/mtool/data/sample/wsj.ids:
--------------------------------------------------------------------------------
1 | 20001001
2 | 20001002
3 | 20003001
4 | 20003002
5 | 20003003
6 | 20003005
7 | 20003007
8 | 20003008
9 | 20003009
10 | 20003010
11 | 20003011
12 | 20003012
13 | 20003013
14 | 20003014
15 | 20003015
16 | 20003016
17 | 20003017
18 | 20003018
19 | 20003019
20 | 20003020
21 | 20003021
22 | 20003022
23 | 20003023
24 | 20003024
25 | 20003025
26 | 20003026
27 | 20003027
28 | 20003028
29 | 20003029
30 | 20003030
31 | 20004001
32 | 20004002
33 | 20004004
34 | 20004005
35 | 20004006
36 | 20004007
37 | 20004008
38 | 20004009
39 | 20004010
40 | 20004011
41 | 20004012
42 | 20004014
43 | 20004015
44 | 20004016
45 | 20004017
46 | 20005001
47 | 20005002
48 | 20005003
49 | 20006001
50 | 20006002
51 | 20007002
52 | 20007003
53 | 20007004
54 | 20008001
55 | 20008002
56 | 20008003
57 | 20008004
58 | 20008005
59 | 20008006
60 | 20009001
61 | 20009002
62 | 20009003
63 | 20009004
64 | 20010001
65 | 20010002
66 | 20010003
67 | 20010006
68 | 20010007
69 | 20010008
70 | 20010010
71 | 20010011
72 | 20010012
73 | 20010013
74 | 20010015
75 | 20010016
76 | 20010017
77 | 20010018
78 | 20010019
79 | 20010020
80 | 20011001
81 | 20011002
82 | 20011004
83 | 20011005
84 | 20011006
85 | 20011007
86 | 20011008
87 | 20012002
88 | 20012004
89 | 20012005
90 |
--------------------------------------------------------------------------------
/mtool/data/score/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/.DS_Store
--------------------------------------------------------------------------------
/mtool/data/score/amr/233.gold.amr:
--------------------------------------------------------------------------------
1 | (j / join-up-02 :ARG0 (c / country :name (n / name :op1 "U.S.") :mod (p2 / person :ARG0-of (o / observe-01))) :ARG1 (p / project))
2 |
3 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/233.gold.dot:
--------------------------------------------------------------------------------
1 | digraph "233" {
2 | top [ style=invis ];
3 | top -> 0;
4 | 0 [ label=<
> ];
5 | 1 [ label=<> ];
6 | 2 [ label=<> ];
7 | 3 [ label=<> ];
8 | 4 [ label=<> ];
9 | 5 [ label=<> ];
10 | 0 -> 1 [ label="ARG0" ];
11 | 3 -> 4 [ label="(ARG0)-of" ];
12 | 1 -> 2 [ label="name" ];
13 | 1 -> 3 [ label="mod (domain)" ];
14 | 0 -> 5 [ label="ARG1" ];
15 | }
16 |
17 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/233.gold.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/amr/233.gold.pdf
--------------------------------------------------------------------------------
/mtool/data/score/amr/233.system.amr:
--------------------------------------------------------------------------------
1 | (f / join-up-02 :ARG1 (e / project) :prep-as (u_1104 / observe-01 :ARG0 (c4 / country :name (n2 / name :op1 "U.S.") :ARG0-of f)))
2 |
3 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/233.system.dot:
--------------------------------------------------------------------------------
1 | digraph "233" {
2 | top [ style=invis ];
3 | top -> 0;
4 | 0 [ label=<> ];
5 | 1 [ label=<> ];
6 | 2 [ label=<> ];
7 | 3 [ label=<> ];
8 | 4 [ label=<> ];
9 | 0 -> 1 [ label="ARG1" ];
10 | 3 -> 4 [ label="name" ];
11 | 0 -> 2 [ label="prep-as" ];
12 | 2 -> 3 [ label="ARG0" ];
13 | 3 -> 0 [ label="(ARG0)-of" ];
14 | }
15 |
16 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/233.system.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/amr/233.system.pdf
--------------------------------------------------------------------------------
/mtool/data/score/amr/first.gold.amr:
--------------------------------------------------------------------------------
1 | (c / claim-01 :ARG0 (p / partisan :poss (p2 / person :name (n / name :op1 "Ronald" :op2 "Reagan"))) :ARG1 (w / win-01 :ARG0 p2 :ARG2 (w2 / war :name (n2 / name :op1 "Cold" :op2 "War"))) :time (c2 / collapse-01 :ARG1 (c3 / country :name (n3 / name :op1 "Soviet" :op2 "Union")) :time (d / date-entity :year 1991)))
2 |
3 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/first.system.amr:
--------------------------------------------------------------------------------
1 | (f / claim-01 :ARG0 (u_2 / person :ARG0-of (o / partisan :ARG1 (p / person :name (n / name :op1 (explicitanon3 / Ronald :year-of (d / date-entity :time-of (s3 / collapse-01 :ARG1 (c4 / country :name (n2 / name :op1 "Soviet" :op2 "Union")) :time-of f))) :op2 "Reagan")) :ARG0-of (a2 / win-01 :ARG2 (e / war-01 :mod (u_1 / cold)) :ARG1-of f)))
2 |
3 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/partial.gold.mrp:
--------------------------------------------------------------------------------
1 | {"edges":[{"label":"ARG1","source":1,"target":2},{"label":"op2","source":0,"target":3},{"label":"ARG1","source":3,"target":4},{"label":"op1","source":0,"target":1}],"flavor":2,"framework":"amr","id":"bolt-eng-DF-170-181103-8882762_0111.33","input":"Lowering wages/Breaking Unions.","nodes":[{"id":0,"label":"slash"},{"id":1,"label":"lower-05"},{"id":2,"label":"wage"},{"id":3,"label":"break-01"},{"id":4,"label":"union"}],"time":"2019-04-10 (20:10)","tops":[0],"version":"0.9"}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/partial.system.mrp:
--------------------------------------------------------------------------------
1 | {"edges":[{"label":"ARG1","source":1,"target":2},{"label":"op2","source":0,"target":3},{"label":"ARG1","source":3,"target":4},{"label":"op1","source":0,"target":1}],"flavor":2,"framework":"amr","id":"bolt-eng-DF-170-181103-8882762_0111.33","input":"Lowering wages/Breaking Unions.","nodes":[{"id":0,"label":"slash"},{"id":1,"label":"lower-05"},{"id":2,"label":"wage"},{"id":3,"label":"break-01", "anchors" : []},{"id":4,"label":"union"}],"time":"2019-04-10 (20:10)","tops":[0],"version":"0.9"}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/test1.amr:
--------------------------------------------------------------------------------
1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29
2 | # ::snt The boy wants the girl to believe him.
3 | (w / want-01
4 | :ARG0 (b / boy)
5 | :ARG1 (b2 / believe-01
6 | :ARG0 (g / girl)
7 | :ARG1 b))
8 |
9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17
10 | # ::snt The boy is a hard worker.
11 | (p / person
12 | :domain (b / boy)
13 | :ARG0-of (w / work-01
14 | :manner (h / hard)))
15 |
16 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07
17 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon.
18 | (b / bear-02
19 | :ARG1 (p / poet :name (n / name :op1 "William" :op2 "Shakespeare"))
20 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon")))
21 |
22 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/test1.mrp:
--------------------------------------------------------------------------------
1 | {"id": "isi_0001.1", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The boy wants the girl to believe him.", "tops": [0], "nodes": [{"id": 0, "label": "want-01"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "believe-01"}, {"id": 3, "label": "girl"}], "edges": [{"source": 2, "target": 3, "label": "ARG0"}, {"source": 2, "target": 1, "label": "ARG1"}, {"source": 0, "target": 1, "label": "ARG0"}, {"source": 0, "target": 2, "label": "ARG1"}]}
2 | {"id": "isi_0001.25", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The boy is a hard worker.", "tops": [0], "nodes": [{"id": 0, "label": "person"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "work-01"}, {"id": 3, "label": "hard"}], "edges": [{"source": 0, "target": 1, "label": "domain"}, {"source": 2, "target": 3, "label": "manner"}, {"source": 0, "target": 2, "label": "ARG0-of", "normal": "ARG0"}]}
3 | {"id": "isi_0002.209", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:13)", "input": "The poet William Shakespeare was born in Stratford-upon-Avon.", "tops": [0], "nodes": [{"id": 0, "label": "bear-02"}, {"id": 1, "label": "poet"}, {"id": 2, "label": "name", "properties": ["op1", "op2"], "values": ["William", "Shakespeare"]}, {"id": 3, "label": "city"}, {"id": 4, "label": "name", "properties": ["op1"], "values": ["Stratford-upon-Avon"]}], "edges": [{"source": 0, "target": 3, "label": "location"}, {"source": 3, "target": 4, "label": "name"}, {"source": 1, "target": 2, "label": "name"}, {"source": 0, "target": 1, "label": "ARG1"}]}
4 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/test2.amr:
--------------------------------------------------------------------------------
1 | # ::id isi_0001.1 ::date 2012-05-14T21:45:29
2 | # ::snt The boy wants the girl to believe him.
3 | (w / want-01
4 | :ARG0 (b / boy)
5 | :ARG1 (b2 / believe-01
6 | :ARG0 (g / girl)
7 | :ARG1 (h / he)))
8 |
9 | # ::id isi_0001.25 ::date 2012-05-14T21:59:17
10 | # ::snt The boy is a hard worker.
11 | (w / worker
12 | :mod (h / hard)
13 | :domain (b / boy))
14 |
15 | # ::id isi_0002.209 ::date 2013-05-16T17:19:07
16 | # ::snt The poet William Shakespeare was born in Stratford-upon-Avon.
17 | (b / bear-02
18 | :ARG1 (p / poet :name (n / name :op1 william :op2 "shakespeare"))
19 | :location (c / city :name (n2 / name :op1 "Stratford-upon-Avon")))
20 |
21 |
--------------------------------------------------------------------------------
/mtool/data/score/amr/test2.mrp:
--------------------------------------------------------------------------------
1 | {"id": "isi_0001.1", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The boy wants the girl to believe him.", "tops": [0], "nodes": [{"id": 0, "label": "want-01"}, {"id": 1, "label": "boy"}, {"id": 2, "label": "believe-01"}, {"id": 3, "label": "girl"}, {"id": 4, "label": "he"}], "edges": [{"source": 0, "target": 1, "label": "ARG0"}, {"source": 2, "target": 3, "label": "ARG0"}, {"source": 2, "target": 4, "label": "ARG1"}, {"source": 0, "target": 2, "label": "ARG1"}]}
2 | {"id": "isi_0001.25", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The boy is a hard worker.", "tops": [0], "nodes": [{"id": 0, "label": "worker"}, {"id": 1, "label": "hard"}, {"id": 2, "label": "boy"}], "edges": [{"source": 0, "target": 2, "label": "domain"}, {"source": 0, "target": 1, "label": "mod", "normal": "domain"}]}
3 | {"id": "isi_0002.209", "flavor": 2, "framework": "amr", "version": 0.9, "time": "2019-06-03 (22:16)", "input": "The poet William Shakespeare was born in Stratford-upon-Avon.", "tops": [0], "nodes": [{"id": 0, "label": "bear-02"}, {"id": 1, "label": "poet"}, {"id": 2, "label": "name", "properties": ["op1", "op2"], "values": ["william", "shakespeare"]}, {"id": 3, "label": "city"}, {"id": 4, "label": "name", "properties": ["op1"], "values": ["Stratford-upon-Avon"]}], "edges": [{"source": 3, "target": 4, "label": "name"}, {"source": 1, "target": 2, "label": "name"}, {"source": 0, "target": 3, "label": "location"}, {"source": 0, "target": 1, "label": "ARG1"}]}
4 |
--------------------------------------------------------------------------------
/mtool/data/score/dm/empty.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "22100001", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Consumers may want to move their telephones a little closer to the TV set.", "nodes": [], "edges": []}
2 | {"id": "22100002", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Couch-potato jocks watching ABC's \"Monday Night Football\" can now vote during halftime for the greatest play in 20 years from among four or five filmed replays.", "tops": [], "nodes": null, "edges": null}
3 | {"id": "22100003", "flavor": 0, "framework": "dm", "version": 1.0, "time": "2019-06-23", "input": "Two weeks ago, viewers of several NBC daytime consumer segments started calling a 900 number for advice on various life-style issues.", "tops": [11], "nodes": [{"id": 0, "label": "two", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 0, "to": 3}]}, {"id": 1, "label": "week", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 4, "to": 9}]}, {"id": 2, "label": "ago", "properties": ["pos", "frame"], "values": ["RB", "p:e-i-u"], "anchors": [{"from": 10, "to": 13}]}, {"id": 4, "label": "viewer", "properties": ["pos", "frame"], "values": ["NNS", "n_of:x-i"], "anchors": [{"from": 15, "to": 22}]}, {"id": 6, "label": "several", "properties": ["pos", "frame"], "values": ["JJ", "a:e-p"], "anchors": [{"from": 26, "to": 33}]}, {"id": 7, "label": "NBC", "properties": ["pos", "frame"], "values": ["NNP", "named:x-c"], "anchors": [{"from": 34, "to": 37}]}, {"id": 8, "label": "daytime", "properties": ["pos", "frame"], "values": ["JJ", "n:x"], "anchors": [{"from": 38, "to": 45}]}, {"id": 9, "label": "consumer", "properties": ["pos", "frame"], "values": ["NN", "n_of:x-i"], "anchors": [{"from": 46, "to": 54}]}, {"id": 10, "label": "segment", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 55, "to": 63}]}, {"id": 11, "label": "start", "properties": ["pos", "frame"], "values": ["VBD", "v:e-h"], "anchors": [{"from": 64, "to": 71}]}, {"id": 12, "label": "call", "properties": ["pos", "frame"], "values": ["VBG", "v:e-i-p"], "anchors": [{"from": 72, "to": 79}]}, {"id": 13, "label": "a", "properties": ["pos", "frame"], "values": ["DT", "q:i-h-h"], "anchors": [{"from": 80, "to": 81}]}, {"id": 14, "label": "900", "properties": ["pos", "frame"], "values": ["CD", "card:i-i-c"], "anchors": [{"from": 82, "to": 85}]}, {"id": 15, "label": "number", "properties": ["pos", "frame"], "values": ["NN", 
"n_of:x"], "anchors": [{"from": 86, "to": 92}]}, {"id": 16, "label": "for", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 93, "to": 96}]}, {"id": 17, "label": "advice", "properties": ["pos", "frame"], "values": ["NN", "n:x"], "anchors": [{"from": 97, "to": 103}]}, {"id": 18, "label": "on", "properties": ["pos", "frame"], "values": ["IN", "p:e-u-i"], "anchors": [{"from": 104, "to": 106}]}, {"id": 19, "label": "various", "properties": ["pos", "frame"], "values": ["JJ", "a:e-p"], "anchors": [{"from": 107, "to": 114}]}, {"id": 20, "label": "style", "properties": ["pos", "frame"], "values": ["NN", "n_of:x"], "anchors": [{"from": 115, "to": 125}]}, {"id": 21, "label": "issue", "properties": ["pos", "frame"], "values": ["NNS", "n:x"], "anchors": [{"from": 126, "to": 132}]}], "edges": [{"source": 2, "target": 11, "label": "ARG1"}, {"source": 8, "target": 10, "label": "compound"}, {"source": 16, "target": 12, "label": "ARG1"}, {"source": 13, "target": 15, "label": "BV"}, {"source": 0, "target": 1, "label": "ARG1"}, {"source": 9, "target": 10, "label": "compound"}, {"source": 14, "target": 15, "label": "ARG1"}, {"source": 12, "target": 4, "label": "ARG1"}, {"source": 18, "target": 17, "label": "ARG1"}, {"source": 2, "target": 1, "label": "ARG2"}, {"source": 12, "target": 15, "label": "ARG2"}, {"source": 19, "target": 21, "label": "ARG1"}, {"source": 11, "target": 12, "label": "ARG1"}, {"source": 6, "target": 10, "label": "ARG1"}, {"source": 20, "target": 21, "label": "compound"}, {"source": 4, "target": 10, "label": "ARG1"}, {"source": 16, "target": 17, "label": "ARG2"}, {"source": 7, "target": 10, "label": "compound"}, {"source": 18, "target": 21, "label": "ARG2"}]}
4 |
--------------------------------------------------------------------------------
/mtool/data/score/eds/lpps.102990.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/eds/lpps.102990.png
--------------------------------------------------------------------------------
/mtool/data/score/psd/107480.foxik.mrp:
--------------------------------------------------------------------------------
1 | {"id": "107480", "flavor": 0, "framework": "psd", "version": 1.0, "time": "2019-08-01 (16:21)", "input": "I own three volcanoes, which I clean out every week (for I also clean out the one that is extinct; one never knows).", "tops": [1], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 1}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 1, "anchors": [{"from": 2, "to": 5}], "label": "own", "properties": ["pos", "frame"], "values": ["VBP", "ev-w2176f1"]}, {"id": 2, "anchors": [{"from": 6, "to": 11}], "label": "three", "properties": ["pos"], "values": ["CD"]}, {"id": 3, "anchors": [{"from": 12, "to": 21}], "label": "volcanoe", "properties": ["pos"], "values": ["NNS"]}, {"id": 4, "anchors": [{"from": 23, "to": 28}], "label": "which", "properties": ["pos"], "values": ["WDT"]}, {"id": 5, "anchors": [{"from": 29, "to": 30}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 6, "anchors": [{"from": 31, "to": 36}], "label": "clean_out", "properties": ["pos", "frame"], "values": ["VBP", "ev-w544f1"]}, {"id": 7, "anchors": [{"from": 41, "to": 46}], "label": "every", "properties": ["pos"], "values": ["DT"]}, {"id": 8, "anchors": [{"from": 47, "to": 51}], "label": "week", "properties": ["pos"], "values": ["NN"]}, {"id": 9, "anchors": [{"from": 57, "to": 58}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 10, "anchors": [{"from": 59, "to": 63}], "label": "also", "properties": ["pos"], "values": ["RB"]}, {"id": 11, "anchors": [{"from": 64, "to": 69}], "label": "clean_out", "properties": ["pos", "frame"], "values": ["VBP", "ev-w544f1"]}, {"id": 12, "anchors": [{"from": 78, "to": 81}], "label": "one", "properties": ["pos"], "values": ["NN"]}, {"id": 13, "anchors": [{"from": 82, "to": 86}], "label": "that", "properties": ["pos"], "values": ["WDT"]}, {"id": 14, "anchors": [{"from": 90, "to": 97}], "label": "extinct", "properties": ["pos"], "values": ["JJ"]}, {"id": 15, "anchors": [{"from": 97, "to": 98}], 
"label": "#Semicolon", "properties": ["pos"], "values": [":"]}, {"id": 16, "anchors": [{"from": 99, "to": 102}], "label": "#PersPron", "properties": ["pos"], "values": ["PRP"]}, {"id": 17, "anchors": [{"from": 103, "to": 108}], "label": "never", "properties": ["pos"], "values": ["RB"]}, {"id": 18, "anchors": [{"from": 109, "to": 114}], "label": "know", "properties": ["pos", "frame"], "values": ["VBZ", "ev-w1810f1"]}], "edges": [{"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 1, "target": 11, "label": "CAUS"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 3, "target": 6, "label": "DESCR"}, {"source": 6, "target": 4, "label": "PAT-arg"}, {"source": 6, "target": 5, "label": "ACT-arg"}, {"source": 6, "target": 8, "label": "THO"}, {"source": 8, "target": 7, "label": "RSTR"}, {"source": 11, "target": 4, "label": "PAT-arg"}, {"source": 11, "target": 5, "label": "ACT-arg"}, {"source": 11, "target": 9, "label": "ACT-arg"}, {"source": 11, "target": 10, "label": "RHEM"}, {"source": 11, "target": 12, "label": "PAT-arg"}, {"source": 15, "target": 11, "label": "CSQ.member"}, {"source": 15, "target": 18, "label": "CONJ.member"}, {"source": 18, "target": 16, "label": "ACT-arg"}, {"source": 18, "target": 17, "label": "TWHEN"}, {"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 6, "target": 4, "label": "PAT-arg"}, {"source": 11, "target": 4, "label": "PAT-arg"}, {"source": 6, "target": 5, "label": "ACT-arg"}, {"source": 11, "target": 5, "label": "ACT-arg"}, {"source": 3, "target": 6, "label": "DESCR"}, {"source": 8, "target": 7, "label": "RSTR"}, {"source": 6, "target": 8, "label": "THO"}, {"source": 11, "target": 9, "label": "ACT-arg"}, {"source": 11, "target": 10, "label": "RHEM"}, {"source": 1, "target": 11, "label": "CAUS"}, {"source": 15, "target": 11, "label": "CSQ.member"}, {"source": 11, "target": 12, 
"label": "PAT-arg"}, {"source": 18, "target": 16, "label": "ACT-arg"}, {"source": 18, "target": 17, "label": "TWHEN"}, {"source": 15, "target": 18, "label": "CONJ.member"}]}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/psd/107480.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "107480", "flavor": 0, "framework": "psd", "version": 1.0, "time": "2019-06-23", "input": "I own three volcanoes, which I clean out every week (for I also clean out the one that is extinct; one never knows).", "tops": [1], "nodes": [{"id": 0, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 0, "to": 1}]}, {"id": 1, "label": "own", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 2, "to": 5}]}, {"id": 2, "label": "three", "properties": ["pos"], "values": ["CD"], "anchors": [{"from": 6, "to": 11}]}, {"id": 3, "label": "volcano", "properties": ["pos"], "values": ["NNS"], "anchors": [{"from": 12, "to": 21}]}, {"id": 5, "label": "which", "properties": ["pos"], "values": ["WDT"], "anchors": [{"from": 23, "to": 28}]}, {"id": 6, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 29, "to": 30}]}, {"id": 7, "label": "clean_out", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 31, "to": 36}]}, {"id": 9, "label": "every", "properties": ["pos"], "values": ["DT"], "anchors": [{"from": 41, "to": 46}]}, {"id": 10, "label": "week", "properties": ["pos"], "values": ["NN"], "anchors": [{"from": 47, "to": 51}]}, {"id": 13, "label": "#PersPron", "properties": ["pos"], "values": ["PRP"], "anchors": [{"from": 57, "to": 58}]}, {"id": 14, "label": "also", "properties": ["pos"], "values": ["RB"], "anchors": [{"from": 59, "to": 63}]}, {"id": 15, "label": "clean_out", "properties": ["pos"], "values": ["VBP"], "anchors": [{"from": 64, "to": 69}]}, {"id": 18, "label": "one", "properties": ["pos"], "values": ["NN"], "anchors": [{"from": 78, "to": 81}]}, {"id": 19, "label": "that", "properties": ["pos"], "values": ["WDT"], "anchors": [{"from": 82, "to": 86}]}, {"id": 20, "label": "be", "properties": ["pos"], "values": ["VBZ"], "anchors": [{"from": 87, "to": 89}]}, {"id": 21, "label": "extinct", "properties": ["pos"], "values": ["JJ"], "anchors": [{"from": 90, "to": 97}]}, {"id": 23, 
"label": "one", "properties": ["pos"], "values": ["CD"], "anchors": [{"from": 99, "to": 102}]}, {"id": 24, "label": "never", "properties": ["pos"], "values": ["RB"], "anchors": [{"from": 103, "to": 108}]}, {"id": 25, "label": "know", "properties": ["pos"], "values": ["VBZ"], "anchors": [{"from": 109, "to": 114}]}], "edges": [{"source": 7, "target": 6, "label": "ACT-arg"}, {"source": 18, "target": 20, "label": "RSTR"}, {"source": 15, "target": 18, "label": "PAT-arg"}, {"source": 15, "target": 13, "label": "ACT-arg"}, {"source": 25, "target": 24, "label": "TWHEN"}, {"source": 15, "target": 25, "label": "CAUS"}, {"source": 3, "target": 7, "label": "RSTR"}, {"source": 15, "target": 14, "label": "RHEM"}, {"source": 1, "target": 15, "label": "CAUS"}, {"source": 20, "target": 19, "label": "ACT-arg"}, {"source": 1, "target": 0, "label": "ACT-arg"}, {"source": 25, "target": 23, "label": "ACT-arg"}, {"source": 3, "target": 2, "label": "RSTR"}, {"source": 20, "target": 21, "label": "PAT-arg"}, {"source": 7, "target": 10, "label": "THO"}, {"source": 7, "target": 5, "label": "PAT-arg"}, {"source": 1, "target": 3, "label": "PAT-arg"}, {"source": 10, "target": 9, "label": "RSTR"}]}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/psd/peking.brown.sdp:
--------------------------------------------------------------------------------
1 | Representation type: PSD
2 | # Evaluation
3 |
4 | Gold standard file: ../test/en.ood.psd.sdp
5 | System output file: Peking/en.ood.closed.psd.1.sdp
6 |
7 | ## Scores including virtual dependencies to top nodes
8 |
9 | ### Labeled scores
10 |
11 | Number of edges in gold standard: 21396
12 | Number of edges in system output: 19411
13 | Number of edges in common: 14877
14 |
15 | LP: 0.766421
16 | LR: 0.695317
17 | LF: 0.729140
18 | LM: 0.171444
19 |
20 | ### Unlabeled scores
21 |
22 | Number of unlabeled edges in gold standard: 21396
23 | Number of unlabeled edges in system output: 19411
24 | Number of unlabeled edges in common: 17432
25 |
26 | UP: 0.898047
27 | UR: 0.814732
28 | UF: 0.854363
29 | UM: 0.358031
30 |
31 | ### Complete predications
32 |
33 | Number of complete predications in gold standard: 3919
34 | Number of complete predications in system output: 3900
35 | Number of complete predications in common: 2048
36 |
37 | PP: 0.525128
38 | PR: 0.522582
39 | PF: 0.523852
40 |
41 | ### Semantic frames
42 |
43 | Number of semantic frames in gold standard: 3919
44 | Number of semantic frames in system output: 3900
45 | Number of semantic frames in common: 1322
46 |
47 | FP: 0.338974
48 | FR: 0.337331
49 | FF: 0.338151
50 |
51 | ### Senses
52 |
53 | Number of senses in gold standard: 3919
54 | Number of senses in system output: 3900
55 | Number of senses in common: 2171
56 |
57 | SP: 0.556667
58 | SR: 0.553968
59 | SF: 0.555314
60 |
61 | ## Scores excluding virtual dependencies to top nodes
62 |
63 | ### Labeled scores
64 |
65 | Number of edges in gold standard: 19058
66 | Number of edges in system output: 17181
67 | Number of edges in common: 12790
68 |
69 | LP: 0.744427
70 | LR: 0.671109
71 | LF: 0.705869
72 | LM: 0.173067
73 |
74 | ### Unlabeled scores
75 |
76 | Number of unlabeled edges in gold standard: 19058
77 | Number of unlabeled edges in system output: 17181
78 | Number of unlabeled edges in common: 15345
79 |
80 | UP: 0.893138
81 | UR: 0.805174
82 | UF: 0.846878
83 | UM: 0.362358
84 |
85 | ### Complete predications
86 |
87 | Number of complete predications in gold standard: 3919
88 | Number of complete predications in system output: 3900
89 | Number of complete predications in common: 2048
90 |
91 | PP: 0.525128
92 | PR: 0.522582
93 | PF: 0.523852
94 |
95 | ### Semantic frames
96 |
97 | Number of semantic frames in gold standard: 3919
98 | Number of semantic frames in system output: 3900
99 | Number of semantic frames in common: 1322
100 |
101 | FP: 0.338974
102 | FR: 0.337331
103 | FF: 0.338151
104 |
105 | ### Senses
106 |
107 | Number of senses in gold standard: 3919
108 | Number of senses in system output: 3900
109 | Number of senses in common: 2171
110 |
111 | SP: 0.556667
112 | SR: 0.553968
113 | SF: 0.555314
114 |
--------------------------------------------------------------------------------
/mtool/data/score/revisions.txt:
--------------------------------------------------------------------------------
1 | 54c0499f55874555c22827a7e61d79aeb8d29906 oe@ifi.uio.no 2019-07-05 23:49:38 +0200 cosmetics; so much for tonight ...
2 | f9ceb0a2090742a67ca89ed26b293fbdcfc292cb daniel.hershcovich@gmail.com 2019-07-05 21:57:08 +0200 Fix dominated dict lookup to be by node id rather than index
3 | 8df18be265c92c11a7fac788d727a2c879e142c4 milan@strakovi.com 2019-07-05 10:13:02 +0200 Another fix for evaluation of empty graphs.
4 | 15187440752dec7819093fa79849ff4b48d7a3d4 oe@ifi.uio.no 2019-07-05 00:55:58 +0200 fine-tuning default limits for MRP and SMATCH scorers; disable RRHC-based initialization for UCCA graphs; allow better control of RRHC and MCES limits from the command line
5 | 0d20656f47ad86352d6de86ce5b193295a3442bd oe@ifi.uio.no 2019-07-03 12:57:38 +0200 cosmetics
6 | 1e2fa352c1384ea6a1005c193ebf1d449a0de1dd oe@ifi.uio.no 2019-07-03 01:41:40 +0200 disable more assertions: is_injective() actually fails on the UCCA test (when initializing from SMATCH)
7 | 8aaa494d5794abc849965dda6fd70208a530c3db oe@ifi.uio.no 2019-07-02 21:33:43 +0200 bug fix: over-counting can apply on the same set of correspondences too
8 | 3cccda87794669573018f08a3717461b6deedfab oe@ifi.uio.no 2019-07-02 17:46:36 +0200 allow initialization from SMATCH hill-climbing; guard against over-counting (see my email to tim of june 30, 2019)
9 | 6c863c9e6233b8d3e81f39e0015333c4c75d5264 daniel.hershcovich@gmail.com 2019-07-01 14:22:24 +0200 Normalization: drop (attribute, value) pairs whose value is the default value
10 | b2145c4fc9ec79624fc84955f373b3387ca02d75 oe@ifi.uio.no 2019-06-30 01:33:24 +0200 give more weight to anchor overlap in UCCA initialization and rewards
11 | c31601c31b0e17639aa9557559d5655bfd55c371 oe@ifi.uio.no 2019-06-30 01:15:07 +0200 bug fix in sorted_splits(); streamlined smatch() interface; cosmetics
12 | 210da9b2e9eff2be7adf988d2865ab77c5ec3447 oe@ifi.uio.no 2019-06-27 22:38:06 +0200 close #20 (prior to scoring, normalize graphs according to the description on the web page)
13 | 1a61ea4484e77a458030a62a62e751e0668e7f11 oe@ifi.uio.no 2019-06-27 13:15:25 +0200 generalize anchor treatment in SMATCH wrapper
14 | b4db1996a894ad70dcb8bc83ba46ddfa354db44e daniel.hershcovich@gmail.com 2019-06-25 11:04:54 +0200 #26 Require leaf status of matched nodes to be the same in UCCA MCES
15 | 8696ffe1fa154acd03a4adbb1813354f198dfeb9 oe@ifi.uio.no 2019-06-20 10:34:00 +0200 fix copy-paste error (owing to a missing generalization)
16 | 274890bdccf3e3e502b755386b7af7fecf39284d oe@ifi.uio.no 2019-06-18 23:59:10 +0200 bug fix: edge attributes
17 | 09c48bd4a8ab8b72d05cea9571000a2e3524bb1b oe@ifi.uio.no 2019-06-18 00:59:52 +0200 activate improved estimate of edge potential
18 | 1c68aa39675291dc998a508e818e63723b0804c0 marco.kuhlmann@liu.se 2019-06-17 23:30:13 +0200 Treat edge attributes properly (closes: #13)
19 | 08e0d8a839b98a395c868cc1bd2e6ca859ef3e05 marco.kuhlmann@liu.se 2019-06-17 22:30:42 +0200 Respect node ordering in bi-lexical graphs (closes: #15)
20 | 7718d1ca50b250e154365e5846981564d7b635d5 oe@ifi.uio.no 2019-06-16 17:10:33 +0200 expose per-item result; rationalize --limit and --trace
21 |
--------------------------------------------------------------------------------
/mtool/data/score/test.slurm:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #SBATCH --job-name=score
4 | #SBATCH --mail-type=FAIL
5 | #SBATCH --account=nn9447k
6 | #SBATCH --time=12:00:00
7 | #SBATCH --nodes=1
8 | #SBATCH --mem-per-cpu=4G
9 | #SBATCH --ntasks-per-node=8
10 |
11 | commit="$(git log --pretty=format:\%H -n 1)";
12 | echo "directory: $(pwd)";
13 | echo "git status: $(git status | head -1)";
14 | echo "git commit: ${commit}";
15 | echo;
16 |
17 | source /cluster/bin/jobsetup;
18 |
19 | module purge;
20 | module use -a /projects/nlpl/software/modulefiles;
21 | module load nlpl-python-candy/201902/3.7 nlpl-numpy/1.16.3/3.7;
22 |
23 | /bin/cp ${HOME}/lib/mrp/2019/mtool/data/score/Makefile ./Makefile;
24 | make -j ${SLURM_CPUS_ON_NODE:-4} $(egrep '^[a-z/.]*.json:' Makefile | grep -v all: | sed 's/://');
25 | if [ -d ./../../../etc/ ]; then
26 | target=../../../../etc/${commit};
27 | [ -d ${target} ] || mkdir ${target};
28 | cp -va *.json *.log ${target};
29 | fi
30 |
--------------------------------------------------------------------------------
/mtool/data/score/ucca/anchors.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "133601-0004", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (11:29)", "input": "Even though you are expensive.", "tops": [5], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 4}, {"from": 5, "to": 11}]}, {"id": 1, "anchors": [{"from": 12, "to": 15}]}, {"id": 2, "anchors": [{"from": 16, "to": 19}]}, {"id": 3, "anchors": [{"from": 20, "to": 29}]}, {"id": 4, "anchors": [{"from": 29, "to": 30}]}, {"id": 5}, {"id": 6}], "edges": [{"source": 5, "target": 0, "label": "L"}, {"source": 6, "target": 3, "label": "S"}, {"source": 6, "target": 4, "label": "U"}, {"source": 6, "target": 1, "label": "A"}, {"source": 5, "target": 6, "label": "H"}, {"source": 6, "target": 2, "label": "F"}]}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/ucca/anchors.tupa.mrp:
--------------------------------------------------------------------------------
1 | {"id": "133601-0004", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (11:31)", "input": "Even though you are expensive.", "tops": [5], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 11}]}, {"id": 1, "anchors": [{"from": 12, "to": 15}]}, {"id": 2, "anchors": [{"from": 16, "to": 19}]}, {"id": 3, "anchors": [{"from": 20, "to": 29}]}, {"id": 4, "anchors": [{"from": 29, "to": 30}]}, {"id": 5}, {"id": 6}], "edges": [{"source": 6, "target": 4, "label": "U"}, {"source": 6, "target": 3, "label": "S"}, {"source": 6, "target": 2, "label": "F"}, {"source": 6, "target": 1, "label": "A"}, {"source": 5, "target": 0, "label": "L"}, {"source": 5, "target": 6, "label": "H"}]}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/ucca/koller.mrp:
--------------------------------------------------------------------------------
1 | {"id": "291046-0001", "framework": "ucca", "flavor": 1, "time": "2019-07-17 (10:43)", "version": "0.9", "input": "Hams on Friendly … RIP", "nodes": [{"anchors": [{"from": 0, "to": 4}], "id": 0, "label": "hams", "properties": [], "values": []}, {"anchors": [{"from": 5, "to": 7}], "id": 1, "label": "on", "properties": [], "values": []}, {"anchors": [{"from": 8, "to": 16}], "id": 2, "label": "friendly", "properties": [], "values": []}, {"anchors": [{"from": 17, "to": 20}], "id": 3, "label": "...", "properties": [], "values": []}, {"anchors": [{"from": 21, "to": 24}], "id": 4, "label": "rip", "properties": [], "values": []}, {"id": 4}, {"id": 5}, {"id": 6}], "edges": [{"source": 5, "target": 1, "label": "A"}, {"source": 5, "target": 2, "label": "S"}, {"source": 6, "target": 5, "label": "A"}, {"source": 5, "target": 3, "label": "A"}, {"source": 6, "target": 0, "label": "S"}, {"source": 6, "target": 4, "label": "U"}]}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/ucca/small.gold.mrp:
--------------------------------------------------------------------------------
1 | {"id": "001325-0001", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (17:11)", "input": "Highly recommended", "tops": [2], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 6}]}, {"id": 1, "anchors": [{"from": 7, "to": 18}]}, {"id": 2}, {"id": 3}], "edges": [{"source": 3, "target": 1, "label": "S"}, {"source": 2, "target": 3, "label": "H"}, {"source": 3, "target": 0, "label": "D"}]}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/ucca/small.gold.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/small.gold.pdf
--------------------------------------------------------------------------------
/mtool/data/score/ucca/small.tupa.mrp:
--------------------------------------------------------------------------------
1 | {"id": "001325-0001", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (17:12)", "input": "Highly recommended", "tops": [2], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 6}]}, {"id": 1, "anchors": [{"from": 7, "to": 18}]}, {"id": 2}, {"id": 3}], "edges": [{"source": 2, "target": 3, "label": "H"}, {"source": 3, "target": 0, "label": "D"}, {"source": 3, "target": 1, "label": "P"}]}
2 |
--------------------------------------------------------------------------------
/mtool/data/score/ucca/small.tupa.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/small.tupa.pdf
--------------------------------------------------------------------------------
/mtool/data/score/ucca/test.gold.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/test.gold.pdf
--------------------------------------------------------------------------------
/mtool/data/score/ucca/test.tupa.mrp:
--------------------------------------------------------------------------------
1 | {"id": "001325-0002", "flavor": 1, "framework": "ucca", "version": 0.9, "time": "2019-05-29 (15:48)", "input": "My 8 year old daughter loves this place.", "tops": [10], "nodes": [{"id": 0, "anchors": [{"from": 0, "to": 2}]}, {"id": 1, "anchors": [{"from": 3, "to": 4}]}, {"id": 2, "anchors": [{"from": 5, "to": 9}]}, {"id": 3, "anchors": [{"from": 10, "to": 13}]}, {"id": 4, "anchors": [{"from": 14, "to": 22}]}, {"id": 5, "anchors": [{"from": 23, "to": 28}]}, {"id": 6, "anchors": [{"from": 29, "to": 33}]}, {"id": 7, "anchors": [{"from": 34, "to": 39}]}, {"id": 8, "anchors": [{"from": 39, "to": 40}]}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}], "edges": [{"source": 11, "target": 13, "label": "A"}, {"source": 13, "target": 7, "label": "C"}, {"source": 9, "target": 3, "label": "E"}, {"source": 13, "target": 8, "label": "U"}, {"source": 11, "target": 5, "label": "P"}, {"source": 13, "target": 6, "label": "E"}, {"source": 9, "target": 0, "label": "E"}, {"source": 10, "target": 11, "label": "H"}, {"source": 12, "target": 2, "label": "C"}, {"source": 11, "target": 9, "label": "D"}, {"source": 9, "target": 4, "label": "C"}, {"source": 12, "target": 1, "label": "E"}, {"source": 9, "target": 12, "label": "E"}]}
2 | {"id": "20003013", "framework": "ucca", "version": 1.0, "time": "2019-07-05", "input": "Among 33 men who worked closely with the substance, 28 have died -- more than three times the expected number.", "nodes": [{"id": 23}, {"id": 0}, {"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}, {"id": 6}, {"id": 7}, {"id": 8}, {"id": 9}, {"id": 10}, {"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}, {"id": 16}, {"id": 17}, {"id": 18}, {"id": 19}, {"id": 20}, {"id": 21}, {"id": 23}, {"id": 24}], "edges": [{"source": 23, "target": 24, "label": "U"}, {"source": 23, "target": 23, "label": "L"}]}
3 |
--------------------------------------------------------------------------------
/mtool/data/score/ucca/test.tupa.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/data/score/ucca/test.tupa.pdf
--------------------------------------------------------------------------------
/mtool/data/validate/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all
2 |
3 | all:
4 | time python3 -u ../../main.py --trace --trace --validate all \
5 | --read mrp eds/wsj.mrp $@ 2>&1 | tee eds.wsj.log
6 |
--------------------------------------------------------------------------------
/mtool/inspector.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 |
def summarize(graphs, golds):
    """Tally submitted graphs per (language, framework) combination.

    Arguments:
      graphs -- iterable of system graphs to be counted;
      golds -- iterable of gold graphs delimiting the official graph
               identifiers per language and framework, or None to count
               all system graphs unconditionally.

    Returns a nested dictionary counts[language][framework] of unique
    system graphs, plus a boolean counts["complete"] that is True when
    every gold identifier is covered (always True when golds is None).
    Duplicate system graphs are reported on stderr and counted once.
    """
    #
    # index gold graph identifiers by language and target framework
    #
    ids = None;
    if golds is not None:
        ids = dict();
        for gold in golds:
            language = gold.language();
            if language not in ids: ids[language] = dict();
            targets = gold.targets();
            if targets is None: targets = [gold.framework];
            for target in targets:
                if target not in ids[language]: ids[language][target] = set();
                ids[language][target].add(gold.id);

    counts = dict();
    seen = dict();
    targets = dict();
    targets["eng"] = ["eds", "ptg", "ucca", "amr", "drg"];
    targets["ces"] = ["ptg"];
    targets["deu"] = ["ucca", "drg"];
    targets["zho"] = ["amr"];
    for language in ["eng", "ces", "deu", "zho"]:
        counts[language] = dict();
        seen[language] = dict();
        for key in targets[language]:
            counts[language][key] = 0;
            seen[language][key] = set();

    for graph in graphs:
        language = graph.language();
        if language is None: language = "eng";
        framework = graph.framework;
        #
        # skip combinations outside the official tables: previously an
        # unexpected language or framework raised a KeyError below
        #
        if language not in counts or framework not in counts[language]:
            continue;
        if golds is None or \
           language in ids and framework in ids[language] and \
           graph.id in ids[language][framework]:
            counts[language][framework] += 1;
            if graph.id in seen[language][framework]:
                print("inspector.summarize(): ignoring duplicate {} {} graph #{}."
                      "".format(language, framework, graph.id),
                      file = sys.stderr);
            else:
                seen[language][framework].add(graph.id);

    #
    # completeness is only meaningful against a gold inventory; the
    # unconditional ids[language][key] lookup previously crashed with a
    # TypeError when golds was None and with a KeyError for sparse golds
    #
    complete = True;
    if ids is not None:
        for language in ["eng", "ces", "deu", "zho"]:
            for key in targets[language]:
                if len(ids.get(language, dict()).get(key, set())) \
                   != counts[language][key]:
                    complete = False;
    counts["complete"] = complete;
    return counts;
53 |
--------------------------------------------------------------------------------
/mtool/score/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__init__.py
--------------------------------------------------------------------------------
/mtool/score/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/core.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/core.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/core.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/core.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/core.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/core.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/edm.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/edm.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/edm.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/edm.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/edm.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/edm.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/mces.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/mces.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/mces.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/mces.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/mces.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/mces.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/sdp.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/sdp.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/sdp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/sdp.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/sdp.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/sdp.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/smatch.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/smatch.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/smatch.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/smatch.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/smatch.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/smatch.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/ucca.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/ucca.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/ucca.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/ucca.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/score/__pycache__/ucca.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/score/__pycache__/ucca.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/score/core.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | #
4 | # _fix_me_
5 | # maybe use Unicode character classes instead, even if it likely would mean
6 | # many calls to match one-character regular expressions?
7 | #
8 | PUNCTUATION = frozenset(".?!;,:“\"”‘'’()[]{} \t\n\f")
9 | SPACE = frozenset(" \t\n\f")
10 |
def intersect(golds, systems, quiet = False):
    """Pair each system graph with its corresponding gold graph.

    Golds are indexed by (language, framework, id); a system graph with
    no explicit language falls back to English when its original key has
    no match.  Yields (gold, system) pairs; duplicates and unmatched
    system graphs are reported on stderr (unless quiet) and dropped,
    while golds that received no system graph are paired with an empty
    placeholder graph.
    """
    index = {(gold.language(), gold.framework, gold.id): gold
             for gold in golds};
    matched = set();
    for system in systems:
        language = system.language();
        key = (language, system.framework, system.id);
        if language is None and key not in index:
            language = "eng";
            key = (language, system.framework, system.id);
        if key in matched:
            if not quiet:
                print("score.intersect(): ignoring duplicate {} {} graph #{}"
                      .format(language, system.framework, system.id),
                      file=sys.stderr);
            continue;
        matched.add(key);
        gold = index.get(key);
        if gold is not None:
            yield gold, system;
        elif not quiet:
            print("score.intersect(): ignoring {} {} graph #{} with no gold graph"
                  .format(system.language(), system.framework, system.id),
                  file=sys.stderr);

    for key in index.keys() - matched:
        gold = index[key];
        if not quiet:
            print("score.intersect(): missing system {} {} graph #{}"
                  .format(gold.language(), gold.framework, gold.id),
                  file=sys.stderr);
        #
        # manufacture an empty graph as the system graph
        #
        from graph import Graph;
        yield gold, Graph(gold.id, flavor = gold.flavor,
                          framework = gold.framework);
49 |
def anchor(node):
    """Collect the (from, to) character spans anchoring *node*.

    Spans lacking either the "from" or the "to" key are skipped; a node
    without anchoring yields the empty list.
    """
    spans = node.anchors if node.anchors is not None else [];
    return [(span["from"], span["to"])
            for span in spans
            if "from" in span and "to" in span];
57 |
def explode(string, anchors, trim = PUNCTUATION):
    """Convert anchor spans into a frozenset of character positions.

    Each anchor is either a (start, end) tuple or a {"from": ..., "to":
    ...} dictionary (anything else is ignored).  Leading and trailing
    *trim* characters are stripped from every span, and whitespace
    positions inside a span are excluded from the result.
    """
    positions = set();
    for span in anchors:
        if isinstance(span, tuple):
            start, end = span;
        elif "from" in span and "to" in span:
            start, end = span["from"], span["to"];
        else:
            continue;
        # shrink the span past punctuation at either edge
        while start < end and string[start] in trim:
            start += 1;
        while end > start and string[end - 1] in trim:
            end -= 1;
        positions.update(i for i in range(start, end)
                         if string[i] not in SPACE);
    return frozenset(positions);
75 |
def fscore(gold, system, correct):
    """Compute precision, recall, and f1 from raw counts.

    *gold* and *system* are the sizes of the gold and system item sets,
    *correct* the size of their intersection; zero-valued denominators
    produce 0.0 rather than a division error.
    """
    precision = correct / system if system else 0.0;
    recall = correct / gold if gold else 0.0;
    if precision + recall == 0:
        return precision, recall, 0.0;
    return precision, recall, 2 * precision * recall / (precision + recall);
81 |
82 |
83 |
--------------------------------------------------------------------------------
/mtool/score/edm.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | import score.core;
5 |
def tuples(graph, explode = False):
    """Decompose *graph* into the four EDM tuple sets.

    Returns (names, arguments, properties, tops), where every node is
    identified either by exploded character positions (when *explode*
    is requested and the graph carries an input string) or by the tuple
    of its raw anchor spans.
    """
    use_characters = bool(graph.input) and explode;
    identities = dict();
    names = set();
    arguments = set();
    properties = set();
    tops = set();
    for node in graph.nodes:
        anchoring = score.core.anchor(node);
        if use_characters:
            identity = score.core.explode(graph.input, anchoring);
        else:
            identity = tuple(anchoring);
        identities[node.id] = identity;
        if node.label is not None:
            names.add((identity, node.label));
        if node.is_top:
            tops.add(identity);
        if node.properties and node.values:
            for property, value in zip(node.properties, node.values):
                properties.add((identity, property, value));
    for edge in graph.edges:
        arguments.add((identities[edge.src], identities[edge.tgt], edge.lab));
    return names, arguments, properties, tops;
27 |
def evaluate(golds, systems, format = "json", trace = 0):
    """Score system graphs against gold graphs with the EDM metric.

    Intersects *golds* and *systems*, compares their name, argument,
    top, and property tuples, and returns a dictionary with per-category
    and aggregate ("all") gold, system, and correct counts alongside
    precision, recall, and f1.  With *trace*, per-graph counts are also
    recorded under "scores"; trace > 1 additionally dumps the raw tuple
    sets for each graph pair.
    """
    totals = {category: {"g": 0, "s": 0, "c": 0}
              for category in ("names", "arguments", "tops", "properties")};
    scores = dict() if trace else None;
    result = {"n": 0};
    for gold, system in score.core.intersect(golds, systems):
        # exploded (character-level) identities require an input string
        # on both sides
        explode = gold.input and system.input;
        gnames, garguments, gproperties, gtops = tuples(gold, explode = explode);
        snames, sarguments, sproperties, stops = tuples(system, explode = explode);
        if trace > 1:
            print("[{}] gold:\n{}\n{}\n{}\n{}\n\n"
                  "".format(gold.id, gtops,
                            gnames, garguments, gproperties));
            print("[{}] system:\n{}\n{}\n{}\n{}\n\n"
                  "".format(gold.id, stops,
                            snames, sarguments, sproperties));
        #
        # per-graph counts, keyed in the order expected in the output
        #
        local = dict();
        for category, gset, sset in (("names", gnames, snames),
                                     ("arguments", garguments, sarguments),
                                     ("tops", gtops, stops),
                                     ("properties", gproperties, sproperties)):
            entry = {"g": len(gset), "s": len(sset), "c": len(gset & sset)};
            local[category] = entry;
            for field in ("g", "s", "c"):
                totals[category][field] += entry[field];
        result["n"] += 1;
        if trace:
            if gold.id in scores:
                print("edm.evaluate(): duplicate graph identifier: {}"
                      "".format(gold.id), file = sys.stderr);
            scores[gold.id] = local;
    if scores is not None:
        result["scores"] = scores;
    overall = {"g": 0, "s": 0, "c": 0};
    for category in ("names", "arguments", "tops", "properties"):
        g = totals[category]["g"];
        s = totals[category]["s"];
        c = totals[category]["c"];
        p, r, f = score.core.fscore(g, s, c);
        result[category] = {"g": g, "s": s, "c": c, "p": p, "r": r, "f": f};
        overall["g"] += g; overall["s"] += s; overall["c"] += c;
    p, r, f = score.core.fscore(overall["g"], overall["s"], overall["c"]);
    result["all"] = {"g": overall["g"], "s": overall["s"], "c": overall["c"],
                     "p": p, "r": r, "f": f};
    return result;
82 |
--------------------------------------------------------------------------------
/mtool/setup.py:
--------------------------------------------------------------------------------
import setuptools

# long description for PyPI is taken verbatim from the README
with open("README.md", "r") as fh:
    long_description = fh.read()


# version.py defines __version__; execute it so the release number has a
# single source of truth
exec(open('version.py').read())
release = __version__
version = '.'.join(release.split('.')[:2])


setuptools.setup(
    name="mtool",
    # use the release string computed from version.py; previously a
    # hard-coded "0.0.1" left the computation above as dead code
    version=release,
    author="Stephan Oepen , Marco Kuhlmann , "
           "Daniel Hershcovich , Tim O'Gorman ",
    author_email="mrp-organizers@nlpl.eu",
    description="The Swiss Army Knife of Meaning Representation",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/cfmrp/mtool",
    packages=setuptools.find_packages(),
    py_modules=["graph", "analyzer", "inspector", "treewidth", 'main', 'version'],
    license='LGPL-3.0',
    install_requires=[
        'numpy',
    ],
    entry_points = {
        'console_scripts': ['mtool=main:main'],
    },
    classifiers=[
        "Environment :: Console",
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Scientific/Engineering :: Information Analysis"
    ]
)
43 |
--------------------------------------------------------------------------------
/mtool/smatch/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (C) 2015 Shu Cai and Kevin Knight
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/mtool/smatch/README.md:
--------------------------------------------------------------------------------
1 | # Smatch (semantic match) tool
2 |
3 | This is source code of [smatch](http://amr.isi.edu/evaluation.html), an evaluation tool for AMR (Abstract Meaning Representation).
4 |
5 | The code here is based on [Shu Cai](https://github.com/snowblink14)'s [smatch v1.0.2](https://github.com/danielhers/smatch/tree/1.0.2), with some changes to allow programmatic usage.
6 |
7 | More details and updates about AMR and smatch can be found in USC/ISI's AMR site: http://amr.isi.edu/index.html
8 |
--------------------------------------------------------------------------------
/mtool/smatch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__init__.py
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/amr.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/amr.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/amr.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/amr.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/amr.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/amr.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/smatch.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/smatch.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/smatch.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/smatch.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/smatch/__pycache__/smatch.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/smatch/__pycache__/smatch.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/README.md:
--------------------------------------------------------------------------------
1 | Universal Conceptual Cognitive Annotation
2 | ============================
3 | UCCA is a linguistic framework for semantic annotation, whose details
4 | are available at [the following paper](http://www.cs.huji.ac.il/~oabend/papers/ucca_acl.pdf):
5 |
6 | @inproceedings{abend2013universal,
7 | author={Abend, Omri and Rappoport, Ari},
8 | title={{U}niversal {C}onceptual {C}ognitive {A}nnotation ({UCCA})},
9 | booktitle={Proc. of ACL},
10 | month={August},
11 | year={2013},
12 | pages={228--238},
13 | url={http://aclweb.org/anthology/P13-1023}
14 | }
15 |
16 | This Python 3 package provides an API to the UCCA annotation and tools to
17 | manipulate and process it. Its main features are conversion between different
18 | representations of UCCA annotations, and rich objects for all of the linguistic
19 | relations which appear in the theoretical framework (see `core`, `layer0`, `layer1`
20 | and `convert` modules under the `ucca` package).
21 |
22 | The `scripts` package contains various utilities for processing passage files.
23 |
24 | To parse text to UCCA graphs, use [TUPA, the UCCA parser](http://www.cs.huji.ac.il/~danielh/tupa).
25 |
26 |
27 | Authors
28 | ------
29 | * Amit Beka: amit.beka@gmail.com
30 | * Daniel Hershcovich: danielh@cs.huji.ac.il
31 |
32 |
33 | License
34 | -------
35 | This package is licensed under the GPLv3 or later license.
36 |
37 | [](https://travis-ci.org/danielhers/ucca)
38 | [](https://ci.appveyor.com/project/danielh/ucca)
39 | [](http://ucca.readthedocs.io/en/latest/)
40 | [](https://badge.fury.io/py/UCCA)
41 |
--------------------------------------------------------------------------------
/mtool/ucca/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__init__.py
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/convert.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/convert.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/convert.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/convert.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/convert.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/convert.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/core.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/core.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/core.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/core.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/core.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/core.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/ioutil.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/ioutil.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/ioutil.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/ioutil.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/ioutil.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/ioutil.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/layer0.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer0.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/layer0.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer0.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/layer0.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer0.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/layer1.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer1.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/layer1.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer1.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/layer1.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/layer1.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/normalization.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/normalization.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/normalization.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/normalization.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/normalization.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/normalization.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/textutil.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/textutil.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/textutil.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/textutil.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/ucca/__pycache__/textutil.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/ucca/__pycache__/textutil.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__init__.py
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/amr.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/amr.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/amr.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/amr.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/amr.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/amr.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/core.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/core.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/core.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/core.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/core.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/core.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/eds.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/eds.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/eds.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/eds.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/eds.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/eds.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/sdp.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/sdp.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/sdp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/sdp.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/sdp.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/sdp.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/ucca.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/ucca.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/ucca.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/ucca.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/ucca.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/ucca.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/utilities.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/utilities.cpython-37.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/utilities.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/utilities.cpython-38.pyc
--------------------------------------------------------------------------------
/mtool/validate/__pycache__/utilities.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/mtool/validate/__pycache__/utilities.cpython-39.pyc
--------------------------------------------------------------------------------
/mtool/validate/amr.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | from validate.utilities import report;
5 |
6 | def test(graph, actions, stream = sys.stderr):
7 | n = 0;
8 | return n;
9 |
10 |
--------------------------------------------------------------------------------
/mtool/validate/core.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | import validate.amr;
4 | import validate.eds;
5 | import validate.sdp;
6 | import validate.ucca;
7 | from validate.utilities import report;
8 |
9 |
def test(graph, actions, stream = sys.stderr):
    """Validate the framework-independent (core) MRP properties of ‘graph’.

    Checks the ‘id’, ‘flavor’, ‘framework’, and (optionally) ‘input’
    properties, node identifiers and anchors, and edge endpoints, then
    dispatches to the framework-specific validator selected by
    graph.framework.  ‘actions’ is a set of validation switches
    (e.g. "input", "anchors", "edges", or a framework name) controlling
    which checks run.  Each problem is reported via report() to ‘stream’.

    Returns the total number of validation errors found.
    """
    n = 0;
    # -- graph-level properties ------------------------------------------
    if not isinstance(graph.id, str) or len(graph.id) == 0:
        n += 1;
        report(graph,
               "missing or invalid ‘id’ property",
               stream = stream);
    if not isinstance(graph.flavor, int) or graph.flavor not in {0, 1, 2}:
        n += 1;
        report(graph,
               "missing or invalid ‘flavor’ property",
               stream = stream);
    if not isinstance(graph.framework, str) or \
       graph.framework not in {"ccd", "dm", "pas", "psd", "ptg", "ud",
                               "eds", "ucca", "amr", "drg"}:
        n += 1;
        report(graph,
               "missing or invalid ‘framework’ property",
               stream = stream);
    # NOTE: ‘and’ binds tighter than ‘or’, so this reads as three
    # (flavor == X and framework not in SET) alternatives.
    elif graph.flavor == 0 and \
         graph.framework not in {"ccd", "dm", "pas", "psd", "ud"} or \
         graph.flavor == 1 and graph.framework not in {"eds", "ptg", "ucca"} or \
         graph.flavor == 2 and graph.framework not in {"amr", "drg"}:
        n += 1;
        report(graph,
               "invalid Flavor ({}) framework: ‘{}’"
               "".format(graph.flavor, graph.framework), stream = stream);

    if "input" in actions:
        if not isinstance(graph.input, str) or len(graph.input) == 0:
            n += 1;
            report(graph,
                   "missing or invalid ‘input’ property",
                   stream = stream);

    # -- nodes: identifiers and (optionally) anchor spans ----------------
    # Anchor offsets are validated against the length of the input string;
    # without an input (l == 0) anchor checks are skipped.
    l = len(graph.input) if graph.input else 0;
    for node in graph.nodes:
        if not isinstance(node.id, int):
            n += 1;
            report(graph,
                   "invalid identifier",
                   node = node, stream = stream);
        if "anchors" in actions and node.anchors and l:
            for anchor in node.anchors:
                if anchor["from"] < 0 or anchor["from"] > l \
                   or anchor["to"] < 0 or anchor["to"] > l \
                   or anchor["from"] > anchor["to"]:
                    n += 1;
                    report(graph,
                           "invalid anchor: {}".format(anchor),
                           node = node, stream = stream);

    # -- edges: endpoints and attribute/value alignment ------------------
    if "edges" in actions:
        #
        # the following is most likely redundant: the MRP input codec already has
        # to make sure all source and target identifiers actually exist. maybe
        # add a type check (int), though?
        #
        nodes = {node.id: node for node in graph.nodes};
        for edge in graph.edges:
            if not isinstance(edge.src, int) or edge.src not in nodes:
                n += 1;
                report(graph,
                       "invalid source",
                       edge = edge, stream = stream);
            if not isinstance(edge.tgt, int) or edge.tgt not in nodes:
                n += 1;
                report(graph,
                       "invalid target",
                       edge = edge, stream = stream);
            num_attrib = len(edge.attributes) if edge.attributes else 0;
            num_values = len(edge.values) if edge.values else 0;
            if num_attrib != num_values:
                n += 1;
                report(graph,
                       "unaligned ‘attributes’ vs. ‘values’",
                       edge = edge, stream = stream);

    # -- framework-specific validation -----------------------------------
    # SDP frameworks are selected when any of their names appears in ‘actions’.
    sdp = {"ccd", "dm", "pas", "psd"};
    if graph.framework == "amr" and "amr" in actions:
        n += validate.amr.test(graph, actions, stream);
    elif graph.framework == "eds" and "eds" in actions:
        n += validate.eds.test(graph, actions, stream);
    elif graph.framework in sdp and (sdp & actions):
        n += validate.sdp.test(graph, actions, stream);
    elif graph.framework == "ucca" and "ucca" in actions:
        n += validate.ucca.test(graph, actions, stream);

    return n;
99 |
--------------------------------------------------------------------------------
/mtool/validate/eds.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | from validate.utilities import report;
5 |
def test(graph, actions, stream = sys.stderr):
    """Validate EDS-specific constraints on ‘graph’.

    Every node must carry a non-empty string label; when "anchors" is in
    ‘actions’, every node must have exactly one anchor with both "from"
    and "to" keys.  Problems are reported via report() to ‘stream’.

    Returns the number of validation errors found.
    """
    errors = 0;
    for node in graph.nodes:
        has_label = isinstance(node.label, str) and len(node.label) > 0;
        if not has_label:
            errors += 1;
            report(graph,
                   "missing or invalid label",
                   node = node, framework = "EDS", stream = stream);
        if "anchors" in actions:
            message = None;
            if not isinstance(node.anchors, list):
                message = "missing or invalid anchoring";
            else:
                well_formed = len(node.anchors) == 1 \
                    and "from" in node.anchors[0] and "to" in node.anchors[0];
                if not well_formed:
                    message = "invalid ‘anchors’ value: {}".format(node.anchors);
            if message is not None:
                errors += 1;
                report(graph, message,
                       node = node, framework = "EDS", stream = stream);
    return errors;
26 |
27 |
--------------------------------------------------------------------------------
/mtool/validate/sdp.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | from graph import Graph;
4 | from validate.utilities import report;
5 |
6 | def test(graph, actions, stream = sys.stderr):
7 | n = 0;
8 | return n;
9 |
10 |
--------------------------------------------------------------------------------
/mtool/validate/ucca.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from validate.utilities import report
4 |
5 | CATEGORIES = {'H', 'A', 'P', 'S', 'D', 'G', 'C', 'E', 'F', 'N', 'R', 'T', 'Q', 'L', 'U'}
6 |
7 |
def is_primary(edge):
    """Return True unless the edge carries a "remote" attribute whose
    paired value is anything other than the string "false"."""
    attributes = edge.attributes or ()
    values = edge.values or ()
    for name, value in zip(attributes, values):
        if name == "remote" and value != "false":
            return False
    return True
13 |
14 |
def is_implicit(node):
    """Return True when the node carries an "implicit" property whose
    paired value is anything other than the string "false"."""
    properties = node.properties or ()
    values = node.values or ()
    return any(name == "implicit" and value != "false"
               for name, value in zip(properties, values))
20 |
21 |
def test(graph, actions, stream=sys.stderr):
    """Validate UCCA-specific constraints on ‘graph’.

    Checks that every edge label is a known UCCA category and not a loop,
    that each node has at most one primary parent, that the graph has
    exactly one root (which must have no remote parents), and that every
    leaf node is either anchored or explicitly implicit.  Problems are
    reported via report() to ‘stream’.

    Returns the number of validation errors found.
    """
    n = 0
    # -- edge labels and loops -------------------------------------------
    for edge in graph.edges:
        if not isinstance(edge.lab, str) or len(edge.lab) == 0:
            n += 1
            report(graph,
                   "missing or invalid label",
                   edge=edge, framework="UCCA", stream=stream)
        elif edge.lab.upper() not in CATEGORIES:
            n += 1
            report(graph,
                   "edge label is not a UCCA category",
                   edge=edge, framework="UCCA", stream=stream)
        if edge.is_loop():
            n += 1
            report(graph,
                   "loop edge",
                   edge=edge, framework="UCCA", stream=stream)
    # -- primary-parent structure and root detection ---------------------
    # A root is a node with no incoming primary edges.
    roots = []
    for node in graph.nodes:
        primary = [edge for edge in node.incoming_edges if is_primary(edge)]
        primary_parents = {edge.src for edge in primary}
        if not primary:
            roots.append(node)
        elif len(primary_parents) > 1:
            n += 1
            report(graph,
                   "multiple primary parents for node",
                   node=node, edge=primary[0], framework="UCCA", stream=stream)
    if not roots:
        n += 1
        report(graph,
               "no roots in graph",
               framework="UCCA", stream=stream)
    elif len(roots) > 1:
        n += 1
        report(graph,
               "multiple roots in graph",
               node=roots[0], framework="UCCA", stream=stream)
    else:
        # Exactly one root: it must not have remote (non-primary) parents.
        for node in roots:
            remotes = [edge for edge in node.incoming_edges if not is_primary(edge)]
            if remotes:
                n += 1
                report(graph,
                       "root has remote parents",
                       node=node, edge=remotes[0], framework="UCCA", stream=stream)
    # -- anchoring of leaves ---------------------------------------------
    for node in graph.nodes:
        if node.is_leaf() and not node.anchors and not is_implicit(node):
            n += 1
            report(graph,
                   "unanchored non-implicit node",
                   node=node, framework="UCCA", stream=stream)
    return n
76 |
--------------------------------------------------------------------------------
/mtool/validate/utilities.py:
--------------------------------------------------------------------------------
1 | import sys;
2 |
3 | def report(graph, message, node = None, edge = None,
4 | framework = None, level = "E", stream = sys.stderr):
5 | if node is not None:
6 | node = "; node #{}".format(node.id);
7 | else:
8 | node = "";
9 | if edge is not None:
10 | edge = "; edge {} -{}-> {}".format(edge.src, edge.tgt,
11 | edge.lab if edge.lab else "");
12 | else:
13 | edge = "";
14 | if framework is not None:
15 | framework = "{{{}}} ".format(framework);
16 | else:
17 | framework = "";
18 | print("validate(): [{}] {}graph #{}{}{}: {}."
19 | "".format(level, framework, graph.id, node, edge, message),
20 | file = stream);
21 |
--------------------------------------------------------------------------------
/mtool/version.py:
--------------------------------------------------------------------------------
# Package version of mtool, exposed for introspection.
__version__ = "0.0.1";
2 |
3 |
--------------------------------------------------------------------------------
/perin/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/.DS_Store
--------------------------------------------------------------------------------
/perin/config/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/config/.DS_Store
--------------------------------------------------------------------------------
/perin/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/config/__init__.py
--------------------------------------------------------------------------------
/perin/config/edge_ace_e.yaml:
--------------------------------------------------------------------------------
1 | framework: ace
2 | language: en
3 | graph_mode: labeled-edge
4 |
5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr
6 | epochs: 200
7 | n_layers: 3
8 | query_length: 2
9 | decoder_learning_rate: 1.0e-4
10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate
11 | encoder_weight_decay: 0.1
12 | encoder_delay_steps: 500
13 | warmup_steps: 1000
14 | char_embedding: true
15 | dropout_word: 0.1
16 | focal: true
17 | hidden_size_edge_presence: 256
18 | hidden_size_anchor: 256
19 | dropout_anchor: 0.4
20 | dropout_edge_presence: 0.5
21 | dropout_label: 0.85
22 | batch_size: 16
23 | dropout_transformer: 0.25
24 | beta_2: 0.98
25 | layerwise_lr_decay: 0.9
--------------------------------------------------------------------------------
/perin/config/edge_ace_e_p.yaml:
--------------------------------------------------------------------------------
1 | framework: ace_p
2 | language: en
3 | graph_mode: labeled-edge
4 |
5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr
6 | epochs: 200
7 | n_layers: 3
8 | query_length: 2
9 | decoder_learning_rate: 1.0e-4
10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate
11 | encoder_weight_decay: 0.1
12 | encoder_delay_steps: 500
13 | warmup_steps: 1000
14 | char_embedding: true
15 | dropout_word: 0.1
16 | focal: true
17 | hidden_size_edge_presence: 256
18 | hidden_size_anchor: 256
19 | dropout_anchor: 0.4
20 | dropout_edge_presence: 0.5
21 | dropout_label: 0.85
22 | batch_size: 16
23 | dropout_transformer: 0.25
24 | beta_2: 0.98
25 | layerwise_lr_decay: 0.9
--------------------------------------------------------------------------------
/perin/config/edge_ace_e_pp.yaml:
--------------------------------------------------------------------------------
1 | framework: ace_pp
2 | language: en
3 | graph_mode: labeled-edge
4 |
5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr
6 | epochs: 200
7 | n_layers: 3
8 | query_length: 2
9 | decoder_learning_rate: 1.0e-4
10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate
11 | encoder_weight_decay: 0.1
12 | encoder_delay_steps: 500
13 | warmup_steps: 1000
14 | char_embedding: true
15 | dropout_word: 0.1
16 | focal: true
17 | hidden_size_edge_presence: 256
18 | hidden_size_anchor: 256
19 | dropout_anchor: 0.4
20 | dropout_edge_presence: 0.5
21 | dropout_label: 0.85
22 | batch_size: 16
23 | dropout_transformer: 0.25
24 | beta_2: 0.98
25 | layerwise_lr_decay: 0.9
26 |
--------------------------------------------------------------------------------
/perin/config/edge_ace_e_ppp.yaml:
--------------------------------------------------------------------------------
1 | framework: ace_ppp
2 | language: en
3 | graph_mode: labeled-edge
4 |
5 | encoder: /cluster/work/projects/ec30/huiliny/xlm-roberta-large # local directory to xlmr
6 | epochs: 200
7 | n_layers: 3
8 | query_length: 2
9 | decoder_learning_rate: 1.0e-4
10 | encoder_learning_rate: 6.0e-6 # initial encoder learning rate
11 | encoder_weight_decay: 0.1
12 | encoder_delay_steps: 500
13 | warmup_steps: 1000
14 | char_embedding: true
15 | dropout_word: 0.1
16 | focal: true
17 | hidden_size_edge_presence: 256
18 | hidden_size_anchor: 256
19 | dropout_anchor: 0.4
20 | dropout_edge_presence: 0.5
21 | dropout_label: 0.85
22 | batch_size: 16
23 | dropout_transformer: 0.25
24 | beta_2: 0.98
25 | layerwise_lr_decay: 0.9
26 |
--------------------------------------------------------------------------------
/perin/convert.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Convert an MRP graph file ($1) to ACE format via mtool, writing the
# result to "$1_converted".  An optional extra mtool flag may be passed
# as $2.
#
# Fix: the original had "$2--strings" (no space), which glued any second
# argument onto the --strings flag and produced an invalid option token.
# $2 is deliberately left unquoted so that, when absent, it expands to
# nothing instead of an empty argument.
python3 ../mtool/main.py $2 --strings --ids --read mrp --write ace "$1" "$1_converted"
--------------------------------------------------------------------------------
/perin/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/.DS_Store
--------------------------------------------------------------------------------
/perin/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/__init__.py
--------------------------------------------------------------------------------
/perin/data/batch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn.functional as F
6 |
7 |
class Batch:
    """Static utilities for collating per-example feature dicts into padded,
    stacked tensors and for moving/inspecting the resulting batches.

    The class is used as a namespace only; every method is a staticmethod.
    """

    @staticmethod
    def build(data):
        """Transpose a list of per-example dicts into one dict of batched values.

        ``data`` is a non-empty list of dicts sharing the same keys; the
        key set of the first example determines the fields.  Tuple-valued
        fields are batched component-wise (one stacked tensor per tuple
        position); all other fields are stacked directly via _stack().
        """
        fields = list(data[0].keys())
        transposed = {}
        for field in fields:
            if isinstance(data[0][field], tuple):
                transposed[field] = tuple(Batch._stack(field, [example[field][i] for example in data]) for i in range(len(data[0][field])))
            else:
                transposed[field] = Batch._stack(field, [example[field] for example in data])

        return transposed

    @staticmethod
    def _stack(field: str, examples):
        """Pad ``examples`` to a common shape and stack them into one tensor.

        The "anchored_labels" field has ragged, per-example shapes and is
        returned as a plain list instead of a tensor.
        """
        if field == "anchored_labels":
            return examples

        dim = examples[0].dim()

        # Scalars need no padding.
        if dim == 0:
            return torch.stack(examples)

        # Per-dimension maxima across all examples determine the padded shape.
        lengths = [max(example.size(i) for example in examples) for i in range(dim)]
        if any(length == 0 for length in lengths):
            # Every example is empty along some dimension: return an
            # (uninitialized) LongTensor with the batched shape.
            return torch.LongTensor(len(examples), *lengths)

        examples = [F.pad(example, Batch._pad_size(example, lengths)) for example in examples]
        return torch.stack(examples)

    @staticmethod
    def _pad_size(example, total_size):
        """Build the F.pad argument growing ``example`` to ``total_size``.

        F.pad expects (left, right) pairs starting from the LAST dimension,
        hence the reversed iteration; the left padding is always 0.
        """
        return [p for i, l in enumerate(total_size[::-1]) for p in (0, l - example.size(-1 - i))]

    @staticmethod
    def index_select(batch, indices):
        """Select the examples at ``indices`` (dim 0) from every field of ``batch``.

        List/tuple fields are filtered element-wise; tensor fields via
        Tensor.index_select.  Returns a new dict.
        """
        filtered_batch = {}
        for key, examples in batch.items():
            if isinstance(examples, list) or isinstance(examples, tuple):
                filtered_batch[key] = [example.index_select(0, indices) for example in examples]
            else:
                filtered_batch[key] = examples.index_select(0, indices)

        return filtered_batch

    @staticmethod
    def to_str(batch):
        """Render a one-field-per-line human-readable summary of ``batch``."""
        string = "\n".join([f"\t{name}: {Batch._short_str(item)}" for name, item in batch.items()])
        return string

    @staticmethod
    def to(batch, device):
        """Move every field of ``batch`` to ``device`` via _to(); returns a new dict."""
        converted = {}
        for field in batch.keys():
            converted[field] = Batch._to(batch[field], device)
        return converted

    @staticmethod
    def _short_str(tensor):
        """Return a compact "[type of size AxB (GPU i)]" description of ``tensor``.

        Non-tensor inputs are unwrapped (``.data``), recursed into
        (tuple/list), or stringified as a fallback.
        """
        # unwrap variable to tensor
        if not torch.is_tensor(tensor):
            # (1) unpack variable
            if hasattr(tensor, "data"):
                tensor = getattr(tensor, "data")
            # (2) handle include_lengths
            elif isinstance(tensor, tuple) or isinstance(tensor, list):
                return str(tuple(Batch._short_str(t) for t in tensor))
            # (3) fallback to default str
            else:
                return str(tensor)

        # copied from torch _tensor_str
        size_str = "x".join(str(size) for size in tensor.size())
        device_str = "" if not tensor.is_cuda else " (GPU {})".format(tensor.get_device())
        strt = "[{} of size {}{}]".format(torch.typename(tensor), size_str, device_str)
        return strt

    @staticmethod
    def _to(tensor, device):
        """Recursively move a tensor (or tuple/list of tensors) to ``device``.

        Raises for unsupported container types instead of silently skipping.
        """
        if not torch.is_tensor(tensor):
            if isinstance(tensor, tuple):
                return tuple(Batch._to(t, device) for t in tensor)
            elif isinstance(tensor, list):
                return [Batch._to(t, device) for t in tensor]
            else:
                raise Exception(f"unsupported type of {tensor} to be casted to cuda")

        return tensor.to(device, non_blocking=True)
96 |
--------------------------------------------------------------------------------
/perin/data/field/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/.DS_Store
--------------------------------------------------------------------------------
/perin/data/field/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/__init__.py
--------------------------------------------------------------------------------
/perin/data/field/anchor_field.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from data.field.mini_torchtext.field import RawField
6 |
7 |
class AnchorField(RawField):
    """Field producing a binary (node x token) anchor matrix plus a padding mask."""

    def process(self, batch, device=None):
        """Return (anchor tensor, mask) for one example."""
        return self.pad(batch, device)

    def pad(self, anchors, device):
        """Build the matrix from (n_nodes, n_tokens, [(node, token), ...]).

        The mask marks rows (nodes) that received no anchor at all.
        """
        n_nodes, n_tokens = anchors[0], anchors[1]
        tensor = torch.zeros(n_nodes, n_tokens, dtype=torch.long, device=device)
        for edge in anchors[-1]:
            tensor[edge[0], edge[1]] = 1
        mask = tensor.sum(-1) == 0
        return tensor, mask
20 |
--------------------------------------------------------------------------------
/perin/data/field/anchored_label_field.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from data.field.mini_torchtext.field import RawField
3 |
4 |
class AnchoredLabelField(RawField):
    """Field mapping per-node (anchor, label) pairs onto a one-hot rule tensor."""

    def __init__(self):
        super(AnchoredLabelField, self).__init__()
        self.vocab = None

    def process(self, example, device=None):
        """Numericalize labels, then render them as a dense tensor."""
        return self.pad(self.numericalize(example), device)

    def pad(self, example, device):
        """Return a (n_nodes, n_tokens, n_labels + 1) 0/1 tensor; slot 0 is reserved."""
        n_tokens = example[0]
        nodes = example[1]
        n_labels = len(self.vocab)

        tensor = torch.full([len(nodes), n_tokens, n_labels + 1], 0, dtype=torch.long, device=device)
        for node_index, node in enumerate(nodes):
            for anchor, rule in node:
                # shift by one so index 0 stays the "no rule" slot
                tensor[node_index, anchor, rule + 1] = 1
        return tensor

    def numericalize(self, arr):
        """Replace label strings with vocabulary indices (unknown labels map to 0)."""
        def walk(value, fn):
            if isinstance(value, tuple):
                return (value[0], fn(value[1]))
            if isinstance(value, list):
                return [walk(item, fn) for item in value]
            return value

        if self.vocab is None:
            return arr
        return walk(arr, lambda label: self.vocab.stoi[label] if label in self.vocab.stoi else 0)
39 |
--------------------------------------------------------------------------------
/perin/data/field/basic_field.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from data.field.mini_torchtext.field import RawField
6 |
7 |
class BasicField(RawField):
    """Field that wraps its raw example into a long tensor on the target device."""

    def process(self, example, device=None):
        """Convert `example` (any sequence/scalar of ints) to a long tensor."""
        return torch.tensor(example, dtype=torch.long, device=device)
14 |
15 | # it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
--------------------------------------------------------------------------------
/perin/data/field/bert_field.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from data.field.mini_torchtext.field import RawField
6 |
7 |
class BertField(RawField):
    """Field turning a list of subword ids into (ids, attention_mask) tensors."""

    def __init__(self):
        super(BertField, self).__init__()

    def process(self, example, device=None):
        """Return (token_ids, attention_mask) long tensors for one example.

        The mask is all ones because `example` is a single unpadded sequence.
        """
        # Fixes: removed the dead `[1] * len(example)` list (it was immediately
        # overwritten by ones_like) and replaced the legacy torch.LongTensor
        # constructor, which does not honour a non-None `device` keyword.
        example = torch.tensor(example, dtype=torch.long, device=device)
        attention_mask = torch.ones_like(example)

        return example, attention_mask
19 |
--------------------------------------------------------------------------------
/perin/data/field/edge_field.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from data.field.mini_torchtext.field import RawField
6 | from data.field.mini_torchtext.vocab import Vocab
7 | from collections import Counter
8 | import types
9 |
10 |
class EdgeField(RawField):
    """Field rendering (source, target, label) edge triples as a dense label matrix."""

    def __init__(self):
        super(EdgeField, self).__init__()
        self.vocab = None

    def process(self, edges, device=None):
        """Numericalize edge labels, then render them into a tensor."""
        return self.pad(self.numericalize(edges), device)

    def pad(self, edges, device):
        """Fill an (edges[0] x edges[1]) long tensor with numericalized labels."""
        tensor = torch.zeros(edges[0], edges[1], dtype=torch.long, device=device)
        for edge in edges[-1]:
            tensor[edge[0], edge[1]] = edge[2]
        return tensor

    def numericalize(self, arr):
        """Map the third element of every tuple through the vocabulary (None -> 0)."""
        def walk(value, fn):
            if isinstance(value, tuple):
                return (value[0], value[1], fn(value[2]))
            if isinstance(value, list):
                return [walk(item, fn) for item in value]
            return value

        if self.vocab is not None:
            arr = walk(arr, lambda label: self.vocab.stoi[label] if label is not None else 0)
        return arr

    def build_vocab(self, *args):
        """Count all edge labels in the given datasets/iterables and build the vocab."""
        def labels(value):
            if isinstance(value, tuple):
                yield value[2]
            elif isinstance(value, (list, types.GeneratorType)):
                for item in value:
                    yield from labels(item)

        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                # pull only the columns of the dataset that belong to this field
                sources.extend(arg.get_examples(name) for name, field in arg.fields.items() if field is self)
            else:
                sources.append(arg)

        counter = Counter(label for label in labels(sources) if label is not None)
        self.vocab = Vocab(counter, specials=[])
64 |
--------------------------------------------------------------------------------
/perin/data/field/edge_label_field.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from data.field.mini_torchtext.field import RawField
6 | from data.field.mini_torchtext.vocab import Vocab
7 | from collections import Counter
8 | import types
9 |
10 |
class EdgeLabelField(RawField):
    """Field producing one-hot edge-label tensors together with a presence mask."""

    def process(self, edges, device=None):
        """Numericalize edge labels and render (labels, mask) tensors."""
        edges, masks = self.numericalize(edges)
        return self.pad(edges, masks, device)

    def pad(self, edges, masks, device):
        """Return a one-hot (E0, E1, n_labels) tensor and a bool (E0, E1) mask."""
        n_labels = len(self.vocab)
        tensor = torch.zeros(edges[0], edges[1], n_labels, dtype=torch.long, device=device)
        mask_tensor = torch.zeros(edges[0], edges[1], dtype=torch.bool, device=device)

        for edge in edges[-1]:
            tensor[edge[0], edge[1], edge[2]] = 1
        for mask in masks[-1]:
            mask_tensor[mask[0], mask[1]] = mask[2]

        return tensor, mask_tensor

    def numericalize(self, arr):
        """Split the raw structure into (label indices, label-is-None mask)."""
        def walk(value, fn):
            if isinstance(value, tuple):
                return (value[0], value[1], fn(value[2]))
            if isinstance(value, list):
                return [walk(item, fn) for item in value]
            return value

        mask = walk(arr, lambda label: label is None)
        arr = walk(arr, lambda label: self.vocab.stoi[label] if label in self.vocab.stoi else 0)
        return arr, mask

    def build_vocab(self, *args):
        """Collect every edge label from datasets/iterables into the vocabulary."""
        def labels(value):
            if isinstance(value, tuple):
                yield value[2]
            elif isinstance(value, (list, types.GeneratorType)):
                for item in value:
                    yield from labels(item)

        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                # pull only the columns of the dataset that belong to this field
                sources.extend(arg.get_examples(name) for name, field in arg.fields.items() if field is self)
            else:
                sources.append(arg)

        counter = Counter(label for label in labels(sources) if label is not None)
        self.vocab = Vocab(counter, specials=[])
68 |
--------------------------------------------------------------------------------
/perin/data/field/field.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from data.field.mini_torchtext.field import Field as TorchTextField
3 | from collections import Counter, OrderedDict
4 |
5 |
6 | # small change of vocab building to correspond to our version of Dataset
class Field(TorchTextField):
    """torchtext Field whose vocab building understands our Dataset's `get_examples` API."""

    def build_vocab(self, *args, **kwargs):
        """Build `self.vocab` from Datasets (via get_examples) or raw iterables.

        Remaining keyword args are forwarded to the vocab class; `specials`
        is merged with the field's own unk/pad/init/eos tokens.
        """
        counter = Counter()
        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                # pull only the columns of the dataset that belong to this field
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)
        for data in sources:
            for x in data:
                if not self.sequential:
                    # non-sequential values are single tokens; wrap for Counter.update
                    x = [x]
                counter.update(x)

        # deduplicate special tokens while preserving their order
        specials = list(
            OrderedDict.fromkeys(
                tok
                for tok in [self.unk_token, self.pad_token, self.init_token, self.eos_token] + kwargs.pop("specials", [])
                if tok is not None
            )
        )
        self.vocab = self.vocab_cls(counter, specials=specials, **kwargs)

    def process(self, example, device=None):
        """Numericalize one example; pairs it with its length when include_lengths is set."""
        if self.include_lengths:
            example = example, len(example)
        tensor = self.numericalize(example, device=device)
        return tensor

    def numericalize(self, ex, device=None):
        """Turn one (possibly (data, length)) example into tensor(s) on `device`.

        Raises:
            ValueError: if include_lengths is set but `ex` is not a tuple.
        """
        if self.include_lengths and not isinstance(ex, tuple):
            raise ValueError("Field has include_lengths set to True, but input data is not a tuple of (data batch, batch lengths).")

        if isinstance(ex, tuple):
            ex, lengths = ex
            lengths = torch.tensor(lengths, dtype=self.dtype, device=device)

        if self.use_vocab:
            if self.sequential:
                ex = [self.vocab.stoi[x] for x in ex]
            else:
                ex = self.vocab.stoi[ex]

            if self.postprocessing is not None:
                ex = self.postprocessing(ex, self.vocab)
        else:
            # no vocab: coerce raw strings with the dtype's conversion function
            numericalization_func = self.dtypes[self.dtype]

            if not self.sequential:
                ex = numericalization_func(ex) if isinstance(ex, str) else ex
            if self.postprocessing is not None:
                ex = self.postprocessing(ex, None)

        var = torch.tensor(ex, dtype=self.dtype, device=device)

        if self.sequential and not self.batch_first:
            # time-major layout expected downstream when batch_first is off
            var.t_()
        if self.sequential:
            var = var.contiguous()

        if self.include_lengths:
            return var, lengths
        return var
71 |
--------------------------------------------------------------------------------
/perin/data/field/label_field.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from data.field.mini_torchtext.field import RawField
3 | from data.field.mini_torchtext.vocab import Vocab
4 | from collections import Counter
5 |
6 |
class LabelField(RawField):
    """Field for per-node label sequences, numericalized with a shared vocabulary."""

    # BUG FIX: the constructor was misspelled `__self__`, so it was never invoked;
    # `self.vocab` only came into existence inside build_vocab. The default value
    # keeps the (inherited) no-argument construction path working.
    def __init__(self, preprocessing=None):
        super(LabelField, self).__init__(preprocessing=preprocessing)
        self.vocab = None

    def build_vocab(self, *args, **kwargs):
        """Count label tokens from Datasets (via get_examples) or raw iterables."""
        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)

        counter = Counter()
        for data in sources:
            for x in data:
                counter.update(x)

        self.vocab = Vocab(counter, specials=[])

    def process(self, example, device=None):
        """Return (label-id tensor, length tensor) for one example."""
        tensor, lengths = self.numericalize(example, device=device)
        return tensor, lengths

    def numericalize(self, example, device=None):
        """Map labels to 1-based vocab ids (0 stays reserved).

        Uses torch.tensor instead of the legacy LongTensor constructor, which
        does not honour a non-None `device` keyword.
        """
        example = [self.vocab.stoi[x] + 1 for x in example]
        length = torch.tensor(len(example), dtype=torch.long, device=device)
        tensor = torch.tensor(example, dtype=torch.long, device=device)

        return tensor, length
37 |
--------------------------------------------------------------------------------
/perin/data/field/mini_torchtext/__pycache__/example.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/mini_torchtext/__pycache__/example.cpython-39.pyc
--------------------------------------------------------------------------------
/perin/data/field/mini_torchtext/__pycache__/field.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/mini_torchtext/__pycache__/field.cpython-39.pyc
--------------------------------------------------------------------------------
/perin/data/field/mini_torchtext/__pycache__/pipeline.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/mini_torchtext/__pycache__/pipeline.cpython-39.pyc
--------------------------------------------------------------------------------
/perin/data/field/mini_torchtext/__pycache__/utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/mini_torchtext/__pycache__/utils.cpython-39.pyc
--------------------------------------------------------------------------------
/perin/data/field/mini_torchtext/__pycache__/vocab.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/field/mini_torchtext/__pycache__/vocab.cpython-39.pyc
--------------------------------------------------------------------------------
/perin/data/field/mini_torchtext/pipeline.py:
--------------------------------------------------------------------------------
class Pipeline(object):
    """A composable transformation applied to sequence data.

    Input is assumed to be utf-8 encoded `str`.

    Attributes:
        convert_token: Callable applied to each input token.
        pipes: Ordered list of Pipelines applied to the input.
    """

    def __init__(self, convert_token=None):
        """Create a pipeline around `convert_token` (identity when None).

        Raises:
            ValueError: when `convert_token` is neither None nor callable.
        """
        if convert_token is None:
            self.convert_token = Pipeline.identity
        elif callable(convert_token):
            self.convert_token = convert_token
        else:
            raise ValueError("Pipeline input convert_token {} is not None "
                             "or callable".format(convert_token))
        self.pipes = [self]

    def __call__(self, x, *args):
        """Run every pipeline stage over `x` in order, forwarding `args`."""
        result = x
        for stage in self.pipes:
            result = stage.call(result, *args)
        return result

    def call(self, x, *args):
        """Apply only this stage's convert_token (element-wise when `x` is a list)."""
        if isinstance(x, list):
            return [self.convert_token(token, *args) for token in x]
        return self.convert_token(x, *args)

    def add_before(self, pipeline):
        """Prepend `pipeline` (wrapped if a bare callable); returns self."""
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)
        self.pipes = pipeline.pipes[:] + self.pipes[:]
        return self

    def add_after(self, pipeline):
        """Append `pipeline` (wrapped if a bare callable); returns self."""
        if not isinstance(pipeline, Pipeline):
            pipeline = Pipeline(pipeline)
        self.pipes = self.pipes[:] + pipeline.pipes[:]
        return self

    @staticmethod
    def identity(x):
        """Return the input unchanged; a named function for pickle compatibility."""
        return x
87 |
--------------------------------------------------------------------------------
/perin/data/field/nested_field.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from data.field.mini_torchtext.field import NestedField as TorchTextNestedField
6 |
7 |
class NestedField(TorchTextNestedField):
    """NestedField adapted to per-example (rather than per-batch) processing."""

    def pad(self, example):
        """Pad one example with the nesting field.

        Returns the padded example alone, or (example, sentence_length,
        word_lengths) when include_lengths is enabled.
        """
        # keep the inner field's length behaviour in sync with ours
        self.nesting_field.include_lengths = self.include_lengths
        if not self.include_lengths:
            return self.nesting_field.pad(example)

        sentence_length = len(example)
        example, word_lengths = self.nesting_field.pad(example)
        return example, sentence_length, word_lengths

    def numericalize(self, arr, device=None):
        """Numericalize a padded example; wraps lengths into tensors when enabled."""
        numericalized = []
        # temporarily disable lengths on the inner field so it returns plain data
        self.nesting_field.include_lengths = False
        if self.include_lengths:
            arr, sentence_length, word_lengths = arr

        numericalized = self.nesting_field.numericalize(arr, device=device)

        # restore the inner field's flag for subsequent pad() calls
        self.nesting_field.include_lengths = True
        if self.include_lengths:
            sentence_length = torch.tensor(sentence_length, dtype=self.dtype, device=device)
            word_lengths = torch.tensor(word_lengths, dtype=self.dtype, device=device)
            return (numericalized, sentence_length, word_lengths)
        return numericalized

    def build_vocab(self, *args, **kwargs):
        """Build one shared vocab over all sources and mirror it on the inner field."""
        sources = []
        for arg in args:
            if isinstance(arg, torch.utils.data.Dataset):
                # pull only the columns of the dataset that belong to this field
                sources += [arg.get_examples(name) for name, field in arg.fields.items() if field is self]
            else:
                sources.append(arg)

        flattened = []
        for source in sources:
            flattened.extend(source)

        # just build vocab and does not load vector
        self.nesting_field.build_vocab(*flattened, **kwargs)
        # skip TorchTextNestedField's own build_vocab; call its parent's directly
        super(TorchTextNestedField, self).build_vocab()
        self.vocab.extend(self.nesting_field.vocab)
        self.vocab.freqs = self.nesting_field.vocab.freqs.copy()
        self.nesting_field.vocab = self.vocab
51 |
--------------------------------------------------------------------------------
/perin/data/parser/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/parser/.DS_Store
--------------------------------------------------------------------------------
/perin/data/parser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/parser/__init__.py
--------------------------------------------------------------------------------
/perin/data/parser/from_mrp/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/parser/from_mrp/.DS_Store
--------------------------------------------------------------------------------
/perin/data/parser/from_mrp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/parser/from_mrp/__init__.py
--------------------------------------------------------------------------------
/perin/data/parser/from_mrp/abstract_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from data.parser.json_parser import example_from_json
6 |
7 |
class AbstractParser(torch.utils.data.Dataset):
    """Base dataset: turns parsed MRP sentence dicts into per-example field tensors."""

    def __init__(self, fields, data, filter_pred=None):
        """Build examples from `data` and normalize `fields` into a flat dict.

        Args:
            fields: dict (or list) of (name, field) specs used by example_from_json.
            data: mapping id -> sentence dict; examples are built in sorted-id order.
            filter_pred: optional predicate; examples failing it are dropped.
        """
        super(AbstractParser, self).__init__()

        self.examples = [example_from_json(d, fields) for _, d in sorted(data.items())]

        if isinstance(fields, dict):
            # flatten the dict's values (each either a (name, field) pair or a
            # list of such pairs) into a single list of pairs
            fields, field_dict = [], fields
            for field in field_dict.values():
                if isinstance(field, list):
                    fields.extend(field)
                else:
                    fields.append(field)

        if filter_pred is not None:
            make_list = isinstance(self.examples, list)
            self.examples = filter(filter_pred, self.examples)
            if make_list:
                self.examples = list(self.examples)

        self.fields = dict(fields)

        # Unpack field tuples
        for n, f in list(self.fields.items()):
            if isinstance(n, tuple):
                self.fields.update(zip(n, f))
                del self.fields[n]

    def __getitem__(self, i):
        """Process example `i` through every field; returns a name -> tensor dict."""
        item = self.examples[i]
        processed_item = {}
        for (name, field) in self.fields.items():
            if field is not None:
                processed_item[name] = field.process(getattr(item, name), device=None)
        return processed_item

    def __len__(self):
        return len(self.examples)

    def get_examples(self, attr):
        """Yield the raw value of `attr` from every example, if `attr` is a field."""
        if attr in self.fields:
            for x in self.examples:
                yield getattr(x, attr)
51 |
--------------------------------------------------------------------------------
/perin/data/parser/from_mrp/evaluation_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from data.parser.from_mrp.abstract_parser import AbstractParser
5 | import utility.parser_utils as utils
6 |
7 |
class EvaluationParser(AbstractParser):
    """Parser for the test split: loads MRP data and prepares BERT tokens."""

    def __init__(self, args, fields):
        self.data = utils.load_dataset(args.test_data)

        # flatten anchor dicts into plain [from, to] pairs
        for sentence in self.data.values():
            sentence["token anchors"] = [[anchor["from"], anchor["to"]] for anchor in sentence["token anchors"]]

        utils.create_bert_tokens(self.data, args.encoder)

        super(EvaluationParser, self).__init__(fields, self.data)
19 |
--------------------------------------------------------------------------------
/perin/data/parser/from_mrp/labeled_edge_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from data.parser.from_mrp.abstract_parser import AbstractParser
5 | import utility.parser_utils as utils
6 |
7 |
class LabeledEdgeParser(AbstractParser):
    """Training/validation parser for labeled-edge MRP graphs."""

    def __init__(self, args, part: str, fields, filter_pred=None, **kwargs):
        """Load one split, normalize nodes/edges, and build per-sentence field inputs."""
        assert part == "training" or part == "validation"
        path = args.training_data if part == "training" else args.validation_data

        self.data = utils.load_dataset(path)
        utils.anchor_ids_from_intervals(self.data)

        self.node_counter, self.edge_counter, self.no_edge_counter = 0, 0, 0
        anchor_count, n_node_token_pairs = 0, 0

        # drop any sentence containing an unlabeled edge
        for sentence_id, sentence in list(self.data.items()):
            for edge in sentence["edges"]:
                if "label" not in edge:
                    del self.data[sentence_id]
                    break

        # collapse all node labels to a single "Node" class
        for node, sentence in utils.node_generator(self.data):
            node["label"] = "Node"

            self.node_counter += 1

        utils.create_bert_tokens(self.data, args.encoder)

        # create edge vectors
        for sentence in self.data.values():
            # every graph is expected to have the virtual top node at index 0
            assert sentence["tops"] == [0], sentence
            N = len(sentence["nodes"])

            edge_count = utils.create_edges(sentence)
            self.edge_counter += edge_count
            self.no_edge_counter += N * (N - 1) - edge_count

            # drop the virtual top node before building anchors
            sentence["nodes"] = sentence["nodes"][1:]
            N = len(sentence["nodes"])

            sentence["anchor edges"] = [N, len(sentence["input"]), []]
            sentence["anchored labels"] = [len(sentence["input"]), []]
            for i, node in enumerate(sentence["nodes"]):
                anchored_labels = []

                for anchor in node["anchors"]:
                    sentence["anchor edges"][-1].append((i, anchor))
                    anchored_labels.append((anchor, node["label"]))

                sentence["anchored labels"][1].append(anchored_labels)

                anchor_count += len(node["anchors"])
                n_node_token_pairs += len(sentence["input"])

            # fields expect a list-valued id
            sentence["id"] = [sentence["id"]]

        self.anchor_freq = anchor_count / n_node_token_pairs
        self.input_count = sum(len(sentence["input"]) for sentence in self.data.values())

        super(LabeledEdgeParser, self).__init__(fields, self.data, filter_pred)

    @staticmethod
    def node_similarity_key(node):
        """Key used to match nodes across graphs: the label followed by its anchors."""
        return tuple([node["label"]] + node["anchors"])
68 |
--------------------------------------------------------------------------------
/perin/data/parser/from_mrp/request_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import utility.parser_utils as utils
5 | from data.parser.from_mrp.abstract_parser import AbstractParser
6 |
7 |
class RequestParser(AbstractParser):
    """Parser that builds a dataset directly from raw request sentences."""

    def __init__(self, sentences, args, language: str, fields):
        """Tokenize the given sentences and prepare anchors and BERT tokens.

        Args:
            sentences: iterable of raw sentence strings.
            args: configuration with the encoder name.
            language: language code (kept for interface compatibility).
            fields: field specs forwarded to AbstractParser.
        """
        self.data = {i: {"id": str(i), "sentence": sentence} for i, sentence in enumerate(sentences)}

        # BUG FIX: the original looped over `zip(self.data.values())`, which
        # yields 1-tuples (so example["input"] raised TypeError), and it read
        # the not-yet-existing "input" key instead of "sentence".
        for example in self.data.values():
            example["input"] = example["sentence"].strip().split(' ')
            utils.create_token_anchors(example)

        # flatten anchor dicts into plain [from, to] pairs
        for example in self.data.values():
            example["token anchors"] = [[a["from"], a["to"]] for a in example["token anchors"]]

        utils.create_bert_tokens(self.data, args.encoder)

        super(RequestParser, self).__init__(fields, self.data)
24 |
--------------------------------------------------------------------------------
/perin/data/parser/json_parser.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | from data.field.mini_torchtext.example import Example
3 |
4 |
def example_from_json(obj, fields):
    """Build an Example by extracting (possibly dotted) keys from a JSON object.

    Each field spec is a (name, field) pair (or a list of them); the key may be
    dotted ('foo.bar') to walk nested dicts, mapping element-wise over lists.

    Raises:
        ValueError: when a key is missing from the input data.
    """
    ex = Example()
    for key, vals in fields.items():
        if vals is None:
            continue
        if not isinstance(vals, list):
            vals = [vals]
        for name, field in vals:

            def reducer(current, part):
                # lists: extract `part` from every element
                if isinstance(current, list):
                    results = []
                    for entry in current:
                        if part not in entry:
                            raise ValueError("Specified key {} was not found in the input data".format(part))
                        results.append(entry[part])
                    return results
                if part not in current:
                    raise ValueError("Specified key {} was not found in the input data".format(part))
                return current[part]

            value = reduce(reducer, key.split("."), obj)
            setattr(ex, name, field.preprocess(value))
    return ex
36 |
--------------------------------------------------------------------------------
/perin/data/parser/to_mrp/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/parser/to_mrp/.DS_Store
--------------------------------------------------------------------------------
/perin/data/parser/to_mrp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/data/parser/to_mrp/__init__.py
--------------------------------------------------------------------------------
/perin/data/parser/to_mrp/abstract_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
class AbstractParser:
    """Converts raw model predictions back into MRP-style node/edge dicts."""

    def __init__(self, dataset):
        # dataset provides the label/edge-label vocabularies used for decoding
        self.dataset = dataset

    def create_nodes(self, prediction):
        """Create one node dict per predicted label, resolving labels via the vocab."""
        return [
            {"id": i, "label": self.label_to_str(l, prediction["anchors"][i], prediction)}
            for i, l in enumerate(prediction["labels"])
        ]

    def label_to_str(self, label, anchors, prediction):
        # labels are stored 1-based (0 is reserved), hence the -1
        return self.dataset.label_field.vocab.itos[label - 1]

    def create_edges(self, prediction, nodes):
        """Greedily decode edges in descending order of predicted presence.

        Edges with probability >= 0.5 are kept; lower-probability edges are only
        added while they connect previously disconnected components (tracked via
        shared set objects per node), stopping once >= N - 1 edges exist.
        """
        N = len(nodes)
        node_sets = [{"id": n, "set": set([n])} for n in range(N)]
        _, indices = prediction["edge presence"][:N, :N].reshape(-1).sort(descending=True)
        # recover (row, column) from the flattened argsort indices
        sources, targets = indices // N, indices % N

        edges = []
        for i in range((N - 1) * N // 2):
            source, target = sources[i].item(), targets[i].item()
            p = prediction["edge presence"][source, target]

            if p < 0.5 and len(edges) >= N - 1:
                break

            # skip a low-probability edge inside an already-connected component
            if node_sets[source]["set"] is node_sets[target]["set"] and p < 0.5:
                continue

            self.create_edge(source, target, prediction, edges, nodes)

            # merge the two components so they share one set object
            if node_sets[source]["set"] is not node_sets[target]["set"]:
                from_set = node_sets[source]["set"]
                for n in node_sets[target]["set"]:
                    from_set.add(n)
                    node_sets[n]["set"] = from_set

        return edges

    def create_edge(self, source, target, prediction, edges, nodes):
        """Append one labeled edge dict to `edges`."""
        label = self.get_edge_label(prediction, source, target)
        edge = {"source": source, "target": target, "label": label}

        edges.append(edge)

    def create_anchors(self, prediction, nodes, join_contiguous=True, at_least_one=False, single_anchor=False, mode="anchors"):
        """Decode per-node anchor spans from token-level anchor scores.

        Args:
            join_contiguous: merge spans that touch or overlap.
            at_least_one: lower the threshold so every node gets >= 1 anchor.
            single_anchor: collapse all spans into one covering span.
            mode: prediction key to read and node key to write (e.g. "anchors").
        """
        for i, node in enumerate(nodes):
            threshold = 0.5 if not at_least_one else min(0.5, prediction[mode][i].max().item())
            node[mode] = (prediction[mode][i] >= threshold).nonzero(as_tuple=False).squeeze(-1)
            # map selected token indices to their character intervals
            node[mode] = prediction["token intervals"][node[mode], :]

            if single_anchor and len(node[mode]) > 1:
                start = min(a[0].item() for a in node[mode])
                end = max(a[1].item() for a in node[mode])
                node[mode] = [{"from": start, "to": end}]
                continue

            node[mode] = [{"from": f.item(), "to": t.item()} for f, t in node[mode]]
            node[mode] = sorted(node[mode], key=lambda a: a["from"])

            if join_contiguous and len(node[mode]) > 1:
                # sweep left to right, emitting a span whenever a gap appears
                cleaned_anchors = []
                end, start = node[mode][0]["from"], node[mode][0]["from"]
                for anchor in node[mode]:
                    if end < anchor["from"]:
                        cleaned_anchors.append({"from": start, "to": end})
                        start = anchor["from"]
                    end = anchor["to"]
                cleaned_anchors.append({"from": start, "to": end})

                node[mode] = cleaned_anchors

        return nodes

    def get_edge_label(self, prediction, source, target):
        """Look up the string label of edge (source, target) in the edge-label vocab."""
        return self.dataset.edge_label_field.vocab.itos[prediction["edge labels"][source, target].item()]
81 |
--------------------------------------------------------------------------------
/perin/inference.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import argparse
5 | import torch
6 | import os
7 |
8 | from model.model import Model
9 | from data.dataset import Dataset
10 | from utility.initialize import initialize
11 | from config.params import Params
12 | from utility.predict import predict
13 |
14 |
15 |
if __name__ == "__main__":
    # Run inference on the test split using a trained checkpoint.
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint_dir", type=str, required=True)
    parser.add_argument("--data_directory", type=str, default="../dataset")
    args = parser.parse_args()

    checkpoint_dir = args.checkpoint_dir

    # restore training-time hyperparameters from the checkpoint itself
    checkpoint = torch.load(f"{checkpoint_dir}/best_checkpoint.h5", map_location=torch.device('cpu'))
    args = Params().load_state_dict(checkpoint["args"]).init_data_paths()
    args.log_wandb = False



    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    dataset = Dataset(args, verbose=False)

    # rebuild the model around the dataset vocabularies, then load trained weights
    model = Model(dataset, args).to(device)
    model.load_state_dict(checkpoint["model"])

    os.makedirs(f"{checkpoint_dir}/inference", exist_ok=True)

    print("inference of test data", flush=True)

    predict(model, dataset.test, args.test_data, args.raw_testing_data, args, None, f"{checkpoint_dir}/inference", device, mode="test")
42 |
--------------------------------------------------------------------------------
/perin/model/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/model/.DS_Store
--------------------------------------------------------------------------------
/perin/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/model/__init__.py
--------------------------------------------------------------------------------
/perin/model/head/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/model/head/.DS_Store
--------------------------------------------------------------------------------
/perin/model/head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/model/head/__init__.py
--------------------------------------------------------------------------------
/perin/model/head/labeled_edge_head.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from model.head.abstract_head import AbstractHead
8 | from data.parser.to_mrp.labeled_edge_parser import LabeledEdgeParser
9 | from utility.cross_entropy import binary_cross_entropy
10 | from utility.hungarian_matching import match_label
11 |
12 |
class LabeledEdgeHead(AbstractHead):
    """Prediction head for the labeled-edge graph encoding.

    Each node carries only a binary "label" (present / absent); the structural
    information lives on the edges, which are scored by the inherited edge
    modules. A learned virtual top node is prepended for edge prediction.
    """

    def __init__(self, dataset, args, initialize):
        super(LabeledEdgeHead, self).__init__(
            dataset,
            args,
            {"label": True, "edge presence": True, "edge label": True, "anchor": True},
            initialize,
        )
        # Embedding of the virtual top node, broadcast over the batch in forward_edge.
        self.top_node = nn.Parameter(torch.randn(1, 1, args.hidden_size), requires_grad=True)
        self.parser = LabeledEdgeParser(dataset)

    def init_label_classifier(self, dataset, args, config, initialize: bool):
        """Build the binary node-presence classifier, optionally biased to the label prior."""
        classifier = nn.Sequential(
            nn.Dropout(args.dropout_label),
            nn.Linear(args.hidden_size, 1, bias=True),
        )
        if initialize:
            # Start the output bias at the logit of the empirical label frequency.
            prior = torch.tensor([dataset.label_freqs[1]])
            classifier[1].bias.data = (prior / (1.0 - prior)).log()
        return classifier

    def forward_label(self, decoder_output):
        """Score node presence for every query."""
        return self.label_classifier(decoder_output)

    def forward_edge(self, decoder_output):
        """Score edges over the queries with the virtual top node prepended."""
        batch_size = decoder_output.size(0)
        extended = torch.cat([self.top_node.expand(batch_size, -1, -1), decoder_output], dim=1)
        return self.edge_classifier(extended)

    def loss_label(self, prediction, target, mask, matching):
        """Binary cross-entropy between matched gold labels and predicted logits."""
        logits = prediction["label"]
        matched_target = match_label(
            target["labels"][0], matching, logits.shape[:-1], logits.device, self.query_length
        )
        loss = binary_cross_entropy(logits.squeeze(-1), matched_target.float(), mask, focal=self.focal)
        return {"label": loss}

    def inference_label(self, prediction):
        """Threshold logits at zero to obtain hard 0/1 label decisions."""
        scores = prediction.squeeze(-1)
        return torch.gt(scores, 0.0).long()

    def label_cost_matrix(self, output, batch, decoder_lens, b: int):
        """Matching cost between each query and each gold node for sample `b`."""
        if output["label"] is None:
            return 1.0

        gold = batch["anchored_labels"][b]                                      # (num_nodes, num_inputs, 2)
        prob_present = output["label"][b, : decoder_lens[b], :].sigmoid().unsqueeze(0)  # (1, num_queries, 1)
        prob = torch.cat([1.0 - prob_present, prob_present], dim=-1)            # (1, num_queries, 2)
        expanded_gold = gold.repeat_interleave(self.query_length, dim=1)        # (num_nodes, num_queries, 2)

        agreement = (expanded_gold * prob).sum(-1)                              # (num_nodes, num_queries)
        presence = prob[:, :, 1:].sum(-1)                                       # (1, num_queries)
        return (agreement * presence).t().sqrt()                                # (num_queries, num_nodes)
66 |
--------------------------------------------------------------------------------
/perin/model/model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from model.module.encoder import Encoder
8 |
9 | from model.module.transformer import Decoder
10 | from model.head.labeled_edge_head import LabeledEdgeHead
11 | from utility.utils import create_padding_mask
12 |
13 |
class Model(nn.Module):
    """End-to-end graph parser: pretrained encoder, optional transformer decoder,
    and a graph-mode-specific prediction head."""

    def __init__(self, dataset, args, initialize=True):
        super(Model, self).__init__()
        self.encoder = Encoder(args, dataset)
        if args.n_layers > 0:
            self.decoder = Decoder(args)
        else:
            # identity function, which ignores all arguments except the first one
            self.decoder = lambda x, *args: x

        if args.graph_mode == "labeled-edge":
            self.head = LabeledEdgeHead(dataset, args, initialize)
        else:
            # BUGFIX: an unsupported mode previously left self.head unset and
            # crashed later in forward() with an opaque AttributeError; fail fast.
            raise ValueError(f"unsupported graph_mode: {args.graph_mode!r}")
        self.query_length = args.query_length
        self.dataset = dataset
        self.args = args

    def forward(self, batch, inference=False, **kwargs):
        """Run encoder -> decoder -> head.

        Returns head losses during training, or parsed graphs when inference=True.
        """
        every_input, word_lens = batch["every_input"]
        decoder_lens = self.query_length * word_lens  # each word spawns query_length queries
        batch_size, input_len = every_input.size(0), every_input.size(1)
        device = every_input.device

        encoder_mask = create_padding_mask(batch_size, input_len, word_lens, device)
        decoder_mask = create_padding_mask(batch_size, self.query_length * input_len, decoder_lens, device)

        encoder_output, decoder_input = self.encoder(batch["input"], batch["char_form_input"], batch["input_scatter"], input_len)

        decoder_output = self.decoder(decoder_input, encoder_output, decoder_mask, encoder_mask)

        if inference:
            return self.head.predict(encoder_output, decoder_output, encoder_mask, decoder_mask, batch)
        else:
            return self.head(encoder_output, decoder_output, encoder_mask, decoder_mask, batch)

    def get_params_for_optimizer(self, args):
        """Assemble optimizer parameter groups with per-group weight decay."""
        encoder_decay, encoder_no_decay = self.get_encoder_parameters(args.n_encoder_layers)
        decoder_decay, decoder_no_decay = self.get_decoder_parameters()

        # One group per BERT layer (allows layer-wise schedules), plus two groups
        # for all non-BERT parameters.
        parameters = [{"params": p, "weight_decay": args.encoder_weight_decay} for p in encoder_decay]
        parameters += [{"params": p, "weight_decay": 0.0} for p in encoder_no_decay]
        parameters += [
            {"params": decoder_decay, "weight_decay": args.decoder_weight_decay},
            {"params": decoder_no_decay, "weight_decay": 0.0},
        ]
        return parameters

    def get_decoder_parameters(self):
        """Return (decayed, non-decayed) generators over all non-BERT parameters."""
        no_decay = ["bias", "LayerNorm.weight", "_norm.weight"]
        decay_params = (p for name, p in self.named_parameters() if not any(nd in name for nd in no_decay) and not name.startswith("encoder.bert") and p.requires_grad)
        no_decay_params = (p for name, p in self.named_parameters() if any(nd in name for nd in no_decay) and not name.startswith("encoder.bert") and p.requires_grad)

        return decay_params, no_decay_params

    def get_encoder_parameters(self, n_layers):
        """Return per-layer BERT parameter lists, ordered from the top layer down."""
        no_decay = ["bias", "LayerNorm.weight", "_norm.weight"]
        decay_params = [
            [p for name, p in self.named_parameters() if not any(nd in name for nd in no_decay) and name.startswith(f"encoder.bert.encoder.layer.{n_layers - 1 - i}.") and p.requires_grad] for i in range(n_layers)
        ]
        no_decay_params = [
            [p for name, p in self.named_parameters() if any(nd in name for nd in no_decay) and name.startswith(f"encoder.bert.encoder.layer.{n_layers - 1 - i}.") and p.requires_grad] for i in range(n_layers)
        ]

        return decay_params, no_decay_params
76 |
--------------------------------------------------------------------------------
/perin/model/module/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/model/module/.DS_Store
--------------------------------------------------------------------------------
/perin/model/module/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/model/module/__init__.py
--------------------------------------------------------------------------------
/perin/model/module/anchor_classifier.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from model.module.biaffine import Biaffine
8 |
9 |
class AnchorClassifier(nn.Module):
    """Biaffine scorer between decoder label queries and encoder tokens."""

    def __init__(self, dataset, args, initialize: bool, bias=True, mode="anchor"):
        super(AnchorClassifier, self).__init__()

        self.token_f = nn.Linear(args.hidden_size, args.hidden_size_anchor)
        self.label_f = nn.Linear(args.hidden_size, args.hidden_size_anchor)
        self.dropout = nn.Dropout(args.dropout_anchor)

        bias_init = None
        if bias and initialize:
            # Start the output bias at the logit of the empirical anchor frequency.
            freq = torch.tensor([getattr(dataset, f"{mode}_freq")])
            bias_init = (freq / (1.0 - freq)).log()

        self.output = Biaffine(args.hidden_size_anchor, 1, bias=bias, bias_init=bias_init)

    def forward(self, label, tokens, encoder_mask):
        """Score every (label query, token) pair; padded token positions get -inf."""
        projected_tokens = self.dropout(F.elu(self.token_f(tokens)))           # (B, T_w, H)
        projected_labels = self.dropout(F.elu(self.label_f(label)))            # (B, T_l, H)
        scores = self.output(projected_labels, projected_tokens).squeeze(-1)   # (B, T_l, T_w)
        return scores.masked_fill(encoder_mask.unsqueeze(1), float("-inf"))    # (B, T_l, T_w)
33 |
--------------------------------------------------------------------------------
/perin/model/module/biaffine.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch.nn as nn
5 | from model.module.bilinear import Bilinear
6 |
7 |
class Biaffine(nn.Module):
    """Biaffine transform: bilinear(x, y) plus separate linear terms in x and y."""

    def __init__(self, input_dim, output_dim, bias=True, bias_init=None):
        super(Biaffine, self).__init__()

        self.linear_1 = nn.Linear(input_dim, output_dim, bias=False)
        self.linear_2 = nn.Linear(input_dim, output_dim, bias=False)
        self.bilinear = Bilinear(input_dim, input_dim, output_dim, bias=bias)

        if bias_init is not None:
            self.bilinear.bias.data = bias_init

    def forward(self, x, y):
        """Combine the bilinear pairwise term with broadcast linear terms."""
        bilinear_term = self.bilinear(x, y)        # (B, Lx, Ly, O)
        x_term = self.linear_1(x).unsqueeze(2)     # (B, Lx, 1, O)
        y_term = self.linear_2(y).unsqueeze(1)     # (B, 1, Ly, O)
        return bilinear_term + x_term + y_term
21 |
--------------------------------------------------------------------------------
/perin/model/module/bilinear.py:
--------------------------------------------------------------------------------
1 | # from https://github.com/NLPInBLCU/BiaffineDependencyParsing/blob/master/modules/biaffine.py
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 |
class Bilinear(nn.Module):
    """A bilinear module that deals with broadcasting for efficient memory usage.

    Input: tensors of sizes (N x L1 x D1) and (N x L2 x D2)
    Output: tensor of size (N x L1 x L2 x O)

    NOTE(review): forward() consumes self.weight through flat views, so the
    effective pairing of the (input2, output) axes differs from the nominal
    (D1, D2, O) layout — harmless for a learned parameter, but keep it in mind
    when setting the weight by hand.
    """

    def __init__(self, input1_size, input2_size, output_size, bias=True):
        super(Bilinear, self).__init__()

        self.input1_size = input1_size
        self.input2_size = input2_size
        self.output_size = output_size

        # torch.empty + explicit reset instead of the deprecated torch.Tensor
        # constructor (which also allocates uninitialized memory).
        self.weight = nn.Parameter(torch.empty(input1_size, input2_size, output_size))
        self.bias = nn.Parameter(torch.empty(output_size)) if bias else None

        self.reset_parameters()

    def reset_parameters(self):
        """Zero-initialize all parameters.

        BUGFIX: the bias was previously left uninitialized (only the weight was
        zeroed), so it started with nondeterministic garbage values unless a
        caller overwrote it; it is now zeroed alongside the weight.
        """
        nn.init.zeros_(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, input1, input2):
        input1_size = list(input1.size())
        input2_size = list(input2.size())

        # (N*L1, D1) @ (D1, D2*O) -> (N*L1, D2*O)
        intermediate = torch.mm(input1.view(-1, input1_size[-1]), self.weight.view(-1, self.input2_size * self.output_size),)

        input2 = input2.transpose(1, 2)
        # (N, L1*O, D2) @ (N, D2, L2) -> (N, L1*O, L2)
        output = intermediate.view(input1_size[0], input1_size[1] * self.output_size, input2_size[2]).bmm(input2)

        # -> (N, L1, L2, O)
        output = output.view(input1_size[0], input1_size[1], self.output_size, input2_size[1]).transpose(2, 3)

        if self.bias is not None:
            output = output + self.bias

        return output
44 |
--------------------------------------------------------------------------------
/perin/model/module/char_embedding.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_packed_sequence
7 |
8 |
class CharEmbedding(nn.Module):
    """Character-level word encoder: embeds character ids, runs a BiGRU over each
    word's characters, and projects the final GRU states to ``output_size``.

    The nested ``pack_padded_sequence`` calls strip sentence padding and then
    word padding, so the GRU only processes real characters.
    """

    def __init__(self, vocab_size: int, embedding_size: int, output_size: int):
        super(CharEmbedding, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_size, sparse=False)
        self.layer_norm = nn.LayerNorm(embedding_size)
        self.gru = nn.GRU(embedding_size, embedding_size, num_layers=1, bidirectional=True)
        self.out_linear = nn.Linear(2*embedding_size, output_size)
        self.layer_norm_2 = nn.LayerNorm(output_size)

    def forward(self, words, sentence_lens, word_lens):
        # input shape: (B, W, C) — batch of sentences x words x character ids
        n_words = words.size(1)
        sentence_lens = sentence_lens.cpu()  # pack_padded_sequence requires CPU lengths
        # Pack away sentence padding, then pack the word lengths the same way so
        # both stay aligned; finally pack the characters of every real word.
        sentence_packed = pack_padded_sequence(words, sentence_lens, batch_first=True) # shape: (B*W, C)
        lens_packed = pack_padded_sequence(word_lens, sentence_lens, batch_first=True) # shape: (B*W)
        word_packed = pack_padded_sequence(sentence_packed.data, lens_packed.data.cpu(), batch_first=True, enforce_sorted=False) # shape: (B*W*C)

        embedded = self.embedding(word_packed.data) # shape: (B*W*C, D)
        embedded = self.layer_norm(embedded) # shape: (B*W*C, D)

        # Re-wrap with the original packing metadata (batch_sizes, sorted/unsorted indices).
        embedded_packed = PackedSequence(embedded, word_packed[1], word_packed[2], word_packed[3])
        _, embedded = self.gru(embedded_packed) # final hidden states, shape: (layers * 2, B*W, D)

        # Concatenate the last forward/backward GRU states for each word.
        embedded = embedded[-2:, :, :].transpose(0, 1).flatten(1, 2) # shape: (B*W, 2*D)
        embedded = F.relu(embedded)
        embedded = self.out_linear(embedded)
        embedded = self.layer_norm_2(embedded)

        # Unpack back to a per-sentence layout using the sentence-level metadata.
        embedded, _ = pad_packed_sequence(
            PackedSequence(embedded, sentence_packed[1], sentence_packed[2], sentence_packed[3]), batch_first=True, total_length=n_words,
        ) # shape: (B, W, output_size)

        return embedded # shape: (B, W, output_size)
43 |
--------------------------------------------------------------------------------
/perin/model/module/edge_classifier.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from model.module.biaffine import Biaffine
8 |
9 |
class EdgeClassifier(nn.Module):
    """Predicts edge presence and/or edge labels between all pairs of nodes."""

    def __init__(self, dataset, args, initialize: bool, presence: bool, label: bool):
        super(EdgeClassifier, self).__init__()

        self.presence = presence
        if presence:
            presence_init = None
            if initialize:
                # Bias the presence score toward the empirical edge frequency.
                freq = torch.tensor([dataset.edge_presence_freq])
                presence_init = (freq / (1.0 - freq)).log()
            self.edge_presence = EdgeBiaffine(
                args.hidden_size, args.hidden_size_edge_presence, 1, args.dropout_edge_presence, bias_init=presence_init
            )

        self.label = label
        if label:
            label_init = None
            if initialize:
                label_init = (dataset.edge_label_freqs / (1.0 - dataset.edge_label_freqs)).log()
            n_labels = len(dataset.edge_label_field.vocab)
            self.edge_label = EdgeBiaffine(
                args.hidden_size, args.hidden_size_edge_label, n_labels, args.dropout_edge_label, bias_init=label_init
            )

    def forward(self, x):
        """Return (presence, label) score tensors; each is None when disabled."""
        presence = self.edge_presence(x).squeeze(-1) if self.presence else None  # (B, T, T)
        label = self.edge_label(x) if self.label else None                       # (B, T, T, O_1)
        return presence, label
43 |
44 |
class EdgeBiaffine(nn.Module):
    """Projects hidden states into two roles and scores all pairs biaffinely."""

    def __init__(self, hidden_dim, bottleneck_dim, output_dim, dropout, bias_init=None):
        super(EdgeBiaffine, self).__init__()
        self.hidden = nn.Linear(hidden_dim, 2 * bottleneck_dim)
        self.output = Biaffine(bottleneck_dim, output_dim, bias_init=bias_init)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        projected = self.dropout(F.elu(self.hidden(x)))      # (B, T, 2H)
        predecessors, current = projected.chunk(2, dim=-1)   # two (B, T, H) halves
        return self.output(current, predecessors)            # (B, T, T, O)
57 |
--------------------------------------------------------------------------------
/perin/model/module/transformer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
def checkpoint(module, *args, **kwargs):
    """Gradient-checkpoint ``module(*args, **kwargs)``.

    A dummy tensor with requires_grad=True is threaded through so that
    checkpointing stays active even when none of the real inputs require grad.
    """
    dummy = torch.empty(1, requires_grad=True)

    def run(_dummy, *inner_args, **inner_kwargs):
        return module(*inner_args, **inner_kwargs)

    return torch.utils.checkpoint.checkpoint(run, dummy, *args, **kwargs)
11 |
12 |
class Attention(nn.Module):
    """Multi-head attention followed by dropout; returns only the attended values."""

    def __init__(self, args):
        super().__init__()
        self.attention = nn.MultiheadAttention(
            args.hidden_size, args.n_attention_heads, args.dropout_transformer_attention
        )
        self.dropout = nn.Dropout(args.dropout_transformer)

    def forward(self, q_input, kv_input, mask=None):
        """Attend q_input over kv_input; `mask` is a key-padding mask."""
        attended, _ = self.attention(q_input, kv_input, kv_input, key_padding_mask=mask, need_weights=False)
        return self.dropout(attended)
23 |
24 |
class FeedForward(nn.Module):
    """Position-wise two-layer feed-forward network with dropout."""

    def __init__(self, args):
        super().__init__()
        self.f = nn.Sequential(
            nn.Linear(args.hidden_size, args.hidden_size_ff),
            self._get_activation_f(args.activation),
            nn.Dropout(args.dropout_transformer),
            nn.Linear(args.hidden_size_ff, args.hidden_size),
            nn.Dropout(args.dropout_transformer),
        )

    def forward(self, x):
        return self.f(x)

    def _get_activation_f(self, activation: str):
        """Instantiate the configured activation ('relu' or 'gelu')."""
        table = {"relu": nn.ReLU, "gelu": nn.GELU}
        return table[activation]()
41 |
42 |
class DecoderLayer(nn.Module):
    """Transformer layer with self-attention and feed-forward sublayers.

    Supports both pre-norm and post-norm arrangements via args.pre_norm.
    Cross-attention to the encoder is intentionally disabled; the encoder
    output argument is accepted but unused.
    """

    def __init__(self, args):
        super().__init__()
        self.self_f = Attention(args)
        self.feedforward_f = FeedForward(args)

        if args.pre_norm:
            self.pre_self_norm = nn.LayerNorm(args.hidden_size)
            self.pre_feedforward_norm = nn.LayerNorm(args.hidden_size)
            self.post_self_norm = nn.Identity()
            self.post_feedforward_norm = nn.Identity()
        else:
            self.pre_self_norm = nn.Identity()
            self.pre_feedforward_norm = nn.Identity()
            self.post_self_norm = nn.LayerNorm(args.hidden_size)
            self.post_feedforward_norm = nn.LayerNorm(args.hidden_size)

    def forward(self, x, encoder_output, x_mask, encoder_mask):
        normed = self.pre_self_norm(x)
        x = self.post_self_norm(x + self.self_f(normed, normed, x_mask))

        normed = self.pre_feedforward_norm(x)
        return self.post_feedforward_norm(x + self.feedforward_f(normed))
68 |
69 |
class Decoder(nn.Module):
    """Stack of DecoderLayers applied over the word-query states."""

    def __init__(self, args):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(DecoderLayer(args) for _ in range(args.n_layers))

    def forward(self, target, encoder, target_mask, encoder_mask):
        # nn.MultiheadAttention expects time-major tensors.
        target = target.transpose(0, 1)    # (T, B, D)
        encoder = encoder.transpose(0, 1)  # (T, B, D)

        *checkpointed, last = self.layers
        for layer in checkpointed:
            target = checkpoint(layer, target, encoder, target_mask, encoder_mask)
        # don't checkpoint the final layer due to grad_norm
        target = last(target, encoder, target_mask, encoder_mask)

        return target.transpose(0, 1)      # (B, T, D)
85 |
--------------------------------------------------------------------------------
/perin/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Slurm batch script: train the event-graph parser on one GPU.
# Usage: sbatch run.sh <config_file> <run_name>

#SBATCH --job-name=ACE
#SBATCH --account=ec30
#SBATCH --time=02-00:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --ntasks-per-node=1
#SBATCH --mem-per-cpu=8G
#SBATCH --partition=accel
#SBATCH --gpus=1


# sanity: exit on all errors and disallow unset environment variables
set -o errexit
set -o nounset

# the important bit: unload all current modules (just in case) and load only the necessary ones

module purge

module use -a /fp/projects01/ec30/software/easybuild/modules/all/
module load nlpl-pytorch/1.7.1-foss-2019b-cuda-11.1.1-Python-3.7.4
module load nlpl-transformers/4.14.1-foss-2019b-Python-3.7.4
module load nlpl-nlptools/2021.01-foss-2019b-Python-3.7.4
module load nlpl-scipy-ecosystem/2021.01-foss-2019b-Python-3.7.4
module load sentencepiece/0.1.96-foss-2019b-Python-3.7.4
module load nlpl-nltk/3.5-foss-2019b-Python-3.7.4
module load nlpl-wandb/0.12.6-foss-2019b-Python-3.7.4

# Optional GPU-utilization monitoring (disabled):
#nvidia-smi --query-gpu=timestamp,utilization.gpu,utilization.memory \
#    --format=csv --loop=1 > "gpu_util-$SLURM_JOB_ID.csv" &
#NVIDIA_MONITOR_PID=$!  # Capture PID of monitoring process

# $1 = config file, $2 = run name; offline mode stops HF hub downloads on compute nodes.
TRANSFORMERS_OFFLINE=1 python3 train.py --log_wandb --config "$1" --name "$2"
38 |
--------------------------------------------------------------------------------
/perin/run_infer.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Slurm batch script: run inference from a trained checkpoint on one GPU.
# Usage: sbatch run_infer.sh <checkpoint_dir>

#SBATCH --job-name=ACE_EVAL
#SBATCH --account=ec30
#SBATCH --time=02-00:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --ntasks-per-node=1
#SBATCH --mem-per-cpu=8G
#SBATCH --partition=accel
#SBATCH --gpus=1


# sanity: exit on all errors and disallow unset environment variables
set -o errexit
set -o nounset

# the important bit: unload all current modules (just in case) and load only the necessary ones

module purge

module use -a /fp/projects01/ec30/software/easybuild/modules/all/
module load nlpl-pytorch/1.7.1-foss-2019b-cuda-11.1.1-Python-3.7.4
module load nlpl-transformers/4.14.1-foss-2019b-Python-3.7.4
module load nlpl-nlptools/2021.01-foss-2019b-Python-3.7.4
module load nlpl-scipy-ecosystem/2021.01-foss-2019b-Python-3.7.4
module load sentencepiece/0.1.96-foss-2019b-Python-3.7.4
module load nlpl-nltk/3.5-foss-2019b-Python-3.7.4
module load nlpl-wandb/0.12.6-foss-2019b-Python-3.7.4

# $1 = checkpoint directory containing best_checkpoint.h5
python3 inference.py --checkpoint_dir "$1"
--------------------------------------------------------------------------------
/perin/utility/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/utility/.DS_Store
--------------------------------------------------------------------------------
/perin/utility/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/utility/__init__.py
--------------------------------------------------------------------------------
/perin/utility/autoclip.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
class AutoClip:
    """Adaptive per-parameter gradient clipping.

    Keeps a rolling history of each parameter's gradient L2 norms and clips
    every parameter to a percentile of its own history on each call.
    """

    def __init__(self, parameters, initial_clipping=0.1, percentile=50, history_len=1000):
        self.parameters = list(parameters)
        # Seed the history with `initial_clipping` so early steps clip conservatively.
        self.grad_history = [torch.full([history_len], initial_clipping) for _ in self.parameters]

        self.index = 0
        self.history_len = history_len
        self.percentile = percentile

    @torch.no_grad()
    def __call__(self):
        """Clip gradients in place; return the mean pre-clip norm (0.0 if none).

        Parameters with missing or all-zero gradients are skipped.
        """
        self._add_to_history(self.parameters)

        grad_norms = []
        for parameter, history in zip(self.parameters, self.grad_history):
            if parameter.grad is None or not parameter.grad.abs().sum().is_nonzero():
                continue

            clip_value = self._get_percentile(history, self.percentile)
            grad_norms.append(nn.utils.clip_grad_norm_(parameter, clip_value).item())

        # BUGFIX: previously divided by len(grad_norms) unconditionally, raising
        # ZeroDivisionError when no parameter had a usable gradient.
        if not grad_norms:
            return 0.0
        return sum(grad_norms) / len(grad_norms)

    def _add_to_history(self, parameters):
        """Record the current gradient L2 norm of every parameter with a usable gradient."""
        for i, param in enumerate(parameters):
            if param.grad is None or not param.grad.abs().sum().is_nonzero():
                continue

            self.grad_history[i][self.index] = param.grad.data.norm(2)

        self.index = (self.index + 1) % self.history_len

    def _get_percentile(self, tensor, percentile):
        """Return the `percentile`-th percentile of `tensor` (nearest-rank method)."""
        k = 1 + round(0.01 * percentile * (tensor.numel() - 1))
        return tensor.kthvalue(k).values.item()
43 |
--------------------------------------------------------------------------------
/perin/utility/cross_entropy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | import torch.nn.functional as F
6 |
7 |
def masked_sum(loss, mask, label_weight=1, eps=1e-8, reduction=True):
    """Reduce a per-element loss, ignoring positions where `mask` is True.

    With a mask, the sum is normalized by the (weighted) count of kept
    positions; without one, a plain mean is used. When reduction is False the
    (masked) element-wise loss is returned unchanged.
    """
    if mask is None:
        return loss.mean() if reduction else loss

    loss = loss.masked_fill(mask, 0.0)
    if not reduction:
        return loss
    denominator = ((1 - mask.long()) * label_weight).sum() + eps
    return loss.sum() / denominator
18 |
19 |
def cross_entropy(log_prob, target, mask, focal=False, label_weight=None, reduction=True):
    """Negative log-likelihood over `log_prob`, with optional focal modulation
    and per-element label weights, reduced via masked_sum."""
    target = target.unsqueeze(-1)
    picked = log_prob.gather(-1, target).squeeze(-1)  # log p(target)

    if focal:
        # Down-weight well-classified examples (focal loss, gamma = 2).
        modulation = (1.0 - picked.exp()) ** 2
    else:
        modulation = 1.0

    loss = -modulation * picked

    if label_weight is None:
        return masked_sum(loss, mask, reduction=reduction)
    return masked_sum(loss * label_weight, mask, label_weight=label_weight, reduction=reduction)
35 |
36 |
def binary_cross_entropy(logits, target, mask, focal=False, reduction=True):
    """Binary cross-entropy with logits, optional focal modulation, masked reduction."""
    raw = F.binary_cross_entropy_with_logits(logits, target, reduction="none")

    if focal:
        p = logits.sigmoid()
        p_correct = target * p + (1.0 - target) * (1.0 - p)  # probability of the true class
        raw = ((1.0 - p_correct) ** 2) * raw

    return masked_sum(raw, mask, reduction=reduction)
47 |
--------------------------------------------------------------------------------
/perin/utility/hungarian_matching.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from scipy.optimize import linear_sum_assignment
6 |
7 |
@torch.no_grad()
def match_label(target, matching, shape, device, compute_mask=True):
    """Scatter gold labels into a zero tensor of `shape` according to `matching`.

    `compute_mask` is accepted for interface compatibility but unused here.
    """
    idx = _get_src_permutation_idx(matching)

    matched = torch.zeros(shape, dtype=torch.long, device=device)
    matched[idx] = torch.cat([gold[j] for gold, (_, j) in zip(target, matching)])
    return matched
16 |
17 |
@torch.no_grad()
def match_anchor(anchor, matching, shape, device):
    """Scatter gold anchor rows by `matching`; also return a mask of unmatched queries."""
    target, _ = anchor
    idx = _get_src_permutation_idx(matching)

    matched = torch.zeros(shape, dtype=torch.long, device=device)
    matched[idx] = torch.cat([gold[j, :] for gold, (_, j) in zip(target, matching)])

    # True wherever a query received no gold anchor.
    unmatched_mask = torch.ones(shape[:2], dtype=torch.bool, device=device)
    unmatched_mask[idx] = False

    return matched, unmatched_mask
30 |
31 |
32 | def _get_src_permutation_idx(indices):
33 | # permute predictions following indices
34 | batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
35 | src_idx = torch.cat([src for (src, _) in indices])
36 | return batch_idx, src_idx
37 |
38 |
@torch.no_grad()
def get_matching(cost_matrices):
    """Solve the assignment problem for each cost matrix, maximizing total score.

    Returns one (row_indices, col_indices) pair of LongTensors per matrix.
    """
    matchings = []
    for matrix in cost_matrices:
        rows, cols = linear_sum_assignment(matrix, maximize=True)
        matchings.append((torch.tensor(rows, dtype=torch.long), torch.tensor(cols, dtype=torch.long)))

    return matchings
48 |
49 |
def sort_by_target(matchings):
    """Reorder each (source, target) matching so its targets are ascending."""
    reordered = []
    for source, target in matchings:
        target, order = target.sort()
        reordered.append((source[order], target))
    return reordered
58 |
59 |
def reorder(hidden, matchings, max_length):
    """Gather matched hidden states into a zero-padded (B, max_length, H) tensor."""
    batch_size, _, hidden_dim = hidden.shape
    matchings = sort_by_target(matchings)

    gathered = torch.zeros(batch_size, max_length, hidden_dim, device=hidden.device)
    for b, (sources, _) in enumerate(matchings):
        gathered[b, : len(sources), :] = hidden[b, sources, :]

    return gathered
70 |
--------------------------------------------------------------------------------
/perin/utility/initialize.py:
--------------------------------------------------------------------------------
1 | import random
2 | import torch
3 | import os
4 |
5 |
def seed_everything(seed_value=42):
    """Make runs reproducible: seed all RNGs and force deterministic cuDNN."""
    os.environ["PYTHONHASHSEED"] = str(seed_value)  # hash randomization
    random.seed(seed_value)                         # python stdlib RNG
    torch.manual_seed(seed_value)                   # torch CPU RNG
    torch.cuda.manual_seed_all(seed_value)          # every CUDA device (no-op on CPU)

    cudnn = torch.backends.cudnn
    cudnn.enabled = True
    cudnn.deterministic = True
    cudnn.benchmark = False
15 |
16 |
def initialize(args, init_wandb: bool):
    """Seed all RNGs and optionally start a Weights & Biases run."""
    seed_everything(args.seed)

    if not init_wandb:
        return

    import wandb  # imported lazily so offline runs need no wandb install

    wandb.init(
        name=f"{args.framework}_{args.language}_{args.graph_mode}_{args.name}",
        config=args,
        project="eGraph",
        tags=[args.framework, args.language],
    )
    print("Connection to Weights & Biases initialized.", flush=True)
25 |
--------------------------------------------------------------------------------
/perin/utility/loading_bar.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
class LoadingBar:
    """Renders a textual progress bar with quarter-block resolution."""

    def __init__(self, length: int = 40):
        self.length = length
        self.symbols = ["┈", "░", "▒", "▓"]

    def __call__(self, progress: float) -> str:
        """Return the bar string for `progress` in [0, 1]."""
        quarters = int(progress * self.length * 4 + 0.5)
        full, remainder = divmod(quarters, 4)
        if quarters < self.length * 4:
            # Partial cell plus the remaining empty track.
            middle = full * "█" + self.symbols[remainder] + max(0, self.length - 1 - full) * "┈"
        else:
            middle = full * "█"
        return "┠┈" + middle + "┈┨"
13 |
--------------------------------------------------------------------------------
/perin/utility/parser_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import json
5 | from itertools import chain
6 | from transformers import AutoTokenizer
7 |
8 | from utility.subtokenize import subtokenize
9 |
10 | import os
11 | os.environ["TOKENIZERS_PARALLELISM"] = "true"
12 |
13 |
def load_dataset(path):
    """Load a JSON-lines dataset keyed by sentence id.

    Each record gains: "sentence" (the raw text), "input" (space-split tokens),
    "token anchors" (character spans per token), and empty "nodes"/"edges"
    lists when absent.
    """
    data = {}
    with open(path, encoding="utf8") as f:
        for line in f.readlines():
            sentence = json.loads(line)
            sentence.setdefault("nodes", [])
            sentence.setdefault("edges", [])
            data[sentence["id"]] = sentence

    for sample in data.values():
        sample["sentence"] = sample["input"]
        tokens = sample["sentence"].split(' ')
        sample["input"] = tokens

        # Character offsets assume tokens are separated by single spaces.
        anchors, offset = [], 0
        for token in tokens:
            anchors.append({"from": offset, "to": offset + len(token)})
            offset += len(token) + 1
        sample["token anchors"] = anchors

    return data
35 |
36 |
def node_generator(data):
    """Yield (node, sentence) pairs for every node of every sentence."""
    for sentence in data.values():
        yield from ((node, sentence) for node in sentence["nodes"])
41 |
42 |
def anchor_ids_from_intervals(data):
    """Convert each node's character-level anchor intervals to token indices.

    After this pass every node has "anchors" (sorted token indices whose span
    overlaps any of the node's original intervals) and "anchor intervals"
    (the original {"from","to"} dicts, sorted), and every sentence's
    "token anchors" entries become plain [from, to] pairs. Mutates in place.
    """
    for sentence in data.values():
        token_spans = sentence["token anchors"]
        for node in sentence["nodes"]:
            intervals = sorted(node.get("anchors", []), key=lambda a: (a["from"], a["to"]))

            referenced = set()
            for interval in intervals:
                # token_spans are ordered, so we can stop at the first span
                # that starts past the interval's end
                for index, span in enumerate(token_spans):
                    if span["to"] <= interval["from"]:
                        continue
                    if span["from"] >= interval["to"]:
                        break
                    referenced.add(index)

            node["anchor intervals"] = intervals
            node["anchors"] = sorted(referenced)

    for sentence in data.values():
        sentence["token anchors"] = [[span["from"], span["to"]] for span in sentence["token anchors"]]
65 |
66 |
def create_bert_tokens(data, encoder: str):
    """Subtokenize every sentence with the fast HF tokenizer for `encoder`.

    Stores the results on each sentence as "bert input" and "to scatter"
    (whatever `subtokenize` returns for the token list — presumably subword
    ids and a token-alignment map; confirm against `utility.subtokenize`).
    """
    tokenizer = AutoTokenizer.from_pretrained(encoder, use_fast=True)

    for sentence in data.values():
        bert_input, to_scatter = subtokenize(sentence["input"], tokenizer)
        sentence["bert input"] = bert_input
        sentence["to scatter"] = to_scatter
72 |
73 |
def create_edges(sentence, label_f=None):
    """Build the "edge presence" and "edge labels" fields of `sentence`.

    Both fields get the shape [N, N, triples], where N is the node count and
    each triple is (source, target, value): value is 1 for presence, and the
    (optionally `label_f`-mapped) label string for labels. Edges without a
    "label" key use the label "none".

    Returns the number of edges processed.
    """
    N = len(sentence["nodes"])

    sentence["edge presence"] = [N, N, []]
    sentence["edge labels"] = [N, N, []]

    for e in sentence["edges"]:
        source, target = e["source"], e["target"]
        # idiom fix: dict.get with default instead of an explicit "in" check
        label = e.get("label", "none")

        if label_f is not None:
            label = label_f(label)

        sentence["edge presence"][-1].append((source, target, 1))
        sentence["edge labels"][-1].append((source, target, label))

    # Each edge appended exactly one presence triple, so this is the count.
    return len(sentence["edge presence"][-1])
92 |
--------------------------------------------------------------------------------
/perin/utility/predict.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch
4 | import sys
5 |
6 | from subprocess import run
7 | from data.batch import Batch
8 |
9 | sys.path.append("../evaluation")
10 | from evaluate_single_dataset import evaluate
11 |
12 |
def predict(model, data, input_path, raw_input_path, args, logger, output_directory, device, mode="validation", epoch=None):
    """Run inference, dump predictions as JSON lines, convert them with
    ./convert.sh and, when `raw_input_path` is given, evaluate the result.

    Returns the evaluation results dict, or None when `raw_input_path` is
    falsy (bug fix: `results` used to be unbound in that case, so the final
    `return results` raised UnboundLocalError).
    """
    model.eval()

    framework, language = args.framework, args.language

    # Pre-load one blank record per input sentence so every id gets an
    # output line, even if the model produces nothing for it.
    sentences = {}
    with open(input_path, encoding="utf8") as f:
        for line in f:
            line = json.loads(line)
            line["nodes"], line["edges"], line["tops"] = [], [], []
            line["framework"], line["language"] = framework, language
            sentences[line["id"]] = line

    for batch in data:
        with torch.no_grad():
            predictions = model(Batch.to(batch, device), inference=True)
        # Merge every predicted field into the matching blank record.
        for prediction in predictions:
            for key, value in prediction.items():
                sentences[prediction["id"]][key] = value

    if epoch is not None:
        output_path = f"{output_directory}/prediction_{mode}_{epoch}_{framework}_{language}.json"
    else:
        output_path = f"{output_directory}/prediction.json"

    with open(output_path, "w", encoding="utf8") as f:
        for sentence in sentences.values():
            json.dump(sentence, f, ensure_ascii=False)
            f.write("\n")
        # Ensure the file is fully on disk before the conversion script reads it.
        f.flush()

    run(["./convert.sh", output_path])

    results = None
    if raw_input_path:
        results = evaluate(raw_input_path, f"{output_path}_converted")
        print(mode, results, flush=True)

        if logger is not None:
            logger.log_evaluation(results, mode, epoch)

    return results
53 |
--------------------------------------------------------------------------------
/perin/utility/schedule/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/perin/utility/schedule/__init__.py
--------------------------------------------------------------------------------
/perin/utility/schedule/linear_lr.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import math
5 |
6 |
class LinearLr:
    """Per-param-group LR schedule: optional initial delay (first 5% of
    training when `delay` is set), linear warm-up until 10% of total steps,
    then cosine decay from `learning_rate` down to `learning_rate / multiplier`."""

    def __init__(self, param_group, learning_rate: float, total_steps: int, delay: bool, multiplier: int):
        self.param_group = param_group
        self.max_lr = learning_rate
        self.total_steps = total_steps
        self.delay_steps = total_steps / 20 if delay else 0
        self.decay_multiplier = multiplier
        self.steps = 0

    def __call__(self, _):
        """Advance one step and write the new rate into the param group."""
        self.steps += 1
        warmup_end = self.total_steps / 10

        if self.steps < self.delay_steps:
            lr = 0.0
        elif self.steps < warmup_end:
            # linear warm-up from 0 to max_lr
            lr = self.max_lr * (self.steps - self.delay_steps) / (warmup_end - self.delay_steps)
        else:
            # cosine decay from max_lr to the floor max_lr / multiplier
            floor = self.max_lr / self.decay_multiplier
            amplitude = self.max_lr - floor
            phase = math.pi * (self.steps - warmup_end) / (self.total_steps * 9 / 10)
            lr = amplitude * (math.cos(phase) + 1) / 2 + floor

        # Safety first!
        self.param_group["lr"] = max(lr, 0.0)

    def lr(self) -> float:
        """Current learning rate of this param group."""
        return self.param_group["lr"]
37 |
--------------------------------------------------------------------------------
/perin/utility/schedule/multi_scheduler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | from utility.schedule.linear_lr import LinearLr
5 |
6 |
def multi_scheduler_wrapper(optimizer, args, steps_per_epoch):
    """Build one LinearLr per optimizer param group and bundle them.

    Layout of `optimizer.param_groups` (assumed; derived from the indexing
    below): two runs of `n_layers` encoder groups, each with layer-wise
    decayed encoder LR, followed by two decoder groups at the decoder LR.
    """
    total_steps = args.epochs * steps_per_epoch
    n_layers = (len(optimizer.param_groups) - 2) // 2

    schedulers = []
    # Two parallel stacks of encoder layers share the same layer-wise decay.
    for offset in (0, n_layers):
        for i in range(n_layers):
            schedulers.append(
                LinearLr(
                    optimizer.param_groups[offset + i],
                    args.encoder_learning_rate * (args.layerwise_lr_decay ** i),
                    total_steps,
                    False,
                    args.lr_decay_multiplier,
                )
            )
    # The last two groups belong to the decoder.
    for group in (optimizer.param_groups[-2], optimizer.param_groups[-1]):
        schedulers.append(LinearLr(group, args.decoder_learning_rate, total_steps, False, args.lr_decay_multiplier))

    return MultiScheduler(schedulers)
26 |
27 |
class MultiScheduler:
    """Treat a list of per-param-group schedulers as a single scheduler."""

    def __init__(self, schedulers):
        self.schedulers = schedulers

    def __call__(self, epoch):
        """Step every underlying scheduler with the same argument."""
        for scheduler in self.schedulers:
            scheduler(epoch)

    def lr(self) -> list:
        """Current learning rate of each scheduler, in order.

        Fix: the annotation claimed `-> float` but a list is returned.
        """
        return [scheduler.lr() for scheduler in self.schedulers]
38 |
--------------------------------------------------------------------------------
/perin/utility/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # coding=utf-8
3 |
4 | import torch
5 | from PIL import Image
6 |
7 |
def create_padding_mask(batch_size, total_length, lengths, device):
    """Boolean mask of shape (batch_size, total_length), True at padding.

    Position t of row b is True iff t >= lengths[b].
    """
    positions = torch.arange(total_length, device=device)       # (T,)
    positions = positions.unsqueeze(0).expand(batch_size, total_length)  # (B, T)
    return positions >= lengths.unsqueeze(1)
12 |
13 |
def resize_to_square(image, target_size: int, background_color="white"):
    """Pad `image` onto a 2:1 (width:height) canvas, centred, then resize
    to (2 * target_size, target_size) with bicubic resampling.

    NOTE(review): despite the name, the output is a 2:1 rectangle, not a
    square — presumably intentional for this model; confirm with callers.
    """
    width, height = image.size

    if width / 2 > height:
        # Too wide: grow the canvas vertically and centre the image.
        canvas = Image.new(image.mode, (width, width // 2), background_color)
        canvas.paste(image, (0, (width // 2 - height) // 2))
        image = canvas
    elif height * 2 > width:
        # Too tall: grow the canvas horizontally and centre the image.
        canvas = Image.new(image.mode, (height * 2, height), background_color)
        canvas.paste(image, ((height * 2 - width) // 2, 0))
        image = canvas

    return image.resize([target_size * 2, target_size], resample=Image.BICUBIC)
27 |
--------------------------------------------------------------------------------
/preprocess/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huiling-y/EventGraph/5b18a3048c4fd8381554f9b88462a4ba76a0c1e3/preprocess/.DS_Store
--------------------------------------------------------------------------------