├── amr_verbnet_semantics ├── __init__.py ├── core │ ├── __init__.py │ ├── models.py │ ├── extract_timeline.py │ └── spacy_nlp_parse.py ├── grpc_defs │ ├── __init__.py │ ├── ACEDoc_pb2_grpc.py │ ├── enhanced_amr_pb2_grpc.py │ ├── ace_pb2_grpc.py │ ├── enhanced_amr_pb2.py │ └── ace_pb2.py ├── service │ ├── __init__.py │ ├── abstract_kb.py │ ├── ulkb.py │ ├── corpus.py │ ├── verbnet.py │ ├── propbank.py │ ├── amr.py │ ├── sparql.py │ └── semlink.py ├── corpus_readers │ ├── __init__.py │ ├── verbnet_reader.py │ └── propbank_reader.py ├── grpc_clients │ ├── __init__.py │ └── clients.py ├── test │ ├── test_issue_13.py │ ├── test_local_amr_client.py │ ├── test_verbnet.py │ ├── test_ground_text_to_verbnet.py │ ├── test_query_semantics.py │ ├── test_reification.py │ ├── test_amr_parsing.py │ ├── test_visualize_dep_parse.py │ ├── test_visualize_sem_graph.py │ ├── test_propbank.py │ ├── test_issue_12.py │ ├── test_cache_amr_client.py │ ├── test_service.py │ ├── test_issue_8.py │ ├── test_issue_10.py │ ├── test_issue_11.py │ ├── test_issue_7.py │ ├── test_issue_9.py │ ├── test_issue_6.py │ ├── test_rdf_mappings.py │ ├── test_ground_amr.py │ └── test_ground_text_to_verbnet.ipynb ├── utils │ ├── format_util.py │ ├── eval_util.py │ ├── text_util.py │ ├── reification_util.py │ └── amr_util.py └── web_app │ ├── __init__.py │ ├── status.py │ ├── routes.py │ └── error_handlers.py ├── KG ├── images │ ├── README │ ├── ULKB_SUMMARY2.png │ ├── ulkb_access_img_pb.jpg │ ├── ulkb_access_img_vn.jpg │ └── ulkb_access_img_LClass.jpg ├── UL_KB_V3_PUB.ttl.zip ├── UL_KB_V4_PUB.ttl.zip ├── UL_KB_V5_PUB.ttl.zip ├── ingestion │ ├── ULKB_UnifiedMapping_V1.xlsx │ └── README.md ├── service │ ├── README.md │ ├── config_sparql.yaml │ └── config.py ├── config_sparql.yaml ├── README ├── config.py └── ulkb_access.py ├── setup.cfg ├── scripts ├── remove_snapshot.sh ├── create_conda_env.sh ├── remove_conda_env.sh ├── refactor_code.sh ├── download_semlink.sh ├── download_third_party.sh ├── 
download_propbank.sh ├── download_verbnet.sh └── install.sh ├── assets ├── blazegraph_install_1.jpg ├── blazegraph_install_2.jpg └── blazegraph_install_3.jpg ├── set_environment.sh ├── config.py ├── requirements.txt ├── README.md ├── app_config.py ├── config_template.yaml ├── setup.py ├── DESIGN.md ├── .gitignore ├── INSTALLATION.md └── LICENSE /amr_verbnet_semantics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/grpc_defs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/corpus_readers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/grpc_clients/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /KG/images/README: -------------------------------------------------------------------------------- 1 | Directory for images in the Jupyter notebook 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [flake8] 5 | 
max-line-length=120 -------------------------------------------------------------------------------- /scripts/remove_snapshot.sh: -------------------------------------------------------------------------------- 1 | find . -type f -name "*snapshot.pickle*" | xargs rm 2 | -------------------------------------------------------------------------------- /KG/UL_KB_V3_PUB.ttl.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/UL_KB_V3_PUB.ttl.zip -------------------------------------------------------------------------------- /KG/UL_KB_V4_PUB.ttl.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/UL_KB_V4_PUB.ttl.zip -------------------------------------------------------------------------------- /KG/UL_KB_V5_PUB.ttl.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/UL_KB_V5_PUB.ttl.zip -------------------------------------------------------------------------------- /scripts/create_conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | conda create -n amr-verbnet -y python=3.7 -------------------------------------------------------------------------------- /KG/images/ULKB_SUMMARY2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/images/ULKB_SUMMARY2.png -------------------------------------------------------------------------------- /scripts/remove_conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | conda remove --name amr-verbnet --all -y 5 | -------------------------------------------------------------------------------- 
/assets/blazegraph_install_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/assets/blazegraph_install_1.jpg -------------------------------------------------------------------------------- /assets/blazegraph_install_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/assets/blazegraph_install_2.jpg -------------------------------------------------------------------------------- /assets/blazegraph_install_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/assets/blazegraph_install_3.jpg -------------------------------------------------------------------------------- /KG/images/ulkb_access_img_pb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/images/ulkb_access_img_pb.jpg -------------------------------------------------------------------------------- /KG/images/ulkb_access_img_vn.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/images/ulkb_access_img_vn.jpg -------------------------------------------------------------------------------- /KG/images/ulkb_access_img_LClass.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/images/ulkb_access_img_LClass.jpg -------------------------------------------------------------------------------- /set_environment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | [ ! -d venv ] && virtualenv venv --python=python3.6 4 | . 
venv/bin/activate 5 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/grpc_defs/ACEDoc_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | -------------------------------------------------------------------------------- /KG/ingestion/ULKB_UnifiedMapping_V1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AMR-CSLogic/HEAD/KG/ingestion/ULKB_UnifiedMapping_V1.xlsx -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_issue_13.py: -------------------------------------------------------------------------------- 1 | 2 | from amr_verbnet_semantics.core.amr_verbnet_enhance import \ 3 | ground_text_to_verbnet 4 | -------------------------------------------------------------------------------- /KG/service/README.md: -------------------------------------------------------------------------------- 1 | The code here is designed to access the ULKB version. Make sure you match the ULKB_access_VX with the right version of the KG. 2 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config file for FLASK web service. 3 | """ 4 | import logging 5 | 6 | # Secret for session management 7 | LOGGING_LEVEL = logging.INFO 8 | -------------------------------------------------------------------------------- /scripts/refactor_code.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | find . -type f -name "*.py" | xargs autoflake --in-place --remove-all-unused-imports 3 | find . 
-type f -name "*.py" | xargs isort 4 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_local_amr_client.py: -------------------------------------------------------------------------------- 1 | from amr_verbnet_semantics.service.amr import LocalAMRClient 2 | 3 | if __name__ == "__main__": 4 | 5 | amr_client = LocalAMRClient() 6 | 7 | list_text = [ 8 | "I loved him writing novels.", 9 | "I admired him for his honesty." 10 | ] 11 | for text in list_text: 12 | amr = amr_client.get_amr(text) 13 | print(amr) 14 | 15 | -------------------------------------------------------------------------------- /scripts/download_semlink.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download Semlink from its GitHub repository 4 | # Example: sh scripts/download_semlink.sh [save_dir] 5 | # Usage: sh scripts/download_semlink.sh ./data 6 | 7 | set -e 8 | save_dir=$1 9 | 10 | mkdir -p ${save_dir} 11 | pushd . 12 | cd ${save_dir} 13 | git clone https://github.com/cu-clear/semlink.git 14 | popd 15 | 16 | echo "Saved to ${save_dir}/semlink" 17 | echo "DONE." 
-------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_verbnet.py: -------------------------------------------------------------------------------- 1 | from nltk.corpus.reader import VerbnetCorpusReader 2 | from nltk.corpus.util import LazyCorpusLoader 3 | 4 | verbnet = LazyCorpusLoader("verbnet3.4", VerbnetCorpusReader, r"(?!\.).*\.xml") 5 | print(verbnet.frames("escape-51.1-1")) 6 | try: 7 | print(verbnet.frames("escape-51.1-2")) 8 | except Exception as e: 9 | print(e) 10 | 11 | print(verbnet.frames("leave-51.2")) 12 | print(verbnet.subclasses("escape-51.1-1")) 13 | 14 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_ground_text_to_verbnet.py: -------------------------------------------------------------------------------- 1 | import json 2 | from amr_verbnet_semantics.core.amr_verbnet_enhance \ 3 | import ground_text_to_verbnet 4 | 5 | if __name__ == '__main__': 6 | text = "You're in a corridor. You can see a shoe cabinet." 7 | ret = ground_text_to_verbnet(text, use_coreference=False, verbose=True) 8 | # print(json.dumps(ret, indent=2)) 9 | print(ret['sentence_parses'][0]['amr']) 10 | print(ret['sentence_parses'][1]['amr']) 11 | 12 | -------------------------------------------------------------------------------- /KG/config_sparql.yaml: -------------------------------------------------------------------------------- 1 | # THIS IS THE CONFIGURATION NEEDED FOR THE STANDALONE UL_KG 2 | 3 | 4 | 5 | # To run within the AMR-to-CS logic, use the project-wide config.yaml. The config.py 6 | # file in this directory will attempt to read it first. 
7 | # Configure RDF store 8 | # Use this if you are going to use the server 9 | # SPARQL_ENDPOINT = "http://goedel.sl.cloud9.ibm.com:9999/blazegraph/namespace/UL_KB_V4" 10 | #Use this if you install the server locally 11 | SPARQL_ENDPOINT: "http://localhost:9999/blazegraph/namespace/UL_KB_V4_PUB" 12 | 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.25.1 2 | nltk==3.6.2 3 | jsonlines==2.0.0 4 | grpcio==1.37.1 5 | grpcio-tools==1.37.1 6 | penman==1.2.0 7 | fuzzywuzzy==0.17.0 8 | networkx==2.4 9 | #spacy==2.1.0 10 | spacy==2.0.12 11 | neuralcoref==3.1 12 | gensim==3.8.1 13 | protobuf==3.17.3 14 | 15 | python-Levenshtein 16 | flask 17 | matplotlib 18 | graphviz 19 | ipykernel 20 | SPARQLWrapper 21 | ipdb 22 | pyyaml 23 | notebook 24 | autoflake 25 | isort 26 | prettytable 27 | 28 | # third party dependencies 29 | torch==1.3.0 30 | # fairseq==0.8.0 31 | packaging 32 | -------------------------------------------------------------------------------- /KG/service/config_sparql.yaml: -------------------------------------------------------------------------------- 1 | # THIS IS THE CONFIGURATION NEEDED FOR THE STANDALONE UL_KG 2 | 3 | 4 | 5 | # To run within the AMR-to-CS logic, use the project-wide config.yaml. The config.py 6 | # file in this directory will attempt to read it first. 
7 | # Configure RDF store 8 | #Use this if you are going to use the server 9 | #SPARQL_ENDPOINT = "http://goedel.sl.cloud9.ibm.com:9999/blazegraph/namespace/UL_KB_V5" 10 | #Use this if you install the server locally 11 | SPARQL_ENDPOINT: "http://localhost:9999/blazegraph/namespace/UL_KB_V5" 12 | 13 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/service/abstract_kb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unified query interfaces for the KB. 3 | """ 4 | 5 | 6 | class AbstractKb: 7 | def __init__(self): 8 | pass 9 | 10 | def query_semantics( 11 | self, verbnet_id, verbnet_version=None, verbose=False): 12 | raise NotImplementedError() 13 | 14 | def query_propbank_verbnet_class_mapping( 15 | self, propbank_id, verbnet_version=None, verbose=False): 16 | raise NotImplementedError() 17 | 18 | def query_verbnet_semantic_roles(self, propbank_id, verbose=False): 19 | raise NotImplementedError() 20 | 21 | 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # amr-verbnet-semantics 2 | 3 | ## Install 4 | Please read [INSTALLATION.md](./INSTALLATION.md) 5 | 6 | ## Note 7 | - How to clear a cache 8 | - This implementation uses a cache to speed up the AMR parser. A snapshot is stored on disk to store the cache permanently. The file name of the snapshot takes this format `snapshot.pickle_%Y-%m-%d_%H-%M-%S`. Here is an example of a file name; `snapshot.pickle_2021-10-06_14-52-41` 9 | - If you want to delete snapshot files, use `bash scripts/remove_snapshot.sh /`. Then please start up the server again to clear a cache. The method is written in [INSTALLATION.md](./INSTALLATION.md). 
10 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_query_semantics.py: -------------------------------------------------------------------------------- 1 | from amr_verbnet_semantics.service.verbnet import query_semantics 2 | 3 | verbnet_version = 'verbnet3.4' 4 | 5 | verbnet_id = 'escape-51.1' 6 | semantics = query_semantics(verbnet_id, verbnet_version) 7 | print(semantics) 8 | 9 | verbnet_id = 'spray-9.7-2' 10 | semantics = query_semantics(verbnet_id, verbnet_version) 11 | print(semantics) 12 | 13 | verbnet_id = 'build-26.1-1' 14 | semantics = query_semantics(verbnet_id, verbnet_version) 15 | print(semantics) 16 | 17 | verbnet_id = 'stop-55.4-1-1' 18 | semantics = query_semantics(verbnet_id, verbnet_version) 19 | print(semantics) 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /scripts/download_third_party.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this is called from setup.py in the main project dir 4 | 5 | echo "** Installing third_party **" 6 | rm -rf third_party 7 | mkdir third_party 8 | 9 | rm -rf transition-amr-parser-0.4.2 10 | wget https://github.com/IBM/transition-amr-parser/archive/refs/tags/v0.4.2.zip 11 | unzip v0.4.2.zip 12 | pushd transition-amr-parser-0.4.2 13 | sed -I "" 's/torch<=1.2,<=1.3/torch==1.3.0/g' setup.py # -i doesn't work on osx 14 | pip install -e . 15 | popd 16 | rm v0.4.2.zip 17 | 18 | wget https://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz 19 | tar -zxvf roberta.large.tar.gz 20 | mv roberta.large third_party 21 | rm roberta.large.tar.gz 22 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_reification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test module for reification using penman package. 
3 | """ 4 | 5 | from penman.codec import PENMANCodec 6 | from penman.models.amr import model 7 | from penman.transform import reify_edges 8 | 9 | codec = PENMANCodec(model=model) 10 | g = codec.decode(""" 11 | (d / dress 12 | :ARG1-of (r / red-02) 13 | :ARG1-of (c / clean-04) 14 | :location (n / nightstand)) 15 | """) 16 | g = reify_edges(g, model) 17 | print(codec.encode(g)) 18 | 19 | 20 | """ 21 | Output: 22 | 23 | (d / dress 24 | :ARG1-of (r / red-02) 25 | :ARG1-of (c / clean-04) 26 | :ARG1-of (_ / be-located-at-91 27 | :ARG2 (n / nightstand))) 28 | """ 29 | 30 | -------------------------------------------------------------------------------- /KG/README: -------------------------------------------------------------------------------- 1 | The KG is in RDF/OWL. The files are as follows: 2 | 3 | UL_KB_V3_PUB.ttl --> the graph file is self-contained. It can be installed in any triplestore and queried with SPARQL 4 | The two previous versions, V2 and V1 are added here for completeness 5 | 6 | KBAccess_RDF --> is the Jupyter notebook with examples of the JSON API and the underlying SPARQL queries. The API calls use the ulkb_access code and 7 | the images in the directory. 8 | 9 | ulkb_access.py --> the implementation of the JSON API (v3) 10 | 11 | ulkb_access_v4.py --> Access for version V4. 
Make sure the yaml configuration file points to the right place 12 | 13 | 14 | Any issues or questions about the graph, contact rosariou@us.ibm.com 15 | 16 | -------------------------------------------------------------------------------- /scripts/download_propbank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download Propbank 4 | # Example: sh scripts/download_propbank.sh [save_dir] 5 | # Usage: sh scripts/download_propbank.sh ~/nltk_data/corpora/ 6 | 7 | set -e 8 | save_dir=$1 9 | 10 | # Propbank frames 3.1 11 | mkdir -p ${save_dir} 12 | wget -O ${save_dir}/propbank-frames-3.1.zip https://github.com/propbank/propbank-frames/archive/refs/tags/v3.1.zip 13 | echo "Unzip ..." 14 | unzip ${save_dir}/propbank-frames-3.1.zip -d ${save_dir} 15 | echo "Saved to ${save_dir}/propbank-frames-3.1" 16 | 17 | # Replace with the latest propbank frames folder 18 | cp -R ${save_dir}/propbank ${save_dir}/propbank-latest 19 | rm -rf ${save_dir}/propbank-latest/frames/ 20 | cp -R ${save_dir}/propbank-frames-3.1/frames/ ${save_dir}/propbank-latest 21 | 22 | echo "DONE." 
-------------------------------------------------------------------------------- /amr_verbnet_semantics/core/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Models defined for the services 3 | """ 4 | 5 | 6 | class PredicateCalculus(object): 7 | def __init__(self, predicate, arguments, is_negative=False): 8 | self.predicate = predicate 9 | self.arguments = arguments 10 | self.is_negative = is_negative 11 | 12 | def __repr__(self): 13 | stmt = self.predicate + "(" + ", ".join(self.arguments) + ")" 14 | if self.is_negative: 15 | stmt = "NOT({})".format(stmt) 16 | return stmt 17 | 18 | def to_json(self): 19 | return { 20 | "predicate": self.predicate, 21 | "arguments": self.arguments, 22 | "is_negative": self.is_negative 23 | } 24 | 25 | 26 | if __name__ == "__main__": 27 | pass 28 | 29 | -------------------------------------------------------------------------------- /app_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Load configuration from .yaml file. 
3 | """ 4 | import os 5 | 6 | import yaml 7 | from amr_verbnet_semantics.utils.format_util import DictObject, to_json 8 | 9 | config_file_path = os.path.join(os.path.dirname(__file__), "config.yaml") 10 | if not os.path.exists(config_file_path): 11 | raise \ 12 | Exception("Please create a config.yaml file " 13 | "following the config_template.yaml " 14 | "in the project root dir ...") 15 | 16 | with open(config_file_path, "r") as f: 17 | config = yaml.safe_load(f) 18 | 19 | config = DictObject(config) 20 | 21 | env_port_num = os.environ.get('AMR_LOCAL_SERVICE_PORT', '') 22 | if env_port_num != '': 23 | config.LOCAL_SERVICE_PORT = env_port_num 24 | 25 | if __name__ == "__main__": 26 | print(to_json(config)) 27 | print(config.SPARQL_ENDPOINT) 28 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_amr_parsing.py: -------------------------------------------------------------------------------- 1 | 2 | from amr_verbnet_semantics.grpc_clients import AMRClientTransformer 3 | 4 | amr_host = "mnlp-demo.sl.cloud9.ibm.com" 5 | amr_port = 59990 6 | amr_client = AMRClientTransformer(f"{amr_host}:{amr_port}") 7 | 8 | 9 | if __name__ == "__main__": 10 | """ 11 | list_text = [ 12 | "The contractor builds houses for $100,000.", 13 | "$100,000 builds a house.", 14 | "$100,000 will build you a house.", 15 | "$100,000 builds a house out of sticks.", 16 | "$100,000 builds you a house out of sticks.", 17 | "The dresser is made out of maple carefully finished with Danish oil." 18 | ] 19 | """ 20 | list_text = [ 21 | "I loved him writing novels.", 22 | "I admired him for his honesty." 
23 | ] 24 | for text in list_text: 25 | amr = amr_client.get_amr(text) 26 | print(amr) 27 | 28 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_visualize_dep_parse.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | import spacy 4 | from spacy import displacy 5 | 6 | nlp = spacy.load("en_core_web_sm") 7 | doc = nlp("This is a sentence.") 8 | displacy.serve(doc, style="dep") 9 | """ 10 | 11 | import spacy 12 | from spacy import displacy 13 | from pathlib import Path 14 | 15 | nlp = spacy.load("en_core_web_sm") 16 | sentences = ["For the fifth grade play , the chairs have been put into 27 rows with 16 chairs in each row .", 17 | "How many chairs have been put out for the play ?"] 18 | for sent in sentences: 19 | doc = nlp(sent) 20 | svg = displacy.render(doc, style="dep", jupyter=False) 21 | Path("./test-output/").mkdir(parents=True, exist_ok=True) 22 | file_name = '-'.join([w.text for w in doc if not w.is_punct]) + ".svg" 23 | output_path = Path("./test-output/" + file_name) 24 | output_path.open("w", encoding="utf-8").write(svg) 25 | 26 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_visualize_sem_graph.py: -------------------------------------------------------------------------------- 1 | 2 | import requests 3 | import json 4 | 5 | from amr_verbnet_semantics.core.amr_verbnet_enhance import ( 6 | build_graph_from_amr, visualize_semantic_graph) 7 | 8 | host = "0.0.0.0" 9 | port = 5000 10 | 11 | # text = "For the fifth grade play , the chairs have been put into 27 rows with 16 chairs in each row . How many chairs have been put out for the play ?" 12 | text = "You are carrying : a bronze-hilted dagger, a clay ocarina, armor, and silks ( worn ) ." 
13 | res = requests.get("http://{}:{}/amr_parsing".format(host, port), params={'text': text}) 14 | res = json.loads(res.text) 15 | print(res) 16 | for i, amr in enumerate(res["result"]): 17 | graph, amr_obj = build_graph_from_amr(amr=amr) 18 | visualize_semantic_graph(graph, graph_name="amr_semantic_graph_{}".format(i), 19 | out_dir="./test-output/") 20 | print("amr_semantic_graph DONE ...") 21 | 22 | -------------------------------------------------------------------------------- /KG/ingestion/README.md: -------------------------------------------------------------------------------- 1 | This directory contains both code and input data for the generation of ULKB. 2 | 3 | Code in ulkb_generate_unified_mapping.py generates the excel ULKB_UnifiedMapping_V1.xlsx directly from the input files. It carries out a phantom generation of the graph that, instead of writing to .ttl, it produces an excel that can be easily modified and changed. This allows us to correct the generation quickly. 4 | 5 | The file ULKB_UnifiedMapping_V1.xlsx contains the analysis of mappings of verbs and constraints from the version V4. Of particular interest is the red column in UM_RAW_V1, which indicates vocabulary in the mapping that doesn't exist in the target class. This means the mapping cannot lead to a successful grounding. 6 | 7 | Once this mapping is curated, the excel is used in the ingestion, instead of Semlink versions and Propbank. The code in ulkb_ingest_PB_VN.py does this. It takes the excel above, plus all the versions of Verbnet and Propbank to generate UL_KB_V5. 
8 | -------------------------------------------------------------------------------- /scripts/download_verbnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download VerbNet 4 | # Example: sh scripts/download_verbnet.sh [save_dir] 5 | # Usage: sh scripts/download_verbnet.sh ~/nltk_data/corpora/ 6 | 7 | set -e 8 | save_dir=$1 9 | 10 | # VerbNet 3.2 11 | wget -P ${save_dir} http://verbs.colorado.edu/verb-index/vn/verbnet-3.2.tar.gz 12 | echo "Unzip ..." 13 | tar xvzf ${save_dir}/verbnet-3.2.tar.gz -C ${save_dir} && mv ${save_dir}/new_vn ${save_dir}/verbnet3.2 14 | echo "Saved to ${save_dir}/verbnet3.2" 15 | 16 | # VerbNet 3.3 17 | wget -P ${save_dir} http://verbs.colorado.edu/verb-index/vn/verbnet-3.3.tar.gz 18 | echo "Unzip ..." 19 | tar xvzf ${save_dir}/verbnet-3.3.tar.gz -C ${save_dir} 20 | echo "Saved to ${save_dir}/verbnet3.3" 21 | 22 | # VerbNet 3.4 23 | git clone https://github.com/cu-clear/verbnet.git ./verbnet_clone 24 | cp -R ./verbnet_clone/verbnet3.4 ${save_dir} 25 | rm -rf ./verbnet_clone 26 | echo "Saved to ${save_dir}/verbnet3.4" 27 | 28 | echo "Download & unzip DONE." 
29 | 30 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_propbank.py: -------------------------------------------------------------------------------- 1 | 2 | from xml.etree import ElementTree 3 | 4 | from nltk.corpus import propbank 5 | 6 | """ 7 | pb_instances = propbank.instances() 8 | inst = pb_instances[103] 9 | print(inst.fileid, inst.sentnum, inst.wordnum) 10 | print(inst.tagger) 11 | print(inst.inflection) 12 | print(inst.roleset) 13 | print(inst.predicate) 14 | print(inst.arguments) 15 | print("rolesets:", propbank.roleset("enter.01")) 16 | """ 17 | 18 | enter_01 = propbank.roleset("enter.01") 19 | for role in enter_01.findall("roles/role"): 20 | vn_role = role.find('vnrole') 21 | print("vncls:", vn_role.attrib["vncls"]) 22 | print("vntheta:", vn_role.attrib["vntheta"]) 23 | print(ElementTree.tostring(vn_role).decode('utf8').strip()) 24 | print(role.attrib['n'], role.attrib['descr']) 25 | 26 | print("====================") 27 | print(ElementTree.tostring(enter_01).decode('utf8').strip()) 28 | # print(ElementTree.tostring(enter_01.find('roles')).decode('utf8').strip()) 29 | 30 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install dependencies and setups 4 | # Usage: sh ./install.sh 5 | 6 | set -e 7 | 8 | echo "** ensuring latest version of pip is installed **" 9 | pip install --upgrade pip 10 | 11 | if [[ ${CONDA_DEFAULT_ENV:-''} != amr-verbnet ]]; then 12 | echo Create and activate the amr-verbnet env 13 | echo "Sorry, we can't do that for you in this script, at least, not on a Mac" 14 | exit 1 15 | fi 16 | 17 | pip install numpy 18 | 19 | echo "** installing packages **" 20 | pip install -e . 
21 | 22 | echo "** Downloading spaCy corpus **" 23 | python -m spacy download en 24 | 25 | # For constituency parsing 26 | # pip install benepar --no-cache-dir 27 | # python -c "import benepar; benepar.download('benepar_en3')" 28 | 29 | # Install PyTorch 1.3 30 | # pip install torch==1.3 31 | 32 | echo "** Downloading Blazegraph for storing the KG **" 33 | mkdir -p blazegraph 34 | wget -O ./blazegraph/blazegraph.jar https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_2_1_6_RC/blazegraph.jar 35 | -------------------------------------------------------------------------------- /KG/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config file for isolating endpoint configuration difference. Checks for 3 | config.yaml in the project OR a config_sparql.yaml in the current directory 4 | """ 5 | import os 6 | 7 | import yaml 8 | 9 | SPARQL_ENDPOINT = "" 10 | config = "" 11 | 12 | thisDir = os.path.join(os.getcwd()) 13 | 14 | try: 15 | from app_config import config 16 | SPARQL_ENDPOINT = config.SPARQL_ENDPOINT 17 | except ImportError: 18 | try : 19 | with open(thisDir + "/config_sparql.yaml", "r") as f: 20 | config = yaml.safe_load(f) 21 | if type(config) is dict : 22 | SPARQL_ENDPOINT = config['SPARQL_ENDPOINT'] 23 | else : 24 | SPARQL_ENDPOINT = config 25 | SPARQL_ENDPOINT = SPARQL_ENDPOINT.split("\"")[1] 26 | except : 27 | raise Exception("Cannot find config_sparql.yaml in current directory (standalone) or config.yaml in root directory (AMR-CSlogic)") 28 | 29 | if __name__ == "__main__": 30 | print(str(config)) 31 | print(SPARQL_ENDPOINT) 32 | 33 | -------------------------------------------------------------------------------- /KG/service/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Config file for isolating endpoint configuration difference. 
Checks for 3 | config.yaml in the project OR a config_sparql.yaml in the current directory 4 | """ 5 | import yaml 6 | import json 7 | import os 8 | 9 | SPARQL_ENDPOINT = "" 10 | config = "" 11 | 12 | thisDir = os.path.join(os.getcwd()) 13 | 14 | try: 15 | from app_config import config 16 | SPARQL_ENDPOINT = config.SPARQL_ENDPOINT 17 | except ImportError: 18 | try : 19 | with open(thisDir + "/config_sparql.yaml", "r") as f: 20 | config = yaml.safe_load(f) 21 | if type(config) is dict : 22 | SPARQL_ENDPOINT = config['SPARQL_ENDPOINT'] 23 | else : 24 | SPARQL_ENDPOINT = config 25 | SPARQL_ENDPOINT = SPARQL_ENDPOINT.split("\"")[1] 26 | except : 27 | raise Exception("Cannot find config_sparql.yaml in current directory (standalone) or config.yaml in root directory (AMR-CSlogic)") 28 | 29 | if __name__ == "__main__": 30 | print(str(config)) 31 | print(SPARQL_ENDPOINT) 32 | 33 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_issue_12.py: -------------------------------------------------------------------------------- 1 | from amr_verbnet_semantics.core.amr_verbnet_enhance import ground_amr 2 | import requests 3 | 4 | text = 'wet hoodie' 5 | endpoint = f'http://9.116.32.235:5000/verbnet_semantics' # temporary development server 6 | 7 | params = {'text': text, 'use_coreference': 0} 8 | r = requests.get(endpoint, params=params) 9 | r = r.json() 10 | amr = r['amr_parse'][0]['amr'] 11 | amr_cal_str = r['amr_parse'][0]['amr_cal_str'] 12 | 13 | 14 | print(f'***** amr *****') 15 | print(amr) 16 | print(f'***** amr_cal_str *****') 17 | print(amr_cal_str) 18 | 19 | g_res = ground_amr(amr) 20 | print(f'\n***** grounded_stmt_str *****') 21 | print(g_res['grounded_stmt_str']) 22 | 23 | ''' 24 | As of 15th Oct, the output 25 | 26 | ***** amr ***** 27 | # ::tok wet hoodie 28 | # ::node 1 wet-01 0-1 29 | # ::node 2 hoodie 1-2 30 | # ::root 2 hoodie 31 | # ::edge hoodie ARG1-of wet-01 2 1 32 | # ::short {1: 'w', 2: 'h'} 33 
| (h / hoodie 34 | :ARG1-of (w / wet-01)) 35 | 36 | 37 | ***** amr_cal_str ***** 38 | [hoodie(h), wet-01(w), wet-01.arg1(w, h)] 39 | 40 | ***** grounded_stmt_str ***** 41 | {'wet.01': {'other_cos-45.4': [[NOT(HAS_STATE(e1, h, V_Final_State)), HAS_STATE(e2, h, V_Final_State)]]}} 42 | ''' -------------------------------------------------------------------------------- /amr_verbnet_semantics/test/test_cache_amr_client.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from amr_verbnet_semantics.service.amr import CacheClient, LocalAMRClient 4 | 5 | if __name__ == "__main__": 6 | 7 | amr_client = LocalAMRClient() 8 | amr_client = CacheClient(amr_client, use_snapshot=False) 9 | 10 | list_text = ['You drop the peanut oil on the ground.', 11 | 'You pick up the peanut oil from the ground.', 12 | 'You take the peanut oil from the folding chair.', 13 | 'You drop the red hot pepper on the ground.', 14 | 'But the thing is empty.', 15 | 'The shelf is wooden.', 16 | 'Now why would someone leave that there?', 17 | 'When you stand up, you notice a shelf.', 18 | 'You bend down to tie your shoe.'] 19 | 20 | # before caching 21 | start = time.time() 22 | for text in list_text: 23 | amr = amr_client.get_amr(text) 24 | print(f'before caching: {time.time() - start:.3f} seconds') 25 | 26 | 27 | # after caching 28 | start = time.time() 29 | for text in list_text: 30 | amr = amr_client.get_amr(text) 31 | print(f'after caching: {time.time() - start:.3f} seconds') 32 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/utils/format_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for formatting 3 | """ 4 | import inspect 5 | 6 | 7 | def to_json(obj): 8 | if isinstance(obj, dict): 9 | results = {} 10 | for key in obj: 11 | results[key] = to_json(obj[key]) 12 | return results 13 | elif isinstance(obj, list): 14 | 
results = [] 15 | for elem in obj: 16 | results.append(to_json(elem)) 17 | return results 18 | elif isinstance(obj, DictObject): 19 | results = {} 20 | for key in dir(obj): 21 | if key.startswith('_'): 22 | continue 23 | results[key] = to_json(getattr(obj, key)) 24 | return results 25 | elif hasattr(obj, "to_json") and inspect.ismethod(getattr(obj, "to_json")): 26 | return obj.to_json() 27 | else: 28 | return obj 29 | 30 | 31 | class DictObject(object): 32 | def __init__(self, in_dict): 33 | for key, val in in_dict.items(): 34 | if isinstance(val, (list, tuple)): 35 | setattr(self, key, [DictObject(x) if isinstance(x, dict) else x for x in val]) 36 | else: 37 | setattr(self, key, DictObject(val) if isinstance(val, dict) else val) 38 | 39 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/service/ulkb.py: -------------------------------------------------------------------------------- 1 | """ 2 | RDF query interfaces for the KB. 3 | """ 4 | 5 | from amr_verbnet_semantics.service.abstract_kb import AbstractKb 6 | from amr_verbnet_semantics.service.semlink import query_pb_vn_mapping_from_rdf 7 | from amr_verbnet_semantics.service.sparql import \ 8 | query_verbnet_semantic_roles_from_rdf 9 | from amr_verbnet_semantics.service.verbnet import query_semantics_from_rdf 10 | 11 | 12 | class UnifiedKb(AbstractKb): 13 | def __init__(self): 14 | super().__init__() 15 | 16 | def query_semantics(self, verbnet_id, verbnet_version=None, verbose=False): 17 | return query_semantics_from_rdf(verbnet_id, verbnet_version) 18 | 19 | def query_propbank_verbnet_class_mapping(self, propbank_id, 20 | verbnet_version=None, verbose=False): 21 | return query_pb_vn_mapping_from_rdf(propbank_id) 22 | 23 | def query_verbnet_semantic_roles(self, propbank_id, verbose=False): 24 | return query_verbnet_semantic_roles_from_rdf(propbank_id) 25 | 26 | 27 | if __name__ == '__main__': 28 | ulkb = UnifiedKb() 29 | 
print(ulkb.query_semantics("escape-51.1", verbnet_version="verbnet3.4")) 30 | print() 31 | print(ulkb.query_semantics("spray-9.7-2", verbnet_version="verbnet3.4")) 32 | print() 33 | print(ulkb.query_propbank_verbnet_class_mapping("enter.01")) 34 | print() 35 | print(ulkb.query_verbnet_semantic_roles("enter.01")) 36 | 37 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/service/corpus.py: -------------------------------------------------------------------------------- 1 | """ 2 | Corpus query interfaces for the KB. 3 | """ 4 | 5 | from amr_verbnet_semantics.service.abstract_kb import AbstractKb 6 | from amr_verbnet_semantics.service.propbank import \ 7 | query_verbnet_semantic_roles_from_corpus 8 | from amr_verbnet_semantics.service.semlink import \ 9 | query_pb_vn_mapping_from_semlink 10 | from amr_verbnet_semantics.service.verbnet import query_semantics_from_corpus 11 | 12 | 13 | class CorpusKb(AbstractKb): 14 | def __init__(self): 15 | super().__init__() 16 | 17 | def query_semantics(self, verbnet_id, verbnet_version=None, verbose=False): 18 | return query_semantics_from_corpus(verbnet_id, verbnet_version) 19 | 20 | def query_propbank_verbnet_class_mapping( 21 | self, propbank_id, verbnet_version=None, verbose=False): 22 | return query_pb_vn_mapping_from_semlink(propbank_id) 23 | 24 | def query_verbnet_semantic_roles(self, propbank_id, verbose=False): 25 | return query_verbnet_semantic_roles_from_corpus(propbank_id, verbose) 26 | 27 | 28 | if __name__ == '__main__': 29 | corpus_kb = CorpusKb() 30 | print(corpus_kb.query_semantics("escape-51.1", verbnet_version="verbnet3.4")) 31 | print() 32 | print(corpus_kb.query_semantics("spray-9.7-2", verbnet_version="verbnet3.4")) 33 | print() 34 | print(corpus_kb.query_propbank_verbnet_class_mapping("enter.01")) 35 | print() 36 | print(corpus_kb.query_verbnet_semantic_roles("enter.01")) 37 | 38 | 
# ---------------------------------------------------------------------------
# amr_verbnet_semantics/test/test_service.py
# ---------------------------------------------------------------------------
# Test for services

import argparse
import json
from pprint import pprint

import requests

parser = argparse.ArgumentParser()

parser.add_argument('--ip', type=str, default='0.0.0.0')
parser.add_argument('--port', type=int, default=5000)
parser.add_argument('--text', type=str, default='You enter a kitchen.')

args = parser.parse_args()

host = args.ip
port = args.port

text = args.text

res = requests.get("http://{}:{}/verbnet_semantics".format(host, port),
                   params={'text': text})

print("\nres.text:")
print(res.text)

res = json.loads(res.text)
if "amr_parse" in res:
    # Iterate the parses directly instead of indexing via range(len(...)).
    for parse in res["amr_parse"]:
        print("\namr:")
        print(parse["amr"])
        print("\npb_vn_mappings:")
        pprint(parse["pb_vn_mappings"])

        print("\namr_cal:")
        print(parse["amr_cal"])

        print("\nsem_cal:")
        print(parse["sem_cal"])

        print("\ngrounded_stmt:")
        print(parse["grounded_stmt"])

        print("\namr_cal:")
        print(parse["amr_cal_str"])

        print("\nsem_cal:")
        print(parse["sem_cal_str"])

        print("\ngrounded_stmt:")
        print(parse["grounded_stmt_str"])

# ---------------------------------------------------------------------------
# config_template.yaml
# ---------------------------------------------------------------------------
# Use this config template to create your own config yaml file

# Configure AMR parsing
AMR_MODEL_CHECKPOINT_PATH: "DATA/AMR2.0/models/exp_cofill_o8.3_act-states_RoBERTa-large-top24/_act-pos-grh_vmask1_shiftpos1_ptr-lay6-h1_grh-lay123-h2-allprev_1in1out_cam-layall-h2-abuf/ep60-seed44/checkpoint_wiki.smatch_best1.pt"
ROBERTA_CACHE_PATH: "roberta.large"
THIRD_PARTY_PATH: 'third_party'
use_cuda: false

# Configure RDF store
SPARQL_ENDPOINT: "http://localhost:9999/blazegraph/namespace/UL_KB_V5_PUB"

# Configure VerbNet
VERBNET_VERSION: "verbnet3.2"

# whether to remove contradicting statements
# when the time dimension is ignored.
FILTER_INVALID_STATEMENTS: true

# Configure KB source
# Set to "rdf" if you want to query from the RDF triple store
# KB_SOURCE: "corpus"
KB_SOURCE: "rdf"

# Configure Conda home
CONDA_HOME: "~/anaconda3"

# Configure local Stanford NLP parser (deprecated)
# STANFORD_CORENLP_PATH: "./stanford-corenlp-full-2018-10-05"
# STANFORD_CORENLP_PATH: null
# STANFORD_CORENLP_HOST: "http://localhost"
# STANFORD_CORENLP_PORT: 9000

# Configure AMR parsing
# AMR_PARSING_MODEL: "remote"
AMR_PARSING_MODEL: "local"

# Configure remote AMR parsing service
REMOTE_AMR_HOST: "mnlp-demo.sl.cloud9.ibm.com"
REMOTE_AMR_PORT: 59990

# Configure local FLASK service
LOCAL_SERVICE_HOST: "0.0.0.0"
LOCAL_SERVICE_PORT: 5000
USE_FLASK: true

# ---------------------------------------------------------------------------
# amr_verbnet_semantics/corpus_readers/verbnet_reader.py
# ---------------------------------------------------------------------------
"""
An extension class based on NLTK VerbnetCorpusReader to capture the
negation of a statement.
Reference: https://www.nltk.org/_modules/nltk/corpus/reader/verbnet.html
"""
from nltk.corpus.reader import VerbnetCorpusReader


class VerbnetCorpusReaderEx(VerbnetCorpusReader):
    def __init__(self, root, fileids, wrap_etree=False):
        super().__init__(root, fileids, wrap_etree)

    def _get_semantics_within_frame(self, vnframe):
        """Returns semantics within a single frame

        A utility function to retrieve semantics within a frame in VerbNet
        Members of the semantics dictionary:
        1) Predicate value
        2) Arguments

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: semantics: semantics dictionary
        """
        semantics_within_single_frame = []
        for pred in vnframe.findall("SEMANTICS/PRED"):
            arguments = [
                {"type": arg.get("type"), "value": arg.get("value")}
                for arg in pred.findall("ARGS/ARG")
            ]
            semantics_within_single_frame.append(
                {
                    "predicate_value": pred.get("value"),
                    "arguments": arguments,
                    # The "bool" XML attribute is "!" when the predicate
                    # is negated; it is absent otherwise.
                    "is_negative": pred.get("bool", False) == "!"
                }
            )
        return semantics_within_single_frame

# ---------------------------------------------------------------------------
# amr_verbnet_semantics/test/test_issue_8.py
# ---------------------------------------------------------------------------
from amr_verbnet_semantics.core.amr_verbnet_enhance import ground_amr
import requests

text = 'He entered the room.'
endpoint = 'http://9.116.32.235:5000/verbnet_semantics'  # temporary development server

params = {'text': text, 'use_coreference': 0}
r = requests.get(endpoint, params=params)
r = r.json()
amr = r['amr_parse'][0]['amr']
amr_cal_str = r['amr_parse'][0]['amr_cal_str']

# Banners have no placeholders, so plain literals (not f-strings).
print('***** amr *****')
print(amr)
print('***** amr_cal_str *****')
print(amr_cal_str)

g_res = ground_amr(amr)
print('\n***** grounded_stmt_str *****')
print(g_res['grounded_stmt_str'])

print('\n***** pb_vn_mappings *****')
print(g_res['pb_vn_mappings'])



'''
As of 15th Oct, the output

***** amr *****
# ::tok He entered the room .
# ::node 1 he 0-1
# ::node 2 enter-01 1-2
# ::node 3 room 3-4
# ::root 2 enter-01
# ::edge enter-01 ARG0 he 2 1
# ::edge enter-01 ARG1 room 2 3
# ::short {1: 'h', 2: 'e', 3: 'r'}
(e / enter-01
    :ARG0 (h / he)
    :ARG1 (r / room))


***** amr_cal_str *****
[enter-01(e), he(h), room(r), enter-01.arg0(e, h), enter-01.arg1(e, r)]

***** grounded_stmt_str *****
{'enter.01': {'escape-51.1-2': [[MOTION(during(E), h), LOCATION(start(E), r, ?Initial_Location), LOCATION(end(E), r, ?Trajectory)]]}}

***** pb_vn_mappings *****
{'enter.01': [{'mapping': 'escape-51.1-2', 'source': 'verbnet3.4'}]}

'''

# ---------------------------------------------------------------------------
# amr_verbnet_semantics/web_app/__init__.py
# ---------------------------------------------------------------------------
"""
Package: service
Package for the application models and service routes
This module creates and configures the Flask app and sets up the logging
"""
import importlib
import logging
import os

from flask import Flask

# Create Flask application
app = Flask(__name__)
app.config.from_object("config") 15 | 16 | # Import the routes After the Flask app is created 17 | user_module_name = \ 18 | os.path.split( 19 | os.path.dirname(os.path.abspath(os.path.dirname(__file__))))[1] 20 | submodule_name = os.path.split(os.path.abspath(os.path.dirname(__file__)))[1] 21 | for file in os.listdir(os.path.dirname(__file__)): 22 | if file.endswith('.py') and not file.startswith('_'): 23 | module = file[:file.find('.py')] 24 | importlib.import_module(user_module_name + '.' + 25 | submodule_name + '.' + module) 26 | 27 | # Set up logging for production 28 | print("Setting up logging for {}...".format(__name__)) 29 | app.logger.propagate = False 30 | if __name__ != "__main__": 31 | gunicorn_logger = logging.getLogger("gunicorn.error") 32 | app.logger.handlers = gunicorn_logger.handlers 33 | app.logger.setLevel(gunicorn_logger.level) 34 | # Make all log formats consistent 35 | formatter = logging.Formatter( 36 | "[%(asctime)s] [%(levelname)s] [%(module)s] %(message)s", 37 | "%Y-%m-%d %H:%M:%S %z" 38 | ) 39 | for handler in app.logger.handlers: 40 | handler.setFormatter(formatter) 41 | app.logger.info("Logging handler established") 42 | 43 | app.logger.info("Service inititalized!") 44 | 45 | -------------------------------------------------------------------------------- /amr_verbnet_semantics/grpc_defs/enhanced_amr_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | import grpc 3 | 4 | from . import enhanced_amr_pb2 as enhanced__amr__pb2 5 | 6 | 7 | class AMR_EnhancerStub(object): 8 | # missing associated documentation comment in .proto file 9 | pass 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 
16 | """ 17 | self.get_enhanced_amr = channel.unary_unary( 18 | '/AMR_Enhancer/get_enhanced_amr', 19 | request_serializer=enhanced__amr__pb2.Sentence.SerializeToString, 20 | response_deserializer=enhanced__amr__pb2.Enhanced_amr.FromString, 21 | ) 22 | 23 | 24 | class AMR_EnhancerServicer(object): 25 | # missing associated documentation comment in .proto file 26 | pass 27 | 28 | def get_enhanced_amr(self, request, context): 29 | # missing associated documentation comment in .proto file 30 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 31 | context.set_details('Method not implemented!') 32 | raise NotImplementedError('Method not implemented!') 33 | 34 | 35 | def add_AMR_EnhancerServicer_to_server(servicer, server): 36 | rpc_method_handlers = { 37 | 'get_enhanced_amr': grpc.unary_unary_rpc_method_handler( 38 | servicer.get_enhanced_amr, 39 | request_deserializer=enhanced__amr__pb2.Sentence.FromString, 40 | response_serializer=enhanced__amr__pb2.Enhanced_amr.SerializeToString, 41 | ), 42 | } 43 | generic_handler = grpc.method_handlers_generic_handler( 44 | 'AMR_Enhancer', rpc_method_handlers) 45 | server.add_generic_rpc_handlers((generic_handler,)) -------------------------------------------------------------------------------- /amr_verbnet_semantics/core/extract_timeline.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 5 | import wikipedia 6 | 7 | 8 | def extract_time_line(page_title, output_dir): 9 | print("Crawling page <{}>".format(page_title)) 10 | 11 | timelines = dict() 12 | t = wikipedia.page(page_title) 13 | # print(t.content) 14 | # input() 15 | 16 | pattern = r"(===(?P