├── __init__.py
├── amie_plus.jar
├── __pycache__
│   ├── predict.cpython-37.pyc
│   ├── __init__.cpython-37.pyc
│   ├── aser_table2tsv.cpython-37.pyc
│   ├── mine_and_sort.cpython-37.pyc
│   └── relation_senses.cpython-37.pyc
├── relation_senses.py
├── aser_table2tsv.py
├── README.md
├── result_testing.py
├── .gitignore
├── pipeline.py
├── mine_and_sort.py
└── predict.py

/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/amie_plus.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JefferyO/ASER_AMIE/HEAD/amie_plus.jar
--------------------------------------------------------------------------------
/__pycache__/predict.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JefferyO/ASER_AMIE/HEAD/__pycache__/predict.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JefferyO/ASER_AMIE/HEAD/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/aser_table2tsv.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JefferyO/ASER_AMIE/HEAD/__pycache__/aser_table2tsv.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/mine_and_sort.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JefferyO/ASER_AMIE/HEAD/__pycache__/mine_and_sort.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/relation_senses.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JefferyO/ASER_AMIE/HEAD/__pycache__/relation_senses.cpython-37.pyc
--------------------------------------------------------------------------------
/relation_senses.py:
--------------------------------------------------------------------------------
relation_senses = [
    'Precedence', 'Succession', 'Synchronous',
    'Reason', 'Result',
    'Condition', 'Contrast', 'Concession',
    'Conjunction', 'Instantiation', 'Restatement', 'ChosenAlternative', 'Alternative', 'Exception',
    'Co_Occurrence']
--------------------------------------------------------------------------------
/aser_table2tsv.py:
--------------------------------------------------------------------------------
import sqlite3
import csv
from relation_senses import relation_senses


def aser2tsv(tsv_path, db_path):
    with open(tsv_path, 'w', newline='') as f:
        conn = sqlite3.connect(db_path)
        tsv_writer = csv.writer(f, delimiter='\t')
        for x in conn.execute('SELECT * FROM RELATIONS;'):
            # 0: _id
            # 1: event1_id
            # 2: event2_id
            # 3-18: relation_senses
            for idx in range(3, 3 + 14):
                # exclude the Co_Occurrence
                count = int(float(x[idx]) + 0.5)
                row = [x[1], relation_senses[idx - 3], x[2]]
                for i in range(count):
                    tsv_writer.writerow(row)
        conn.close()
    return tsv_path
'''
parser = argparse.ArgumentParser()
parser.add_argument('--db_path', type=str, help='/path/to/KG.db')
parser.add_argument('--tsv_path', type=str, help='/path/to/KG.db')
args = parser.parse_args()
'''
--------------------------------------------------------------------------------
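A minimal sketch of running `aser2tsv` on its own and eyeballing the TSV it writes; `KG.db` and `row_triples.tsv` are hypothetical local paths, not files shipped with the repository:

```python
import csv

from aser_table2tsv import aser2tsv

# Hypothetical paths: a local copy of the ASER database and the TSV to be written.
triples_path = aser2tsv(tsv_path="row_triples.tsv", db_path="KG.db")

# Each row is one (event1_id, relation_sense, event2_id) triple, repeated once per
# unit of rounded relation weight -- the plain-triple input format AMIE+ expects.
with open(triples_path, newline='') as f:
    for i, (head, relation, tail) in enumerate(csv.reader(f, delimiter='\t')):
        print(head, relation, tail)
        if i == 4:
            break
```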
/README.md:
--------------------------------------------------------------------------------
# ASER_AMIE
A Python pipeline for using AMIE+ to mine logic rules and instantiate new facts.
Initially designed for mining new relations for [ASER](https://hkust-knowcomp.github.io/ASER/).
## Settings and Dependencies:
* Python 3.7
* [AMIE+](https://www.mpi-inf.mpg.de/departments/databases-and-information-systems/research/yago-naga/amie/)
## Usage:
* Show the help message and descriptions of the arguments:
```Bash
python pipeline.py -h
```
* Run the whole pipeline (only for the [ASER Knowledge Graph](https://github.com/HKUST-KnowComp/ASER)):
```Bash
python pipeline.py -wp --row_triples /path/to/row_triples.tsv --db_path /path/to/KG.db --amie_plus_path /path/to/AMIE+.jar --new_prediction_path /path/to/new_prediction.tsv
```
With this command, the pipeline first extracts RDF triples from the ASER-format database into a .tsv file. It then runs AMIE+ on the .tsv file to mine all logical rules within the preset thresholds. Finally, it instantiates new RDF facts by grounding the mined rules against the original triples.

* Run the pipeline for another knowledge base:

  * Mine logical rules with AMIE+:
    ```Bash
    python pipeline.py -m --row_triples /path/to/row_triples.tsv --amie_plus_path /path/to/AMIE+.jar
    ```
    The mined rules are sorted by PCA and STD confidence respectively and saved as "pca_sorted_rule.tsv" and "std_sorted_rule.tsv" in the module directory.

  * Predict/instantiate new facts with mined or provided rules:
    ```Bash
    python pipeline.py -p --rule_path /path/to/rule_you_provide.tsv --row_triples /path/to/row_triples.tsv --new_prediction_path /path/to/new_prediction.tsv
    ```
--------------------------------------------------------------------------------
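The same three stages can also be driven from Python instead of the command line. A minimal sketch mirroring what `pipeline.py -wp` does, assuming the module directory is on the import path and that `KG.db`, `amie_plus.jar`, and the output filenames are hypothetical local paths:

```python
from aser_table2tsv import aser2tsv
from mine_and_sort import m_n_s
from predict import derive_new_triples

# Hypothetical paths: substitute a local ASER KG.db and the AMIE+ jar shipped in this repo.
triples = aser2tsv(tsv_path="row_triples.tsv", db_path="KG.db")

# Mine rules; returns the PCA-sorted and STD-sorted rule file paths.
pca_rules, std_rules = m_n_s(tsv_source_path=triples, amie_path="amie_plus.jar",
                             minhc=0.01, minc=0.01, minpca=0.01)

# Ground the PCA-sorted rules against the original triples to propose new facts.
derive_new_triples(mined_rules_path=pca_rules,
                   original_triples_path=triples,
                   new_triples_path="new_prediction.tsv")
```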
/result_testing.py:
--------------------------------------------------------------------------------
import csv

# Sanity-checks the predictions in test_predict_result.csv against one expected rule pattern:
#   ?a Precedence ?b   ?c Synchronous ?b   =>   ?c Synchronous ?a
# Both body facts must already exist in triples_with_bootstrapping.tsv, and the variable
# bindings of each written row must follow standard_var_match below.
with open('test_predict_result.csv', 'r') as f:
    with open('triples_with_bootstrapping.tsv', 'r') as t:
        rdt = csv.reader(t, delimiter='\t')
        original_b1 = []
        original_b2 = []
        for rowt in rdt:
            if rowt[1] == 'Precedence':
                original_b1.append(rowt)
            if rowt[1] == 'Synchronous':
                original_b2.append(rowt)
        standard_var_match = [0, 1, 2, 1, 2, 0]
        standard_relation = ['Precedence', 'Synchronous', 'Synchronous']
        rule_var = []
        check_relation = True
        check_var_match = True
        check_exist = True
        rdf = csv.reader(f, delimiter='\t')
        count = 0
        next(rdf, None)
        for row in rdf:
            body1 = row[0:3]
            body2 = row[3:6]
            head = row[6:9]
            rule_var = [body1[0], body1[2], body2[0], body2[2], head[0], head[2]]

            # reset the variable-matching pattern for every row
            match_var = [0, 1, 2, 2, 2, 2]
            for i in range(2, 6):
                if rule_var[i] == rule_var[0]:
                    match_var[i] = 0
                if rule_var[i] == rule_var[1]:
                    match_var[i] = 1

            if body1[1] != standard_relation[0] or body2[1] != standard_relation[1] or head[1] != standard_relation[2]:
                check_relation = False
                break

            elif match_var != standard_var_match:
                check_var_match = False
                break

            elif (not (body1 in original_b1)) or (not (body2 in original_b2)):
                check_exist = False

            else:
                print('correct')
                count += 1

        if not (check_relation and check_var_match and check_exist):
            print(row)
            if not check_var_match:
                print('wrong var match')
            elif not check_exist:
                print('body not exist originally')
            else:
                print('wrong relation')

        print(count)
--------------------------------------------------------------------------------
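For reference, a row that would satisfy the relation and variable-pattern checks above, with illustrative event IDs in place of real ASER event identifiers (the existence check additionally requires both body facts to appear in triples_with_bootstrapping.tsv):

```python
# One 9-column row of test_predict_result.csv: body1, body2, then the predicted head.
example_row = [
    "event_a", "Precedence",  "event_b",   # body1: ?a Precedence ?b
    "event_c", "Synchronous", "event_b",   # body2: ?c Synchronous ?b
    "event_c", "Synchronous", "event_a",   # head:  ?c Synchronous ?a
]
print("\t".join(example_row))
```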
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/pipeline.py:
--------------------------------------------------------------------------------
import argparse
from aser_table2tsv import aser2tsv
from mine_and_sort import m_n_s
from predict import derive_new_triples

if __name__ == "__main__":
    # arguments setting
    parser = argparse.ArgumentParser(description='API to handle the logic rule mining on ASER KB via AMIE+')
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-wp", "--whole_pipeline", action="store_true", help="Run the whole pipeline.\n"
                       + "Take the KB as input and output the sorted rules together with the found facts")
    group.add_argument("-a2t", "--aser2tsv", action="store_true", help="Extract triples (event1, relation, event2) from the KB and store them as tsv")
    group.add_argument("-m", "--mine_rule", action="store_true", help="Mine logical rules with AMIE+; takes tsv triples as input, outputs are sorted")
    group.add_argument("-p", "--predict", action="store_true", help="Predict new facts with mined rules")

    parser.add_argument("--db_path", type=str, help="/path/to/KG.db")
    parser.add_argument("--row_triples", type=str, help="/path/to/row_triples.tsv")
    parser.add_argument("--rule_path", type=str, help="/path/to/rule_you_provide.tsv")
    parser.add_argument("--new_prediction_path", type=str, help="/path/to/new_prediction.tsv")
    parser.add_argument("--amie_plus_path", type=str, help="/path/to/amie_plus.jar")
    parser.add_argument("--minhc", type=float, help="min head coverage threshold, default=0.01")
    parser.add_argument("--minc", type=float, help="min std confidence threshold, default=0.01")
default=0.01") 23 | parser.add_argument("--minpca", type=float, help="min pca confidence threshold, default=0.01") 24 | 25 | args = parser.parse_args() 26 | 27 | if args.whole_pipeline: 28 | extracted_triples_path = aser2tsv(tsv_path=args.row_triples, db_path=args.db_path) 29 | sorted_rule_paths = m_n_s(tsv_source_path=extracted_triples_path, amie_path=args.amie_plus_path, minhc=args.minhc, minc=args.minc, minpca=args.minpca) 30 | derive_new_triples(mined_rules_path=sorted_rule_paths[0], original_triples_path=extracted_triples_path, new_triples_path=args.new_prediction_path) 31 | 32 | elif args.aser2tsv: 33 | aser2tsv(args.row_triples, args.db_path) 34 | 35 | elif args.mine_rule: 36 | m_n_s(tsv_source_path=args.row_triples, amie_path=args.amie_plus_path, minhc=args.minhc, minc=args.minc, minpca=args.minpca) 37 | 38 | elif args.predict: 39 | derive_new_triples(mined_rules_path=args.rule_path, original_triples_path=args.row_triples, new_triples_path=args.new_prediction_path) 40 | -------------------------------------------------------------------------------- /mine_and_sort.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | import subprocess 3 | import os 4 | 5 | 6 | def m_n_s(tsv_source_path, amie_path, minhc=0.01, minc=None, minpca=None): 7 | amie_arg = "java -XX:-UseGCOverheadLimit -Xmx4G -jar " + amie_path + " " + tsv_source_path 8 | 9 | if minhc != 0.01 and minhc: 10 | amie_arg += " -minhc " 11 | amie_arg += str(minhc) 12 | if minc: 13 | amie_arg += " -minc " 14 | amie_arg += str(minc) 15 | if minpca: 16 | amie_arg += " -minpca " 17 | amie_arg += str(minpca) 18 | 19 | # tsv_target_path = "result_" + tsv_source_path 20 | open('result.txt', 'w').close() # clean up the old content in result.txt 21 | amie_arg += " >>result.txt" 22 | print(amie_arg) 23 | os.system(amie_arg) 24 | 25 | cols = [[] for x in range(11)] 26 | with open("result.txt", 'r') as source: 27 | for line in source: 28 | if line[0] == '?': 29 | current_line = line.split() 30 | # print(current_line) 31 | if len(current_line) != 0: 32 | current_rule = '' 33 | if len(current_line) == 20: 34 | for i in range(0, 10): 35 | current_rule = current_rule + current_line[i] + ' ' 36 | cols[0].append(current_rule) 37 | 38 | for j in range(10, 20): 39 | if current_line[j][0] != '?': 40 | cols[j - 9].append(float(current_line[j])) 41 | else: 42 | cols[j - 9].append(current_line[j]) 43 | else: 44 | for i in range(0, 7): 45 | current_rule = current_rule + current_line[i] + ' ' 46 | cols[0].append(current_rule) 47 | 48 | for j in range(7, 17): 49 | if current_line[j][0] != '?': 50 | cols[j - 6].append(float(current_line[j])) 51 | else: 52 | cols[j - 6].append(current_line[j]) 53 | 54 | df = pandas.DataFrame({ 55 | 'rule': cols[0], 56 | 'v1': cols[1], 57 | 'v2': cols[2], 58 | 'v3': cols[3], 59 | 'v4': cols[4], 60 | 'v5': cols[5], 61 | 'v6': cols[6], 62 | 'v7': cols[7], 63 | 'v8': cols[8], 64 | 'v9': cols[9], 65 | 'v10': cols[10], 66 | }) 67 | 68 | current_dir = os.path.abspath(os.path.dirname("__file__")) 69 | 70 | pca_sorted_path = os.path.join(current_dir, "pca_sorted_rule.tsv") 71 | std_sorted_path = os.path.join(current_dir, "std_sorted_rule.tsv") 72 | sorted_by_pca = df.sort_values(by='v3', ascending=False) 73 | sorted_by_std = df.sort_values(by='v2', ascending=False) 74 | sorted_by_pca.to_csv(pca_sorted_path, sep='\t') 75 | sorted_by_std.to_csv(std_sorted_path, sep='\t') 76 | 77 | return pca_sorted_path, std_sorted_path 78 | 
/predict.py:
--------------------------------------------------------------------------------
import csv
import pandas


def derive_new_triples(mined_rules_path, original_triples_path, new_triples_path):
    with open(new_triples_path, 'w', newline='') as target:
        new_writer = csv.writer(target, delimiter='\t')
        with open(mined_rules_path, 'r') as rules:
            rule_reader = csv.reader(rules, delimiter='\t')
            # skip the header
            next(rule_reader, None)
            for row_rule in rule_reader:
                # NOTE: only the rule whose index in the sorted rule file is '13' is
                # grounded here; this looks like a leftover test filter. Remove it to
                # instantiate every mined rule.
                if row_rule[0] != '13':
                    continue
                current_rule = row_rule[1].split()

                # extract current rule's relation and variable pattern
                # if body size == 1: e.g. ?a Conjunction ?b => ?a Concession ?b
                #   len == 7, body relation1 index == 1, head relation index == 5
                # if body size == 2: e.g. ?e Exception ?b ?e Result ?a => ?a Exception ?b
                #   len == 10, body relation1 index == 1, body relation2 index == 4, head relation index == 8

                rule_var = []  # variables in current rule
                match_var = [2] * 6  # if match_var[i] == match_var[j] then rule_var[i] == rule_var[j]

                # initialize var0 and var1
                match_var[0] = 0
                match_var[1] = 1

                # extract variables and relations from rule

                if len(current_rule) == 7:  # body size == 1
                    for i in (0, 2, 4, 6):
                        rule_var.append(current_rule[i])

                    body_relation1 = current_rule[1]
                    body_relation2 = None
                    head_relation = current_rule[5]

                    # variables matching
                    for i in range(2, 4):
                        if rule_var[i] == rule_var[0]:
                            match_var[i] = 0
                    for i in range(2, 4):
                        if rule_var[i] == rule_var[1]:
                            match_var[i] = 1

                else:  # body size == 2
                    for i in (0, 2, 3, 5, 7, 9):
                        rule_var.append(current_rule[i])

                    body_relation1 = current_rule[1]
                    body_relation2 = current_rule[4]
                    head_relation = current_rule[8]

                    # variables matching
                    for i in range(2, 6):
                        if rule_var[i] == rule_var[0]:
                            match_var[i] = 0
                        if rule_var[i] == rule_var[1]:
                            match_var[i] = 1

                # search for facts with body relations in original facts
                # also search for facts with head relations in original facts, to check duplicate
                original_facts = open(original_triples_path, 'r')
                facts_reader = csv.reader(original_facts, delimiter='\t')
                br1 = []
                br2 = []
                hr = []

                for row_fact in facts_reader:
                    if row_fact[1] == body_relation1:
                        br1.append(row_fact)
                    if row_fact[1] == head_relation:
                        hr.append(row_fact)
                    if row_fact[1] == body_relation2 and body_relation2:  # body size == 2
                        br2.append(row_fact)

                # check and store potential new facts
                candidate_new_facts = []
                candidate_new_body1 = []
                candidate_new_body2 = []
                if not body_relation2:  # body size == 1
                    for facts in br1:
                        if match_var[0] == match_var[2]:
                            current_new_fact = [facts[0], head_relation, facts[2]]
                        else:
                            current_new_fact = [facts[2], head_relation, facts[0]]

                        candidate_new_body1.append(facts)
                        candidate_new_facts.append(current_new_fact)

                        # keep the same 9-column layout as the two-atom case (body2 columns left empty)
                        new_writer.writerow(facts + ['', '', ''] + current_new_fact)

                else:  # body size == 2
                    '''
                    for facts1 in br1:
                        match_with_var0 = [i for i, x in enumerate(match_var) if x == 0]
                        match_with_var1 = [i for i, x in enumerate(match_var) if x == 1]

                        if len(match_with_var0) == 2:  # rule has only 2 var
                            if match_with_var0[0] == 2:
                                tentative_body2 = [facts1[0], body_relation2, facts1[2]]
                            else:
                                tentative_body2 = [facts1[2], body_relation2, facts1[0]]

                            for facts2 in br2:
                                if facts2 == tentative_body2:
                                    if match_with_var0[1] == 4:
                                        current_new_fact = [facts1[0], head_relation, facts1[2]]
                                    else
                                        current_new_fact = [facts1[2], head_relation, facts1[0]]

                        else:  # rule has 3 var
                            if match_with_var0[0] == 2:
                                if match_with_var1[0] ==
                    '''
                    for facts1 in br1:
                        for facts2 in br2:
                            current_match = [0, 1, 2, 2]
                            # extract the current pattern
                            current_var = [facts1[0], facts1[2], facts2[0], facts2[2]]
                            for i in range(2, 4):
                                if current_var[i] == current_var[0]:
                                    current_match[i] = 0
                                if current_var[i] == current_var[1]:
                                    current_match[i] = 1

                            # if the pattern matched
                            if current_match == match_var[0:4]:
                                head_var1_index = match_var.index(match_var[4])
                                head_var2_index = match_var.index(match_var[5])

                                current_new_fact = [current_var[head_var1_index], head_relation, current_var[head_var2_index]]

                                current_rule_instance = []
                                current_rule_instance.extend(facts1)
                                current_rule_instance.extend(facts2)
                                current_rule_instance.extend(current_new_fact)
                                new_writer.writerow(current_rule_instance)

                # close the per-rule handle on the original triples
                original_facts.close()

                # check duplication
                '''
                for old_facts in hr:
                    for candidate_facts, bd1, bd2 in candidate_new_facts, candidate_new_body1, candidate_new_body2:
                        if old_facts == candidate_facts:
                            candidate_new_facts.remove(candidate_facts)
                original_facts.close()
                # write candidate facts in to new file
                # for predictions in candidate_new_facts:
                #     new_writer.writerow(predictions)
                '''


# derive_new_triples(mined_rules_path="pca_sorted_rule.tsv", original_triples_path="triples_with_bootstrapping.tsv", new_triples_path="predictions_test.tsv")
--------------------------------------------------------------------------------
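For reference, a toy walk-through of the rule parsing and variable-matching scheme used in predict.py, applied to the two-atom example rule from its comments (the rule and the resulting pattern are illustrative only):

```python
# The 2-body example rule from the comments in predict.py, as AMIE+ would print it.
rule = "?e Exception ?b ?e Result ?a => ?a Exception ?b".split()

body_relation1 = rule[1]   # 'Exception'
body_relation2 = rule[4]   # 'Result'
head_relation = rule[8]    # 'Exception'

# Variables in the order predict.py collects them: body1 subj/obj, body2 subj/obj, head subj/obj.
rule_var = [rule[i] for i in (0, 2, 3, 5, 7, 9)]   # ['?e', '?b', '?e', '?a', '?a', '?b']

# Encode which positions share a variable: 0 = same as position 0, 1 = same as position 1,
# 2 = the remaining "third" variable.
match_var = [0, 1] + [2] * 4
for i in range(2, 6):
    if rule_var[i] == rule_var[0]:
        match_var[i] = 0
    if rule_var[i] == rule_var[1]:
        match_var[i] = 1

print(match_var)   # [0, 1, 0, 2, 2, 1] -- the pattern that grounded body facts must reproduce
```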