├── neo4j ├── is_a.tsv ├── has_symptom.tsv ├── risk.tsv ├── located_in.tsv ├── increases_risk.tsv ├── anatomy.tsv ├── symptom.tsv └── disease.tsv ├── config.yaml ├── pgmx_output ├── pgmx_output_links.tsv ├── pgmx_output_nodes.tsv └── pgmx_output_potentials.tsv ├── source ├── links.tsv ├── potentials.tsv └── nodes.tsv ├── google_to_neo4j.py ├── tsv_to_neo4j.py ├── LICENSE ├── upsert_tsv.py ├── google_to_pgmx.py ├── README.md ├── templates └── unicriterion_pgmx.txt ├── neo4j_commands.txt ├── pgmx_to_tsv.py ├── tsv_to_pgmx.py ├── .gitignore ├── manmade_backup.pgmx ├── manmade.pgmx └── manmade_google.pgmx /neo4j/is_a.tsv: -------------------------------------------------------------------------------- 1 | source target label model 2 | Lung cancer Lung disease IS_A Asia 3 | Tuberculosis Lung disease IS_A Asia 4 | COVID-19 Lung disease IS_A Asia 5 | -------------------------------------------------------------------------------- /neo4j/has_symptom.tsv: -------------------------------------------------------------------------------- 1 | source target label model 2 | Bronchitis Dyspnea HAS_SYMPTOM Asia 3 | Lung disease Dyspnea HAS_SYMPTOM Asia 4 | Lung disease X-ray HAS_SYMPTOM Asia 5 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | google_sheet_id: "1xUSIGgCz9Mf08pwT-tMsEAsa6icGbEdKAtNFLLT4zmk" 2 | google_sheet_node: "nodes" 3 | google_sheet_link: "links" 4 | google_sheet_potentials: "potentials" -------------------------------------------------------------------------------- /neo4j/risk.tsv: -------------------------------------------------------------------------------- 1 | name type role states x y label model description 2 | Visit To Asia finiteStates chance no; yes 204.0 55.0 Risk Asia NULL 3 | Smoking finiteStates chance no; yes 625.0 61.0 Risk Asia NULL 4 | 
-------------------------------------------------------------------------------- /neo4j/located_in.tsv: -------------------------------------------------------------------------------- 1 | source target label model 2 | Lung cancer Lung LOCATED_IN NULL 3 | Bronchitis Bronchi LOCATED_IN NULL 4 | Tuberculosis Lung LOCATED_IN NULL 5 | COVID-19 Lung LOCATED_IN NULL 6 | Lung disease Lung LOCATED_IN NULL 7 | -------------------------------------------------------------------------------- /neo4j/increases_risk.tsv: -------------------------------------------------------------------------------- 1 | source target label model 2 | Visit To Asia Tuberculosis INCREASES_RISK Asia 3 | Visit To Asia COVID-19 INCREASES_RISK Asia 4 | Smoking Bronchitis INCREASES_RISK Asia 5 | Smoking Lung cancer INCREASES_RISK Asia 6 | Smoking COVID-19 INCREASES_RISK Asia 7 | -------------------------------------------------------------------------------- /pgmx_output/pgmx_output_links.tsv: -------------------------------------------------------------------------------- 1 | source target 2 | Bronchitis Dyspnea 3 | Visit To Asia Tuberculosis 4 | Visit To Asia COVID-19 5 | Smoking Bronchitis 6 | Smoking Lung cancer 7 | Smoking COVID-19 8 | Lung cancer Lung disease 9 | Tuberculosis Lung disease 10 | COVID-19 Lung disease 11 | Lung disease Dyspnea 12 | Lung disease X-ray 13 | -------------------------------------------------------------------------------- /neo4j/anatomy.tsv: -------------------------------------------------------------------------------- 1 | name type role states x y label model description 2 | Lung NULL NULL NULL NULL NULL Anatomy NULL One of a pair of organs in the chest that supplies the body with oxygen, and removes carbon dioxide from the body. 3 | Bronchi NULL NULL NULL NULL NULL Anatomy NULL A bronchus is a passage or airway in the lower respiratory tract that conducts air into the lungs. 
4 | -------------------------------------------------------------------------------- /neo4j/symptom.tsv: -------------------------------------------------------------------------------- 1 | name type role states x y label model description 2 | X-ray finiteStates chance no; yes 214.0 397.0 Symptom Asia NULL 3 | Dyspnea finiteStates chance no; yes 509.0 400.0 Symptom Asia Shortness of breath — known medically as dyspnea — is often described as an intense tightening in the chest, air hunger, difficulty breathing, breathlessness or a feeling of suffocation. Very strenuous exercise, extreme temperatures, obesity and higher altitude all can cause shortness of breath in a healthy person. 4 | -------------------------------------------------------------------------------- /pgmx_output/pgmx_output_nodes.tsv: -------------------------------------------------------------------------------- 1 | name type role states x y 2 | X-ray finiteStates chance no; yes 214 397 3 | Bronchitis finiteStates chance no; yes 701 216 4 | Dyspnea finiteStates chance no; yes 509 400 5 | Visit To Asia finiteStates chance no; yes 204 55 6 | Smoking finiteStates chance no; yes 625 61 7 | Lung cancer finiteStates chance no; yes 444 163 8 | Tuberculosis finiteStates chance no; yes 202 163 9 | Lung disease finiteStates chance no; yes 325 284 10 | COVID-19 finiteStates chance no; yes 377 171 11 | -------------------------------------------------------------------------------- /source/links.tsv: -------------------------------------------------------------------------------- 1 | source target label model 2 | Bronchitis Dyspnea HAS_SYMPTOM Asia 3 | Visit To Asia Tuberculosis INCREASES_RISK Asia 4 | Visit To Asia COVID-19 INCREASES_RISK Asia 5 | Smoking Bronchitis INCREASES_RISK Asia 6 | Smoking Lung cancer INCREASES_RISK Asia 7 | Smoking COVID-19 INCREASES_RISK Asia 8 | Lung cancer Lung disease IS_A Asia 9 | Tuberculosis Lung disease IS_A Asia 10 | COVID-19 Lung disease IS_A Asia 11 | Lung disease Dyspnea 
HAS_SYMPTOM Asia 12 | Lung disease X-ray HAS_SYMPTOM Asia 13 | Lung cancer Lung LOCATED_IN 14 | Bronchitis Bronchi LOCATED_IN 15 | Tuberculosis Lung LOCATED_IN 16 | COVID-19 Lung LOCATED_IN 17 | Lung disease Lung LOCATED_IN -------------------------------------------------------------------------------- /pgmx_output/pgmx_output_potentials.tsv: -------------------------------------------------------------------------------- 1 | type role variables values 2 | Table conditionalProbability X-ray; Lung disease 0.95 0.05 0.02 0.98 3 | Table conditionalProbability Bronchitis; Smoking 0.7 0.3 0.4 0.6 4 | Table conditionalProbability Dyspnea; Lung disease; Bronchitis 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9 5 | Table conditionalProbability Visit To Asia 0.99 0.01 6 | Table conditionalProbability Smoking 0.5 0.5 7 | Table conditionalProbability Lung cancer; Smoking 0.99 0.01 0.9 0.1 8 | Table conditionalProbability Tuberculosis; Visit To Asia 0.99 0.01 0.95 0.05 9 | Table conditionalProbability Lung disease; Lung cancer; Tuberculosis; COVID-19 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 10 | Table conditionalProbability COVID-19; Smoking; Visit To Asia 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95 11 | -------------------------------------------------------------------------------- /source/potentials.tsv: -------------------------------------------------------------------------------- 1 | type role variables values model 2 | Table conditionalProbability X-ray; Lung disease 0.95 0.05 0.02 0.98 Asia 3 | Table conditionalProbability Bronchitis; Smoking 0.7 0.3 0.4 0.6 Asia 4 | Table conditionalProbability Dyspnea; Lung disease; Bronchitis 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9 Asia 5 | Table conditionalProbability Visit To Asia 0.99 0.01 Asia 6 | Table conditionalProbability Smoking 0.5 0.5 Asia 7 | Table conditionalProbability Lung cancer; Smoking 0.99 0.01 0.9 0.1 Asia 8 | Table conditionalProbability Tuberculosis; Visit To Asia 0.99 0.01 0.95 0.05 Asia 9 | Table 
import pandas as pd
import os


# Default destination for the per-label TSV files consumed by the Neo4j
# LOAD CSV commands.
output_folder = "./neo4j"


def _write_label_groups(df: pd.DataFrame, output_dir: str) -> None:
    """Write one ``<label>.tsv`` file into ``output_dir`` per distinct ``label`` value of ``df``."""
    # groupby() drops rows whose label is missing (NaN/None); the previous
    # pd.unique() loop passed NaN to ``.lower()`` and crashed on such rows.
    # sort=False keeps the labels in first-appearance order.
    for label, group in df.groupby("label", sort=False):
        file_name = str(label).lower() + ".tsv"
        group.to_csv(os.path.join(output_dir, file_name), sep="\t", index=False, na_rep='NULL')


def to_neo4j(nodes_df: pd.DataFrame, links_df: pd.DataFrame, output_dir: str = None):
    """
    Split the node and link tables by their ``label`` column and write one
    tab-separated file per label, named after the lower-cased label.

    Args:
        nodes_df (pd.DataFrame): node table; must contain a ``label`` column.
            All columns are written out unchanged.
        links_df (pd.DataFrame): link table; must contain a ``label`` column.
            All columns are written out unchanged.
        output_dir (str, optional): folder to write into. Defaults to the
            module-level ``output_folder``. It is created if it does not exist.

    Returns:
        None. Missing cell values are written as the literal string ``NULL``.
        Rows without a label are skipped.

    Raises:
        KeyError: if either dataframe has no ``label`` column.
    """
    if output_dir is None:
        output_dir = output_folder

    # Do not rely on the folder having been checked out / created beforehand.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    _write_label_groups(nodes_df, output_dir)
    _write_label_groups(links_df, output_dir)


if __name__ == "__main__":

    nodes = pd.read_csv("./source/nodes.tsv", sep="\t")
    links = pd.read_csv("./source/links.tsv", sep="\t")

    to_neo4j(nodes, links)
import pandas as pd
import sys
import os


def upsert_df(df: pd.DataFrame, old_df: pd.DataFrame, key):
    """
    Upsert the rows of ``df`` into ``old_df``, matching rows on ``key``.

    Values from ``df`` win; cells that are missing in ``df`` (or rows/columns
    that only exist in ``old_df``) are filled from ``old_df``.

    Args:
        df (pd.DataFrame): dataframe holding the new data.
        old_df (pd.DataFrame): dataframe holding the existing data.
        key (str | list[str]): column name, or list of column names, that
            identifies a row. Must be present in both dataframes.

    Returns:
        pd.DataFrame: combined dataframe indexed by ``key``. Rows keep the
        order of ``old_df`` with rows that are new in ``df`` appended; columns
        keep the order of ``old_df`` with columns that are new in ``df``
        appended.

    Raises:
        KeyError: if ``key`` is not a column of both dataframes.
    """
    new_indexed = df.set_index(key)
    old_indexed = old_df.set_index(key)
    combined = new_indexed.combine_first(old_indexed)

    # combine_first() returns the union of both indexes sorted by key and the
    # union of the columns sorted alphabetically, which would reshuffle the
    # TSV on every upsert. Restore the layout of the existing data instead.
    row_order = old_indexed.index.append(
        new_indexed.index.difference(old_indexed.index, sort=False)
    )
    column_order = list(old_indexed.columns) + [
        col for col in new_indexed.columns if col not in old_indexed.columns
    ]

    # reindex() cannot handle duplicate labels; in that case keep the
    # combine_first() ordering rather than failing.
    if not (combined.index.is_unique and row_order.is_unique):
        return combined

    # Make sure the key column name(s) survive so to_csv() writes them as header.
    row_order = row_order.set_names(list(combined.index.names))
    return combined.reindex(index=row_order, columns=column_order)


if __name__ == "__main__":
    # Maps each TSV base name to the column(s) that identify a row in it.
    files = {"potentials": "variables", "nodes": "name", "links": ["source", "target"]}
    for f in files:
        old_df_file = os.path.join("./source", f"{f}.tsv")
        new_df_file = os.path.join("./pgmx_output", f"pgmx_output_{f}.tsv")

        old_df = pd.read_csv(old_df_file, sep="\t")
        new_df = pd.read_csv(new_df_file, sep="\t")

        combined = upsert_df(new_df, old_df, files[f])
        # The combined frame is indexed by the key, so the index is written too.
        combined.to_csv(old_df_file, sep="\t", na_rep='NULL')
3 | Lung cancer finiteStates chance no; yes 444.0 163.0 Disease Asia Lung cancers usually are grouped into two main types called small cell and non-small cell (including adenocarcinoma and squamous cell carcinoma). These types of lung cancer grow differently and are treated differently. Non-small cell lung cancer is more common than small cell lung cancer. 4 | Tuberculosis finiteStates chance no; yes 202.0 163.0 Disease Asia Tuberculosis (TB) is a disease caused by germs that are spread from person to person through the air. TB usually affects the lungs, but it can also affect other parts of the body, such as the brain, the kidneys, or the spine. A person with TB can die if they do not get treatment. 5 | Lung disease finiteStates chance no; yes 325.0 284.0 Disease Asia NULL 6 | COVID-19 finiteStates chance no; yes 377.0 171.0 Disease Asia Coronavirus disease 2019 (COVID-19) is a contagious disease caused by a virus, the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The first known case was identified in Wuhan, China, in December 2019. The disease quickly spread worldwide, resulting in the COVID-19 pandemic. 
import pandas as pd
import yaml
import sys
import tsv_to_pgmx
from urllib.parse import quote


def _load_config(path: str = "config.yaml") -> dict:
    """Parse the YAML configuration file and return its content."""
    with open(path, "r") as stream:
        return yaml.safe_load(stream)


def _read_sheet(sheet_id: str, sheet_name: str) -> pd.DataFrame:
    """Download one tab of the Google Sheet as CSV and load it into a dataframe."""
    # Sheet names may contain spaces or other characters that are not valid
    # in a URL query, so they are percent-encoded.
    url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}"
        f"/gviz/tq?tqx=out:csv&sheet={quote(str(sheet_name))}"
    )
    return pd.read_csv(url)


def _rows_for_model(df: pd.DataFrame, model: str) -> pd.DataFrame:
    """Keep the rows whose ';'-separated ``model`` cell lists ``model``."""
    tagged = df[df['model'].notna()]
    mask = (
        tagged['model']
        .astype(str)
        .str.split(";")
        .apply(lambda tags: model in [t.strip() for t in tags])
        # Guarantees a boolean mask even when no row is tagged at all.
        .astype(bool)
    )
    return tagged[mask]


def main(argv=None) -> int:
    """
    Build a PGMX document for one model from the configured Google Sheet and
    print it to stdout.

    Args:
        argv (list[str], optional): command-line arguments; defaults to
            ``sys.argv``. ``argv[1]`` is the model name.

    Returns:
        int: process exit code (0 on success, 1 on usage or config error).
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print("usage: python google_to_pgmx.py [model_name] > [model].pgmx", file=sys.stderr)
        return 1
    model = argv[1]

    try:
        PARAM = _load_config("config.yaml")
    except yaml.YAMLError as exc:
        # Previously the error was printed and execution continued into a
        # NameError on the undefined configuration; stop here instead.
        print(exc, file=sys.stderr)
        return 1

    sheet_id = PARAM["google_sheet_id"]
    nodes = _read_sheet(sheet_id, PARAM["google_sheet_node"])
    links = _read_sheet(sheet_id, PARAM["google_sheet_link"])
    potentials = _read_sheet(sheet_id, PARAM["google_sheet_potentials"])

    pgmx = tsv_to_pgmx.get_pgmx(
        _rows_for_model(nodes, model),
        _rows_for_model(links, model),
        _rows_for_model(potentials, model),
    )
    print (pgmx)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Introduction 4 | 5 | 6 | 7 | 8 | 9 | This repository contains code and data for my article "[How to build a Bayesian Knowledge Graph](https://dgg32.medium.com/how-to-build-a-bayesian-knowledge-graph-dee1cc821d35)". 10 | 11 | 1. The scripts move data between Google Sheets, Neo4j and OpenMarkov. 12 | 13 | 14 | 15 | 2. The source folder contains the TSV files downloaded from the Google Sheets for debugging purposes. The pgmx_output folder contains TSV files that are extracted from a pgmx file. 16 | 17 | 18 | 19 | 20 | 21 | # Prerequisites 22 | 23 | Neo4j Desktop 24 | 25 | OpenMarkov 26 | 27 | 28 | # Run 29 | First, edit config.yaml to match your Google Sheets setup. 30 | 31 | 1. Convert data from Google Sheets to a pgmx file 32 | ```console 33 | python google_to_pgmx.py [model_name] > [model].pgmx 34 | ``` 35 | 36 | For example: 37 | ```console 38 | python google_to_pgmx.py Asia > manmade_google.pgmx 39 | ``` 40 | 41 | 42 | 2. Convert the local TSV files in ./source to Neo4j import files (use google_to_neo4j.py instead to read directly from Google Sheets) 43 | 44 | ```console 45 | python tsv_to_neo4j.py 46 | ``` 47 | It generates one TSV file per node label and per link label in the ./neo4j folder. 48 | 49 | 3. Other utility files. 50 | 51 | pgmx_to_tsv.py parses a PGMX file and generates a node, a link and a potential TSV file. Use this script when you have modified data in OpenMarkov and want to write the changes back into your TSV files. 52 | 53 | You can then use upsert_tsv.py to upsert the new data into the old TSV files. 
54 | 55 | ## Authors 56 | 57 | 58 | 59 | * **Sixing Huang** - *Concept and Coding* 60 | 61 | 62 | 63 | ## License 64 | 65 | 66 | 67 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details 68 | -------------------------------------------------------------------------------- /templates/unicriterion_pgmx.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | {% for node in nodes -%} 13 | 14 | 15 | 16 | {% for s in node.states -%} 17 | 18 | {% endfor %} 19 | 20 | 21 | {% endfor %} 22 | 23 | 24 | {% for link in links -%} 25 | 26 | 27 | 28 | 29 | {% endfor %} 30 | 31 | 32 | {% for potential in potentials -%} 33 | 34 | 35 | {% for v in potential.variables -%} 36 | 37 | {% endfor %} 38 | 39 | {{ potential.value }} 40 | 41 | {% endfor %} 42 | 43 | 44 | 45 | 46 | UNICRITERION 47 | 48 | 49 | -------------------------------------------------------------------------------- /neo4j_commands.txt: -------------------------------------------------------------------------------- 1 | CREATE CONSTRAINT FOR (s:Symptom) REQUIRE s.name IS UNIQUE; 2 | 3 | CREATE CONSTRAINT FOR (d:Disease) REQUIRE d.name IS UNIQUE; 4 | 5 | CREATE CONSTRAINT FOR (r:Risk) REQUIRE r.name IS UNIQUE; 6 | 7 | CREATE CONSTRAINT FOR (a:Anatomy) REQUIRE a.name IS UNIQUE; 8 | 9 | LOAD CSV WITH HEADERS FROM 'file:///symptom.tsv' AS row FIELDTERMINATOR '\t' MERGE (s:Symptom {name: row.name, model: row.model, description: row.description, role: row.role, type: row.type, states: row.states}); 10 | 11 | LOAD CSV WITH HEADERS FROM 'file:///disease.tsv' AS row FIELDTERMINATOR '\t' MERGE (d:Disease {name: row.name, model: row.model, description: row.description, role: row.role, type: row.type, states: row.states}); 12 | 13 | LOAD CSV WITH HEADERS FROM 'file:///risk.tsv' AS row FIELDTERMINATOR '\t' MERGE (r:Risk {name: row.name, model: row.model, description: row.description, role: row.role, type: row.type, 
import pandas as pd
# Import BeautifulSoup
from bs4 import BeautifulSoup as bs
import os

# Input PGMX file and destination folder for the extracted TSV files.
PGMX_FILE = "manmade.pgmx"
OUTPUT_DIR = "pgmx_output"


def _coordinate(node, axis):
    """Return the node's ``axis`` ("x" or "y") coordinate as int, or None when absent."""
    coordinates = node.find("Coordinates")
    if coordinates is None or coordinates.get(axis) is None:
        return None
    # Go through float() so that values such as "204.0" are accepted too.
    return int(float(coordinates.get(axis)))


def _write_tsv(records, columns, file_name):
    """Write ``records`` (list of dicts) to OUTPUT_DIR/file_name as a tab-separated file."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Passing the columns explicitly keeps the header even when there are no records.
    df = pd.DataFrame.from_records(records, columns=columns)
    df.to_csv(os.path.join(OUTPUT_DIR, file_name), sep="\t", index=False, na_rep='NULL')


def main():
    """Parse PGMX_FILE and write its nodes, links and potentials as three TSV files."""
    # Read the XML file as bytes so the parser can honour the encoding declared
    # in the document instead of the platform's default text encoding.
    with open(PGMX_FILE, "rb") as file:
        bs_content = bs(file.read(), "xml")

    nodes = []
    for node in bs_content.find("Variables").find_all("Variable"):
        states = "; ".join([n.get("name") for n in node.find("States").find_all("State")])
        nodes.append({"name": node.get("name"), "type": node.get("type"), "role": node.get("role"),
                      "states": states, "x": _coordinate(node, "x"), "y": _coordinate(node, "y")})
    _write_tsv(nodes, ["name", "type", "role", "states", "x", "y"], "pgmx_output_nodes.tsv")

    links = []
    for l in bs_content.find("Links").find_all("Link"):
        # Each link holds exactly two Variable elements: source first, then target.
        source, target = [x.get("name") for x in l.find_all("Variable")]
        links.append({"source": source, "target": target})
    _write_tsv(links, ["source", "target"], "pgmx_output_links.tsv")

    potentials = []
    for potential in bs_content.find("Potentials").find_all("Potential"):
        variables = "; ".join([n.get("name") for n in potential.find("Variables").find_all("Variable")])
        values_tag = potential.find("Values")
        # Collapse whitespace/newlines so the value list stays on one TSV row.
        values = " ".join(values_tag.text.split()) if values_tag is not None else None
        potentials.append({"type": potential.get("type"), "role": potential.get("role"),
                           "variables": variables, "values": values})
    _write_tsv(potentials, ["type", "role", "variables", "values"], "pgmx_output_potentials.tsv")


if __name__ == "__main__":
    main()
4 | Dyspnea finiteStates chance no; yes 509 400 Symptom Asia Shortness of breath — known medically as dyspnea — is often described as an intense tightening in the chest, air hunger, difficulty breathing, breathlessness or a feeling of suffocation. Very strenuous exercise, extreme temperatures, obesity and higher altitude all can cause shortness of breath in a healthy person. 5 | Visit To Asia finiteStates chance no; yes 204 55 Risk Asia 6 | Smoking finiteStates chance no; yes 625 61 Risk Asia 7 | Lung cancer finiteStates chance no; yes 444 163 Disease Asia Lung cancers usually are grouped into two main types called small cell and non-small cell (including adenocarcinoma and squamous cell carcinoma). These types of lung cancer grow differently and are treated differently. Non-small cell lung cancer is more common than small cell lung cancer. 8 | Tuberculosis finiteStates chance no; yes 202 163 Disease Asia Tuberculosis (TB) is a disease caused by germs that are spread from person to person through the air. TB usually affects the lungs, but it can also affect other parts of the body, such as the brain, the kidneys, or the spine. A person with TB can die if they do not get treatment. 9 | Lung disease finiteStates chance no; yes 325 284 Disease Asia 10 | Lung Anatomy One of a pair of organs in the chest that supplies the body with oxygen, and removes carbon dioxide from the body. 11 | Bronchi Anatomy A bronchus is a passage or airway in the lower respiratory tract that conducts air into the lungs. 12 | COVID-19 finiteStates chance no; yes 377 171 Disease Asia Coronavirus disease 2019 (COVID-19) is a contagious disease caused by a virus, the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The first known case was identified in Wuhan, China, in December 2019. The disease quickly spread worldwide, resulting in the COVID-19 pandemic. 
import pandas as pd
from jinja2 import Environment, FileSystemLoader
import sys

# The template is looked up in ./templates relative to the working directory.
file_loader = FileSystemLoader('templates')
env = Environment(loader=file_loader)
template = env.get_template('unicriterion_pgmx.txt')


def _split_list(cell) -> list:
    """Split a ';'-separated cell into a list of stripped items."""
    return [item.strip() for item in str(cell).split(";")]


def _coordinate(row: pd.Series, column: str, default: int) -> int:
    """Return ``row[column]`` as int, or ``default`` when the cell is absent or empty."""
    if column not in row:
        return default
    value = row[column]
    # NaN is truthy, so a plain truthiness test let missing cells through and
    # int(nan) raised; test for missing values explicitly.
    if pd.isna(value) or value == "":
        return default
    return int(float(value))


def _filter_by_model(df: pd.DataFrame, model: str) -> pd.DataFrame:
    """Keep the rows whose ';'-separated ``model`` cell lists ``model``."""
    tagged = df[df['model'].notna()]
    mask = (
        tagged['model']
        .astype(str)
        .str.split(";")
        .apply(lambda tags: model in [t.strip() for t in tags])
        # Guarantees a boolean mask even when no row is tagged at all.
        .astype(bool)
    )
    return tagged[mask]


def get_pgmx(nodes_df: pd.DataFrame, links_df: pd.DataFrame, potentials_df: pd.DataFrame) -> str:
    """
    Render the node, link and potential tables into a PGMX document.

    Args:
        nodes_df (pd.DataFrame): pandas dataframe with columns: name, type, role, states
            (``states`` is a ';'-separated list); optional columns x and y hold the
            node position.
        links_df (pd.DataFrame): pandas dataframe with columns: source, target
        potentials_df (pd.DataFrame): pandas dataframe with columns: type, role, variables, values
            (``variables`` is a ';'-separated list).

    Returns:
        str: pgmx file

    Raises:
        ValueError: if a node has no ``states`` value.
    """

    nodes_to_jinja = []
    for i, row in nodes_df.iterrows():
        if pd.isna(row["states"]):
            raise ValueError(f"node {row['name']!r} has no states")

        # Fallback position derived from the row's index label, used when the
        # table carries no coordinate for the node.
        fallback = 1 + i * 100

        # row["name"] rather than row.name: the attribute is the index label.
        nodes_to_jinja.append({
            "name": row["name"],
            "type": row["type"],
            "role": row["role"],
            "states": _split_list(row["states"]),
            "x": _coordinate(row, "x", fallback),
            "y": _coordinate(row, "y", fallback),
        })

    links_to_jinja = [
        {"source": row["source"], "target": row["target"]}
        for _, row in links_df.iterrows()
    ]

    # row["values"] rather than row.values: the attribute is the row's ndarray.
    # The template reads the key "value".
    potentials_to_jinja = [
        {"type": row["type"], "role": row["role"],
         "variables": _split_list(row["variables"]), "value": row["values"]}
        for _, row in potentials_df.iterrows()
    ]

    output = template.render(nodes=nodes_to_jinja, links = links_to_jinja, potentials = potentials_to_jinja)
    return (output)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: python tsv_to_pgmx.py [model_name] > [model].pgmx")
    model = sys.argv[1]

    nodes = pd.read_csv("./source/nodes.tsv", sep="\t")
    links = pd.read_csv("./source/links.tsv", sep="\t")
    potentials = pd.read_csv("./source/potentials.tsv", sep="\t")

    model_nodes = _filter_by_model(nodes, model)
    model_links = _filter_by_model(links, model)
    model_potentials = _filter_by_model(potentials, model)

    pgmx = get_pgmx(model_nodes, model_links, model_potentials)
    print (pgmx)
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /manmade_backup.pgmx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 0.95 0.05 0.02 0.98 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 0.7 0.3 0.4 0.6 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9 158 | 159 | 160 | 161 | 162 | 163 | 164 | 0.99 0.01 165 | 166 | 167 | 168 | 169 | 170 | 171 | 0.5 0.5 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 0.99 0.01 0.9 0.1 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 0.99 0.01 0.95 0.05 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95 206 | 207 | 208 | 209 | 210 | 211 | 212 | UNICRITERION 213 | 214 | 215 | 216 | -------------------------------------------------------------------------------- /manmade.pgmx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 
42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 0.95 0.05 0.02 0.98 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 0.7 0.3 0.4 0.6 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9 158 | 159 | 160 | 161 | 162 | 163 | 164 | 0.99 0.01 165 | 166 | 167 | 168 | 169 | 170 | 171 | 0.5 0.5 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 0.99 0.01 0.9 0.1 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 0.99 0.01 0.95 0.05 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95 207 | 208 | 209 | 210 | 211 | 212 | 213 | UNICRITERION 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /manmade_google.pgmx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 
116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 0.95 0.05 0.02 0.98 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 0.7 0.3 0.4 0.6 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9 158 | 159 | 160 | 161 | 162 | 163 | 164 | 0.99 0.01 165 | 166 | 167 | 168 | 169 | 170 | 171 | 0.5 0.5 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 0.99 0.01 0.9 0.1 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 0.99 0.01 0.95 0.05 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95 207 | 208 | 209 | 210 | 211 | 212 | 213 | UNICRITERION 214 | 215 | 216 | 217 | --------------------------------------------------------------------------------