├── neo4j
├── is_a.tsv
├── has_symptom.tsv
├── risk.tsv
├── located_in.tsv
├── increases_risk.tsv
├── anatomy.tsv
├── symptom.tsv
└── disease.tsv
├── config.yaml
├── pgmx_output
├── pgmx_output_links.tsv
├── pgmx_output_nodes.tsv
└── pgmx_output_potentials.tsv
├── source
├── links.tsv
├── potentials.tsv
└── nodes.tsv
├── google_to_neo4j.py
├── tsv_to_neo4j.py
├── LICENSE
├── upsert_tsv.py
├── google_to_pgmx.py
├── README.md
├── templates
└── unicriterion_pgmx.txt
├── neo4j_commands.txt
├── pgmx_to_tsv.py
├── tsv_to_pgmx.py
├── .gitignore
├── manmade_backup.pgmx
├── manmade.pgmx
└── manmade_google.pgmx
/neo4j/is_a.tsv:
--------------------------------------------------------------------------------
1 | source target label model
2 | Lung cancer Lung disease IS_A Asia
3 | Tuberculosis Lung disease IS_A Asia
4 | COVID-19 Lung disease IS_A Asia
5 |
--------------------------------------------------------------------------------
/neo4j/has_symptom.tsv:
--------------------------------------------------------------------------------
1 | source target label model
2 | Bronchitis Dyspnea HAS_SYMPTOM Asia
3 | Lung disease Dyspnea HAS_SYMPTOM Asia
4 | Lung disease X-ray HAS_SYMPTOM Asia
5 |
--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | google_sheet_id: "1xUSIGgCz9Mf08pwT-tMsEAsa6icGbEdKAtNFLLT4zmk"
2 | google_sheet_node: "nodes"
3 | google_sheet_link: "links"
4 | google_sheet_potentials: "potentials"
--------------------------------------------------------------------------------
/neo4j/risk.tsv:
--------------------------------------------------------------------------------
1 | name type role states x y label model description
2 | Visit To Asia finiteStates chance no; yes 204.0 55.0 Risk Asia NULL
3 | Smoking finiteStates chance no; yes 625.0 61.0 Risk Asia NULL
4 |
--------------------------------------------------------------------------------
/neo4j/located_in.tsv:
--------------------------------------------------------------------------------
1 | source target label model
2 | Lung cancer Lung LOCATED_IN NULL
3 | Bronchitis Bronchi LOCATED_IN NULL
4 | Tuberculosis Lung LOCATED_IN NULL
5 | COVID-19 Lung LOCATED_IN NULL
6 | Lung disease Lung LOCATED_IN NULL
7 |
--------------------------------------------------------------------------------
/neo4j/increases_risk.tsv:
--------------------------------------------------------------------------------
1 | source target label model
2 | Visit To Asia Tuberculosis INCREASES_RISK Asia
3 | Visit To Asia COVID-19 INCREASES_RISK Asia
4 | Smoking Bronchitis INCREASES_RISK Asia
5 | Smoking Lung cancer INCREASES_RISK Asia
6 | Smoking COVID-19 INCREASES_RISK Asia
7 |
--------------------------------------------------------------------------------
/pgmx_output/pgmx_output_links.tsv:
--------------------------------------------------------------------------------
1 | source target
2 | Bronchitis Dyspnea
3 | Visit To Asia Tuberculosis
4 | Visit To Asia COVID-19
5 | Smoking Bronchitis
6 | Smoking Lung cancer
7 | Smoking COVID-19
8 | Lung cancer Lung disease
9 | Tuberculosis Lung disease
10 | COVID-19 Lung disease
11 | Lung disease Dyspnea
12 | Lung disease X-ray
13 |
--------------------------------------------------------------------------------
/neo4j/anatomy.tsv:
--------------------------------------------------------------------------------
1 | name type role states x y label model description
2 | Lung NULL NULL NULL NULL NULL Anatomy NULL One of a pair of organs in the chest that supplies the body with oxygen, and removes carbon dioxide from the body.
3 | Bronchi NULL NULL NULL NULL NULL Anatomy NULL A bronchus is a passage or airway in the lower respiratory tract that conducts air into the lungs.
4 |
--------------------------------------------------------------------------------
/neo4j/symptom.tsv:
--------------------------------------------------------------------------------
1 | name type role states x y label model description
2 | X-ray finiteStates chance no; yes 214.0 397.0 Symptom Asia NULL
3 | Dyspnea finiteStates chance no; yes 509.0 400.0 Symptom Asia Shortness of breath — known medically as dyspnea — is often described as an intense tightening in the chest, air hunger, difficulty breathing, breathlessness or a feeling of suffocation. Very strenuous exercise, extreme temperatures, obesity and higher altitude all can cause shortness of breath in a healthy person.
4 |
--------------------------------------------------------------------------------
/pgmx_output/pgmx_output_nodes.tsv:
--------------------------------------------------------------------------------
1 | name type role states x y
2 | X-ray finiteStates chance no; yes 214 397
3 | Bronchitis finiteStates chance no; yes 701 216
4 | Dyspnea finiteStates chance no; yes 509 400
5 | Visit To Asia finiteStates chance no; yes 204 55
6 | Smoking finiteStates chance no; yes 625 61
7 | Lung cancer finiteStates chance no; yes 444 163
8 | Tuberculosis finiteStates chance no; yes 202 163
9 | Lung disease finiteStates chance no; yes 325 284
10 | COVID-19 finiteStates chance no; yes 377 171
11 |
--------------------------------------------------------------------------------
/source/links.tsv:
--------------------------------------------------------------------------------
1 | source target label model
2 | Bronchitis Dyspnea HAS_SYMPTOM Asia
3 | Visit To Asia Tuberculosis INCREASES_RISK Asia
4 | Visit To Asia COVID-19 INCREASES_RISK Asia
5 | Smoking Bronchitis INCREASES_RISK Asia
6 | Smoking Lung cancer INCREASES_RISK Asia
7 | Smoking COVID-19 INCREASES_RISK Asia
8 | Lung cancer Lung disease IS_A Asia
9 | Tuberculosis Lung disease IS_A Asia
10 | COVID-19 Lung disease IS_A Asia
11 | Lung disease Dyspnea HAS_SYMPTOM Asia
12 | Lung disease X-ray HAS_SYMPTOM Asia
13 | Lung cancer Lung LOCATED_IN
14 | Bronchitis Bronchi LOCATED_IN
15 | Tuberculosis Lung LOCATED_IN
16 | COVID-19 Lung LOCATED_IN
17 | Lung disease Lung LOCATED_IN
--------------------------------------------------------------------------------
/pgmx_output/pgmx_output_potentials.tsv:
--------------------------------------------------------------------------------
1 | type role variables values
2 | Table conditionalProbability X-ray; Lung disease 0.95 0.05 0.02 0.98
3 | Table conditionalProbability Bronchitis; Smoking 0.7 0.3 0.4 0.6
4 | Table conditionalProbability Dyspnea; Lung disease; Bronchitis 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9
5 | Table conditionalProbability Visit To Asia 0.99 0.01
6 | Table conditionalProbability Smoking 0.5 0.5
7 | Table conditionalProbability Lung cancer; Smoking 0.99 0.01 0.9 0.1
8 | Table conditionalProbability Tuberculosis; Visit To Asia 0.99 0.01 0.95 0.05
9 | Table conditionalProbability Lung disease; Lung cancer; Tuberculosis; COVID-19 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0
10 | Table conditionalProbability COVID-19; Smoking; Visit To Asia 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95
11 |
--------------------------------------------------------------------------------
/source/potentials.tsv:
--------------------------------------------------------------------------------
1 | type role variables values model
2 | Table conditionalProbability X-ray; Lung disease 0.95 0.05 0.02 0.98 Asia
3 | Table conditionalProbability Bronchitis; Smoking 0.7 0.3 0.4 0.6 Asia
4 | Table conditionalProbability Dyspnea; Lung disease; Bronchitis 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9 Asia
5 | Table conditionalProbability Visit To Asia 0.99 0.01 Asia
6 | Table conditionalProbability Smoking 0.5 0.5 Asia
7 | Table conditionalProbability Lung cancer; Smoking 0.99 0.01 0.9 0.1 Asia
8 | Table conditionalProbability Tuberculosis; Visit To Asia 0.99 0.01 0.95 0.05 Asia
9 | Table conditionalProbability Lung disease; Lung cancer; Tuberculosis; COVID-19 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 Asia
10 | Table conditionalProbability COVID-19; Smoking; Visit To Asia 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95 Asia
--------------------------------------------------------------------------------
/google_to_neo4j.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import tsv_to_neo4j
3 | import yaml
4 |
# Not read anywhere in this script; kept so the module-level name stays available.
output_folder = "./neo4j"

CONFIG_FILE = "config.yaml"


def _load_config(path):
    """Parse the YAML configuration file and return its content.

    Exits with a readable message when the file is not valid YAML; the
    previous behaviour printed the error and then failed later with a
    NameError because PARAM was never assigned.
    """
    with open(path, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            raise SystemExit(f"Could not parse {path}: {exc}")


def _sheet_csv_url(sheet_id, sheet_name):
    """Build the CSV export URL of one worksheet of a Google Sheets document."""
    return f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"


PARAM = _load_config(CONFIG_FILE)

if __name__ == "__main__":
    sheet_id = PARAM["google_sheet_id"]

    # Download the node and link worksheets as dataframes.
    nodes = pd.read_csv(_sheet_csv_url(sheet_id, PARAM["google_sheet_node"]))
    links = pd.read_csv(_sheet_csv_url(sheet_id, PARAM["google_sheet_link"]))

    # Hand both dataframes to tsv_to_neo4j.to_neo4j.
    tsv_to_neo4j.to_neo4j(nodes, links)
--------------------------------------------------------------------------------
/tsv_to_neo4j.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import os
3 |
4 |
output_folder = "./neo4j"


def _write_by_label(df: pd.DataFrame, folder: str) -> None:
    """Write one '<label>.tsv' file into `folder` per distinct non-null label of `df`."""
    # Rows without a label cannot be mapped to a file name, so they are skipped
    # (calling .lower() on a NaN label would raise AttributeError).
    for label in pd.unique(df['label'].dropna()):
        subset = df[df['label'] == label]
        target = os.path.join(folder, str(label).lower() + ".tsv")
        subset.to_csv(target, sep="\t", index=False, na_rep='NULL')


def to_neo4j(nodes_df: pd.DataFrame, links_df: pd.DataFrame):
    """
    Split the node and link tables by their 'label' column and write each
    group to '<output_folder>/<label in lower case>.tsv'. Missing cells are
    written as the literal string NULL.

    Args:
        nodes_df (pd.DataFrame): node table; must contain a 'label' column
        links_df (pd.DataFrame): link table; must contain a 'label' column

    Returns:
        None
    """
    # The folder may not exist yet on a fresh checkout.
    os.makedirs(output_folder, exist_ok=True)

    _write_by_label(nodes_df, output_folder)
    _write_by_label(links_df, output_folder)


if __name__ == "__main__":

    nodes = pd.read_csv("./source/nodes.tsv", sep="\t")
    links = pd.read_csv("./source/links.tsv", sep="\t")

    to_neo4j(nodes, links)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Sixing Huang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/upsert_tsv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import sys
3 | import os
4 |
def upsert_df(df: pd.DataFrame, old_df: pd.DataFrame, key: "str | list"):
    """
    Upsert the rows of `df` into `old_df`, matching rows on `key`.

    Values from `df` win wherever they are present; cells that are missing in
    `df` (or whole rows/columns that only exist in `old_df`) are filled from
    `old_df`.

    Args:
        df (pd.DataFrame): new data; must contain the key column(s)
        old_df (pd.DataFrame): existing data; must contain the key column(s)
        key (str | list): column name, or list of column names, identifying a row

    Returns:
        pd.DataFrame: combined dataframe indexed by `key`, with the columns of
        `old_df` first (in their original order) followed by columns that only
        exist in `df`
    """
    new_indexed = df.set_index(key)
    old_indexed = old_df.set_index(key)
    combined = new_indexed.combine_first(old_indexed)

    # combine_first may hand the columns back in a different order; restore
    # the layout of the existing table so the TSV schema stays stable.
    ordered = list(old_indexed.columns)
    ordered += [col for col in new_indexed.columns if col not in old_indexed.columns]
    return combined.reindex(columns=ordered)


if __name__ == "__main__":
    # Maps each table name to the column(s) used as the upsert key.
    files = {"potentials": "variables", "nodes": "name", "links": ["source", "target"]}
    for table, key_columns in files.items():
        old_df_file = os.path.join("./source", f"{table}.tsv")
        new_df_file = os.path.join("./pgmx_output", f"pgmx_output_{table}.tsv")

        old_df = pd.read_csv(old_df_file, sep="\t")
        new_df = pd.read_csv(new_df_file, sep="\t")

        combined = upsert_df(new_df, old_df, key_columns)
        # The file in ./source is overwritten with the merged table.
        combined.to_csv(old_df_file, sep="\t", na_rep='NULL')
32 |
33 |
--------------------------------------------------------------------------------
/neo4j/disease.tsv:
--------------------------------------------------------------------------------
1 | name type role states x y label model description
2 | Bronchitis finiteStates chance no; yes 701.0 216.0 Disease Asia Bronchitis is inflammation of the airways in the lungs that is usually caused by an infection. It often gets better without treatment in around 3 weeks. Some people have long-term inflammation of the airways in the lungs called chronic bronchitis. This is known as chronic obstructive pulmonary disease (COPD).
3 | Lung cancer finiteStates chance no; yes 444.0 163.0 Disease Asia Lung cancers usually are grouped into two main types called small cell and non-small cell (including adenocarcinoma and squamous cell carcinoma). These types of lung cancer grow differently and are treated differently. Non-small cell lung cancer is more common than small cell lung cancer.
4 | Tuberculosis finiteStates chance no; yes 202.0 163.0 Disease Asia Tuberculosis (TB) is a disease caused by germs that are spread from person to person through the air. TB usually affects the lungs, but it can also affect other parts of the body, such as the brain, the kidneys, or the spine. A person with TB can die if they do not get treatment.
5 | Lung disease finiteStates chance no; yes 325.0 284.0 Disease Asia NULL
6 | COVID-19 finiteStates chance no; yes 377.0 171.0 Disease Asia Coronavirus disease 2019 (COVID-19) is a contagious disease caused by a virus, the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The first known case was identified in Wuhan, China, in December 2019. The disease quickly spread worldwide, resulting in the COVID-19 pandemic.
7 |
--------------------------------------------------------------------------------
/google_to_pgmx.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import yaml
3 | import sys
4 | import tsv_to_pgmx
5 |
# The model name is a required command-line argument.
if len(sys.argv) < 2:
    raise SystemExit("usage: python google_to_pgmx.py <model_name> > <model>.pgmx")
model = sys.argv[1]


def _load_config(path):
    """Parse the YAML configuration file; exit with a message if it is invalid."""
    with open(path, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Previously the error was only printed and PARAM stayed undefined.
            raise SystemExit(f"Could not parse {path}: {exc}")


def _sheet_csv_url(sheet_id, sheet_name):
    """Build the CSV export URL of one worksheet of a Google Sheets document."""
    return f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"


def _filter_by_model(df, model_name):
    """Keep the rows whose ';'-separated 'model' cell lists `model_name`.

    Rows with an empty 'model' cell are dropped.
    """
    tagged = df[df['model'].notna()]
    # Built as a plain list so that a table whose 'model' column is entirely
    # empty yields an empty result instead of failing on the .str accessor.
    keep = [model_name in [tag.strip() for tag in str(cell).split(";")] for cell in tagged['model']]
    return tagged[pd.Series(keep, index=tagged.index, dtype=bool)]


PARAM = _load_config("config.yaml")

sheet_id = PARAM["google_sheet_id"]
nodes_sheet = PARAM["google_sheet_node"]
links_sheet = PARAM["google_sheet_link"]
potentials_name = PARAM["google_sheet_potentials"]

# Download the three worksheets as dataframes.
nodes = pd.read_csv(_sheet_csv_url(sheet_id, nodes_sheet))
links = pd.read_csv(_sheet_csv_url(sheet_id, links_sheet))
potentials = pd.read_csv(_sheet_csv_url(sheet_id, potentials_name))

# Restrict every table to the requested model.
model_nodes = _filter_by_model(nodes, model)
model_links = _filter_by_model(links, model)
model_potentials = _filter_by_model(potentials, model)

# Render the PGMX document and write it to stdout.
pgmx = tsv_to_pgmx.get_pgmx(model_nodes, model_links, model_potentials)
print (pgmx)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Introduction
4 |
5 |
6 |
7 |
8 |
9 | This repository contains code and data for my article "[How to build a Bayesian Knowledge Graph](https://dgg32.medium.com/how-to-build-a-bayesian-knowledge-graph-dee1cc821d35)".
10 |
11 | 1. The scripts are for data flow between Google Sheets, Neo4j and OpenMarkov.
12 |
13 |
14 |
15 | 2. The source folder contains the TSV files downloaded from Google Sheets for debugging purposes. The pgmx_output folder contains TSV files extracted from a pgmx file.
16 |
17 |
18 |
19 |
20 |
21 | # Prerequisite
22 |
23 | Neo4j Desktop
24 |
25 | OpenMarkov
26 |
27 |
28 | # Run
29 | First, edit config.yaml to match your Google Sheets setup.
30 |
31 | 1. Convert data from Google Sheets to a pgmx file
32 | ```console
33 | python google_to_pgmx.py [model_name] > [model].pgmx
34 | ```
35 |
36 | For example:
37 | ```console
38 | python google_to_pgmx.py Asia > manmade_google.pgmx
39 | ```
40 |
41 |
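42 | 2. Convert data from Google Sheets to Neo4j
43 |
44 | ```console
45 | python google_to_neo4j.py
46 | ```
47 | It generates a series of TSV files in the ./neo4j folder. To build the same files from the local TSV files in ./source instead, run `python tsv_to_neo4j.py`. The statements in neo4j_commands.txt load these files into Neo4j.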
48 |
49 | 3. Other utility files.
50 |
51 | pgmx_to_tsv.py parses a PGMX file and generates node, link and potential TSV files. Use this script when you have modified data in OpenMarkov and want to write the changes back into your TSV files.
52 |
53 | You can then use upsert_tsv.py to upsert the new data into the old TSV files.
54 |
55 | ## Authors
56 |
57 |
58 |
59 | * **Sixing Huang** - *Concept and Coding*
60 |
61 |
62 |
63 | ## License
64 |
65 |
66 |
67 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details
68 |
--------------------------------------------------------------------------------
/templates/unicriterion_pgmx.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | {% for node in nodes -%}
13 |
14 |
15 |
16 | {% for s in node.states -%}
17 |
18 | {% endfor %}
19 |
20 |
21 | {% endfor %}
22 |
23 |
24 | {% for link in links -%}
25 |
26 |
27 |
28 |
29 | {% endfor %}
30 |
31 |
32 | {% for potential in potentials -%}
33 |
34 |
35 | {% for v in potential.variables -%}
36 |
37 | {% endfor %}
38 |
39 | {{ potential.value }}
40 |
41 | {% endfor %}
42 |
43 |
44 |
45 |
46 | UNICRITERION
47 |
48 |
49 |
--------------------------------------------------------------------------------
/neo4j_commands.txt:
--------------------------------------------------------------------------------
// Uniqueness constraints on the node name; IF NOT EXISTS keeps the script rerunnable.
CREATE CONSTRAINT IF NOT EXISTS FOR (s:Symptom) REQUIRE s.name IS UNIQUE;

CREATE CONSTRAINT IF NOT EXISTS FOR (d:Disease) REQUIRE d.name IS UNIQUE;

CREATE CONSTRAINT IF NOT EXISTS FOR (r:Risk) REQUIRE r.name IS UNIQUE;

CREATE CONSTRAINT IF NOT EXISTS FOR (a:Anatomy) REQUIRE a.name IS UNIQUE;

// Nodes: MERGE on the unique name only, then SET the remaining properties.
// Merging on the full property map would try to create a second node (and
// violate the constraint) whenever a property of an existing node changed.
LOAD CSV WITH HEADERS FROM 'file:///symptom.tsv' AS row FIELDTERMINATOR '\t'
MERGE (s:Symptom {name: row.name})
SET s.model = row.model, s.description = row.description, s.role = row.role, s.type = row.type, s.states = row.states;

LOAD CSV WITH HEADERS FROM 'file:///disease.tsv' AS row FIELDTERMINATOR '\t'
MERGE (d:Disease {name: row.name})
SET d.model = row.model, d.description = row.description, d.role = row.role, d.type = row.type, d.states = row.states;

LOAD CSV WITH HEADERS FROM 'file:///risk.tsv' AS row FIELDTERMINATOR '\t'
MERGE (r:Risk {name: row.name})
SET r.model = row.model, r.description = row.description, r.role = row.role, r.type = row.type, r.states = row.states;

LOAD CSV WITH HEADERS FROM 'file:///anatomy.tsv' AS row FIELDTERMINATOR '\t'
MERGE (a:Anatomy {name: row.name})
SET a.model = row.model, a.description = row.description, a.role = row.role, a.type = row.type, a.states = row.states;

// Relationships: both end nodes are matched (or created) by name.
LOAD CSV WITH HEADERS FROM 'file:///has_symptom.tsv' AS row FIELDTERMINATOR '\t' MERGE (d:Disease {name: row.source}) MERGE (s:Symptom {name: row.target}) MERGE (d)-[r:HAS_SYMPTOM]->(s);

LOAD CSV WITH HEADERS FROM 'file:///increases_risk.tsv' AS row FIELDTERMINATOR '\t' MERGE (ri:Risk {name: row.source}) MERGE (d:Disease {name: row.target}) MERGE (ri)-[r:INCREASES_RISK]->(d);

LOAD CSV WITH HEADERS FROM 'file:///is_a.tsv' AS row FIELDTERMINATOR '\t' MERGE (d1:Disease {name: row.source}) MERGE (d2:Disease {name: row.target}) MERGE (d1)-[r:IS_A]->(d2);

LOAD CSV WITH HEADERS FROM 'file:///located_in.tsv' AS row FIELDTERMINATOR '\t' MERGE (d:Disease {name: row.source}) MERGE (a:Anatomy {name: row.target}) MERGE (d)-[r:LOCATED_IN]->(a);
--------------------------------------------------------------------------------
/pgmx_to_tsv.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | # Import BeautifulSoup
3 | from bs4 import BeautifulSoup as bs
4 | import os
5 |
INPUT_FILE = "manmade.pgmx"
OUTPUT_DIR = "pgmx_output"


def _write_tsv(records, filename):
    """Write a list of dict records as a TSV file inside OUTPUT_DIR."""
    frame = pd.DataFrame.from_records(records)
    frame.to_csv(os.path.join(OUTPUT_DIR, filename), sep="\t", index=False, na_rep='NULL')


# Read the XML file in binary mode and let the parser work out the encoding,
# instead of decoding it with the platform default.
with open(INPUT_FILE, "rb") as file:
    bs_content = bs(file, "xml")

# The output folder may not exist yet.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# One record per <Variable> inside <Variables>: name, type, role, states, x, y.
nodes = []
for node in bs_content.find("Variables").find_all("Variable"):
    states = "; ".join([n.get("name") for n in node.find("States").find_all("State")])
    coordinates = node.find("Coordinates")
    nodes.append({
        "name": node.get("name"),
        "type": node.get("type"),
        "role": node.get("role"),
        "states": states,
        "x": int(coordinates.get("x")),
        "y": int(coordinates.get("y")),
    })
_write_tsv(nodes, "pgmx_output_nodes.tsv")


# One record per <Link>: its two <Variable> children are source and target.
links = []
for l in bs_content.find("Links").find_all("Link"):
    source, target = [x.get("name") for x in l.find_all("Variable")]
    links.append({"source": source, "target": target})
_write_tsv(links, "pgmx_output_links.tsv")


# One record per <Potential>: type, role, variable names and the raw value text.
potentials = []
for potential in bs_content.find("Potentials").find_all("Potential"):
    variables = "; ".join([n.get("name") for n in potential.find("Variables").find_all("Variable")])
    values = potential.find("Values").text
    potentials.append({"type": potential.get("type"), "role": potential.get("role"), "variables": variables, "values": values})
_write_tsv(potentials, "pgmx_output_potentials.tsv")
47 |
--------------------------------------------------------------------------------
/source/nodes.tsv:
--------------------------------------------------------------------------------
1 | name type role states x y label model description
2 | X-ray finiteStates chance no; yes 214 397 Symptom Asia
3 | Bronchitis finiteStates chance no; yes 701 216 Disease Asia Bronchitis is inflammation of the airways in the lungs that is usually caused by an infection. It often gets better without treatment in around 3 weeks. Some people have long-term inflammation of the airways in the lungs called chronic bronchitis. This is known as chronic obstructive pulmonary disease (COPD).
4 | Dyspnea finiteStates chance no; yes 509 400 Symptom Asia Shortness of breath — known medically as dyspnea — is often described as an intense tightening in the chest, air hunger, difficulty breathing, breathlessness or a feeling of suffocation. Very strenuous exercise, extreme temperatures, obesity and higher altitude all can cause shortness of breath in a healthy person.
5 | Visit To Asia finiteStates chance no; yes 204 55 Risk Asia
6 | Smoking finiteStates chance no; yes 625 61 Risk Asia
7 | Lung cancer finiteStates chance no; yes 444 163 Disease Asia Lung cancers usually are grouped into two main types called small cell and non-small cell (including adenocarcinoma and squamous cell carcinoma). These types of lung cancer grow differently and are treated differently. Non-small cell lung cancer is more common than small cell lung cancer.
8 | Tuberculosis finiteStates chance no; yes 202 163 Disease Asia Tuberculosis (TB) is a disease caused by germs that are spread from person to person through the air. TB usually affects the lungs, but it can also affect other parts of the body, such as the brain, the kidneys, or the spine. A person with TB can die if they do not get treatment.
9 | Lung disease finiteStates chance no; yes 325 284 Disease Asia
10 | Lung Anatomy One of a pair of organs in the chest that supplies the body with oxygen, and removes carbon dioxide from the body.
11 | Bronchi Anatomy A bronchus is a passage or airway in the lower respiratory tract that conducts air into the lungs.
12 | COVID-19 finiteStates chance no; yes 377 171 Disease Asia Coronavirus disease 2019 (COVID-19) is a contagious disease caused by a virus, the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The first known case was identified in Wuhan, China, in December 2019. The disease quickly spread worldwide, resulting in the COVID-19 pandemic.
--------------------------------------------------------------------------------
/tsv_to_pgmx.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from jinja2 import Environment, FileSystemLoader
3 | import sys
4 |
# The template is looked up in ./templates relative to the working directory.
file_loader = FileSystemLoader('templates')
env = Environment(loader=file_loader)
template = env.get_template('unicriterion_pgmx.txt')


def _coordinate(row, axis, default):
    """Return row[axis] as an int, or `default` when the cell is absent or empty."""
    if axis not in row:
        return default
    value = row[axis]
    # An empty cell is read by pandas as NaN, which is truthy and cannot be
    # converted with int(); test for it explicitly. A coordinate of 0 is valid.
    if pd.isna(value) or value == "":
        return default
    return int(float(value))


def get_pgmx(nodes_df: pd.DataFrame, links_df: pd.DataFrame, potentials_df: pd.DataFrame) -> str:
    """
    Render the PGMX document for the given nodes, links and potentials.

    Args:
        nodes_df (pd.DataFrame): pandas dataframe with columns: name, type, role, states
            (';'-separated) and optionally x, y
        links_df (pd.DataFrame): pandas dataframe with columns: source, target
        potentials_df (pd.DataFrame): pandas dataframe with columns: type, role,
            variables (';'-separated), values

    Returns:
        str: pgmx file
    """

    nodes_to_jinja = []
    for i, row in nodes_df.iterrows():
        states = [s.strip() for s in row.states.split(";")]
        # Fallback position derived from the row's index label, used only
        # when the table carries no coordinate for this node.
        fallback = 1 + i * 100
        nodes_to_jinja.append({
            "name": row["name"],
            "type": row.type,
            "role": row.role,
            "states": states,
            "x": _coordinate(row, "x", fallback),
            "y": _coordinate(row, "y", fallback),
        })

    links_to_jinja = [
        {"source": row["source"], "target": row.target}
        for _, row in links_df.iterrows()
    ]

    potentials_to_jinja = []
    for _, row in potentials_df.iterrows():
        variables = [v.strip() for v in row.variables.split(";")]
        # row["values"] must be used here: row.values is the Series' own array.
        potentials_to_jinja.append({"type": row["type"], "role": row.role, "variables": variables, "value": row["values"]})

    output = template.render(nodes=nodes_to_jinja, links=links_to_jinja, potentials=potentials_to_jinja)
    return output
52 |
def filter_by_model(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
    """
    Keep the rows of `df` that belong to `model_name`.

    The 'model' column holds one or more model names separated by ';'. Rows
    with an empty 'model' cell are dropped.

    Args:
        df (pd.DataFrame): table with a 'model' column
        model_name (str): model to select

    Returns:
        pd.DataFrame: the matching rows, with all columns of `df`
    """
    tagged = df[df['model'].notna()]
    # Built as a plain list so that a table whose 'model' column is entirely
    # empty yields an empty result instead of failing on the .str accessor.
    keep = [model_name in [tag.strip() for tag in str(cell).split(";")] for cell in tagged['model']]
    return tagged[pd.Series(keep, index=tagged.index, dtype=bool)]


if __name__ == "__main__":
    # The model name is a required command-line argument.
    if len(sys.argv) < 2:
        raise SystemExit("usage: python tsv_to_pgmx.py <model_name> > <model>.pgmx")
    model = sys.argv[1]

    nodes = pd.read_csv("./source/nodes.tsv", sep="\t")
    links = pd.read_csv("./source/links.tsv", sep="\t")
    potentials = pd.read_csv("./source/potentials.tsv", sep="\t")

    # Restrict every table to the requested model.
    model_nodes = filter_by_model(nodes, model)
    model_links = filter_by_model(links, model)
    model_potentials = filter_by_model(potentials, model)

    # Render the PGMX document and write it to stdout.
    pgmx = get_pgmx(model_nodes, model_links, model_potentials)
    print (pgmx)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/manmade_backup.pgmx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 | 0.95 0.05 0.02 0.98
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 | 0.7 0.3 0.4 0.6
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 | 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9
158 |
159 |
160 |
161 |
162 |
163 |
164 | 0.99 0.01
165 |
166 |
167 |
168 |
169 |
170 |
171 | 0.5 0.5
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 | 0.99 0.01 0.9 0.1
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 | 0.99 0.01 0.95 0.05
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 | 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 | 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95
206 |
207 |
208 |
209 |
210 |
211 |
212 | UNICRITERION
213 |
214 |
215 |
216 |
--------------------------------------------------------------------------------
/manmade.pgmx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 | 0.95 0.05 0.02 0.98
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 | 0.7 0.3 0.4 0.6
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 | 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9
158 |
159 |
160 |
161 |
162 |
163 |
164 | 0.99 0.01
165 |
166 |
167 |
168 |
169 |
170 |
171 | 0.5 0.5
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 | 0.99 0.01 0.9 0.1
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 | 0.99 0.01 0.95 0.05
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 | 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 | 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95
207 |
208 |
209 |
210 |
211 |
212 |
213 | UNICRITERION
214 |
215 |
216 |
217 |
--------------------------------------------------------------------------------
/manmade_google.pgmx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 | 0.95 0.05 0.02 0.98
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 | 0.7 0.3 0.4 0.6
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 | 0.9 0.1 0.3 0.7 0.2 0.8 0.1 0.9
158 |
159 |
160 |
161 |
162 |
163 |
164 | 0.99 0.01
165 |
166 |
167 |
168 |
169 |
170 |
171 | 0.5 0.5
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 | 0.99 0.01 0.9 0.1
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 | 0.99 0.01 0.95 0.05
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 | 1.0 0.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 | 0.7 0.3 0.3 0.7 0.5 0.5 0.05 0.95
207 |
208 |
209 |
210 |
211 |
212 |
213 | UNICRITERION
214 |
215 |
216 |
217 |
--------------------------------------------------------------------------------