├── .DS_Store ├── LICENSE ├── README.md ├── __pycache__ └── config.cpython-39.pyc ├── app.py ├── config.py ├── data ├── .DS_Store ├── input │ ├── .DS_Store │ └── L141.txt └── output │ ├── L141.json │ └── L141.txt.json ├── neo4j.png └── requirements.txt /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/defnecirci/InsightGraph/7bb8c3667a610373b618f0195c407e9fbf6710fb/.DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 defnecirci, shrutibadhwar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # InsightGraph 2 | Welcome to our project submission for the LLMs for Chemistry and Materials Science Hackathon that took place on March 29th, 2023. 3 | 4 | # Overview 5 | Our project aims to create a simplified knowledge graph from article abstracts to discover concepts and relevant articles. During the hackathon, we were able to design a web application that automatically extracts entities and relationships from material-science abstracts using a pre-defined schema. You can find our video submission here: https://twitter.com/DCirci/status/1641486022709059585?s=20. 6 | 7 | # How to use 8 | 9 | ### Installation 10 | 11 | 1. Clone the repo 12 | ```sh 13 | git clone https://github.com/defnecirci/InsightGraph.git 14 | ``` 15 | 2. Install the packages 16 | ```sh 17 | pip install -r requirements.txt 18 | ``` 19 | 3. Specify authentication details in config.py 20 | 21 | ```python 22 | openai_api_key = "openai_api_key" 23 | neo4j_uri = "neo4j_uri" 24 | neo4j_username = "neo4j_username" 25 | neo4j_password = "neo4j_password" 26 | ``` 27 | 28 | Get Neo4j credentials by first creating a user account, and then creating a free instance. 29 | On creation of an instance, you will be prompted to download authentication details containing uri, 30 | username and password. 31 | 32 | ![alt text](https://github.com/defnecirci/InsightGraph/blob/3754651d7f8163c16685656f7798e23b7d0d0029/neo4j.png) 33 | 34 | 35 | 4. Run 36 | ```sh 37 | streamlit run app.py 38 | ``` 39 | 40 | 5. To view and interact with the results on Neo4j Browser, you will be asked to authenticate with your credentials again (see config.py). 41 | 42 | 43 | # Contributing 44 | We are still working on developing our project and would greatly appreciate your feedback and contributions. 
"""Streamlit app: extract a knowledge graph from an abstract, store it in Neo4j, plot it."""
import os
import re
import openai
import json
import datetime
import streamlit as st
import config as cfg
import networkx as nx
import matplotlib.pyplot as plt
from neo4j import GraphDatabase
from collections import Counter

openai.api_key = cfg.openai_api_key
uri = cfg.neo4j_uri
username = cfg.neo4j_username
password = cfg.neo4j_password

# Directory where every extracted graph is written as "<filename>.json".
OUTPUT_DIR = './data/output'
# Graph rendered by show_graph() when the caller does not pass one.
DEFAULT_GRAPH_PATH = './data/output/L141.json'
# The Article node of a run gets the id "run base id + this offset".
ARTICLE_ID_OFFSET = 500
# Node labels and relationship types cannot be passed as Cypher parameters;
# they are spliced into the query text, so anything that is not a plain
# identifier character is replaced before use.
_NON_IDENTIFIER_CHARS = re.compile(r'\W+', re.ASCII)


def _parse_json_reply(reply):
    """Return the JSON value contained in a chat-model reply.

    The reply is parsed as-is first. If that fails (for example because the
    model wrapped the object in a Markdown code fence or added prose around
    it), the text between the first '{' and the last '}' is parsed instead.

    Raises:
        ValueError: if no JSON object can be decoded from the reply.
    """
    try:
        return json.loads(reply)
    except json.JSONDecodeError:
        first, last = reply.find('{'), reply.rfind('}')
        if first == -1 or last <= first:
            raise ValueError("the model reply does not contain a JSON object") from None
        try:
            return json.loads(reply[first:last + 1])
        except json.JSONDecodeError as err:
            raise ValueError(f"the model reply is not valid JSON: {err}") from err


def extract_graph(text, filename):
    """Ask the chat model to turn ``text`` into a graph and persist the result.

    Args:
        text: Abstract to analyse; it is appended to ``cfg.prompt``.
        filename: Name used for the output file ``<OUTPUT_DIR>/<filename>.json``.

    Returns:
        The decoded graph: a dict with a ``"nodes"`` list and an ``"edges"`` list.

    Raises:
        ValueError: if the reply is not JSON or lacks the nodes/edges lists.
    """
    # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface and
    # requirements.txt does not pin a version -- confirm the installed release.
    response = openai.ChatCompletion.create(
        model=cfg.model,
        messages=[
            {"role": "system", "content": cfg.system_role},
            {"role": "user", "content": f"{cfg.prompt} {text}"},
        ]
    )
    reply = response["choices"][0]["message"]["content"]
    graph = _parse_json_reply(reply)
    if (not isinstance(graph, dict)
            or not isinstance(graph.get("nodes"), list)
            or not isinstance(graph.get("edges"), list)):
        raise ValueError("the model reply must be a JSON object with 'nodes' and 'edges' lists")

    # Create the output directory on first use instead of failing on open().
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    out_path = os.path.join(OUTPUT_DIR, os.path.basename(filename) + '.json')
    with open(out_path, 'w') as file:
        json.dump(graph, file)
    print(json.dumps(graph, indent=4))
    return graph


def _cypher_identifier(value, kind):
    """Return ``value`` reduced to characters that are safe in a Cypher label or type.

    Raises:
        ValueError: if nothing usable is left after cleaning.
    """
    cleaned = _NON_IDENTIFIER_CHARS.sub('_', str(value).strip())
    if not cleaned.strip('_'):
        raise ValueError(f"cannot use {value!r} as a {kind}")
    if cleaned[0].isdigit():
        cleaned = '_' + cleaned
    return cleaned


def _write_graph(tx, data, base_id, article_name):
    """Transaction function: create the nodes, edges and Article node of one run.

    Stored ids are the graph's own ids shifted by ``base_id``. All values are
    sent as query parameters; only sanitised labels/types are interpolated.
    """
    label_by_id = {}
    for node in data["nodes"]:
        node_id = int(node["id"])
        label = _cypher_identifier(node["label"], "node label")
        label_by_id[node_id] = label
        # Ids are stored as integers so the MATCH clauses below (which compare
        # against integers) actually find the nodes.
        tx.run(
            f"CREATE (:{label} {{id: $id, name: $name}})",
            {"id": node_id + base_id, "name": str(node["name"])},
        )

    for rel in data["edges"]:
        source, target = int(rel["source"]), int(rel["target"])
        if source not in label_by_id or target not in label_by_id:
            # Edge refers to a node the model never declared; there is
            # nothing to connect.
            continue
        rel_type = _cypher_identifier(rel["type"], "relationship type")
        tx.run(
            f"MATCH (a:{label_by_id[source]} {{id: $source}}), "
            f"(b:{label_by_id[target]} {{id: $target}}) "
            f"CREATE (a)-[:{rel_type} {{type: $type}}]->(b)",
            {"source": source + base_id, "target": target + base_id, "type": str(rel["type"])},
        )

    article_id = base_id + ARTICLE_ID_OFFSET
    tx.run(
        "CREATE (:Article {id: $id, name: $name})",
        {"id": article_id, "name": article_name},
    )
    # Only link the materials created in this run, not every material that
    # earlier uploads left in the database.
    tx.run(
        "MATCH (a:Article {id: $article_id}) MATCH (n:material) "
        "WHERE n.id IN $node_ids CREATE (a)-[:MENTIONS]->(n)",
        {"article_id": article_id, "node_ids": [node_id + base_id for node_id in label_by_id]},
    )


def save_graph(graph, filename):
    """Write ``graph`` to Neo4j together with an Article node named ``filename``.

    Args:
        graph: Dict with ``"nodes"`` (id, name, label) and ``"edges"``
            (source, target, type). It is not modified.
        filename: Stored as the ``name`` of the Article node.

    Raises:
        ValueError: if a node id is not numeric or a label/type is unusable.
    """
    # The current Unix time (seconds) offsets the ids of this run.
    base_id = int(datetime.datetime.now().timestamp())
    # Context managers close the session and the driver even if the write fails.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            session.execute_write(_write_graph, graph, base_id, filename)


def show_graph(text, graph=None):
    """Render node/edge statistics and a drawing of a graph in the Streamlit page.

    Args:
        text: Unused; kept so existing callers keep working.
        graph: Dict with ``"nodes"`` and ``"edges"``. When omitted, the graph
            stored at ``DEFAULT_GRAPH_PATH`` is loaded instead.
    """
    if graph is None:
        with open(DEFAULT_GRAPH_PATH, 'r') as file:
            graph = json.load(file)

    nodes = graph['nodes']
    edges = graph['edges']
    node_label_counts = Counter(n["label"] for n in nodes)
    edge_type_counts = Counter(e["type"] for e in edges)

    # Display nodes and edges information
    st.write("**Nodes**")
    st.write(' '.join(f"{item}: {count}" for item, count in node_label_counts.items()))
    st.write("**Edges**")
    st.write(' '.join(f"{item}: {count}" for item, count in edge_type_counts.items()))

    # Display graph
    G = nx.Graph()
    for node in nodes:
        G.add_node(node['id'], label=node['name'])
    for edge in edges:
        G.add_edge(edge['source'], edge['target'], **edge)
    fig, ax = plt.subplots(figsize=(8, 8))
    pos = nx.spring_layout(G, k=1.8)
    labels = nx.get_node_attributes(G, 'label')
    nx.draw(G, pos, ax=ax, with_labels=True, labels=labels, font_size=8, node_size=500,
            node_color='red', edge_color='blue', width=5)
    edge_labels = {(edge['source'], edge['target']): edge['type'] for edge in edges}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, ax=ax,
                                 verticalalignment='top', font_size=8, alpha=0.5)
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; release the figure
    # so figures do not pile up in memory.
    plt.close(fig)


def main():
    """Build the page: upload an abstract, extract its graph, store it and plot it."""
    st.title("GraphInsight!")
    st.header("A visual journey through Materials Articles.")
    st.write("View details [link](https://browser.graphapp.io)")
    text = ""
    filename = ""
    with st.sidebar:
        st.sidebar.title("Upload the abstract")
        uploaded = st.sidebar.file_uploader(label="", type='txt')
        if uploaded is not None:
            try:
                # getvalue() returns the whole upload without moving or
                # closing the underlying buffer.
                text = uploaded.getvalue().decode('utf-8')
            except UnicodeDecodeError:
                st.error("The uploaded file is not valid UTF-8 text.")
                return
            filename = os.path.basename(uploaded.name)
            st.write(text)

    if text and filename:
        try:
            graph = extract_graph(text, filename)
        except ValueError as err:
            st.error(f"Could not extract a graph from the model reply: {err}")
            return
        save_graph(graph, filename)
        show_graph(text, graph)


if __name__ == '__main__':
    main()
"""Credentials and prompt settings read by app.py."""

# Replace the placeholders below with your own credentials.
openai_api_key = "replace_with_your_API_Key"
neo4j_uri = "replace_with_your_neo4j_uri"
neo4j_username = "replace_with_your_neo4j_username"
neo4j_password = "replace_with_your_neo4j_password"


# The sections are joined with newlines so that consecutive sentences and
# section markers do not run into each other in the text sent to the model.
# NOTE(review): "contains" appears in both the edge list and the node-label
# list below -- confirm whether it is really meant to be a node label.
prompt = "\n".join([
    "***Task***",
    "For the text below extract materials, compositions, properties, applications, methods and create a JSON object in this format.",
    "Relationships or Edges can be contains, has_material and has_property, has_application, has_value.",
    "Node labels can be contains, material, property, applications, compositions.",
    "Nodes should be with ids, names, labels, attributes and relationships should be source, target, type. Ids need to be numerical values. Json keys need to be nodes and edges.",
    "***Format***",
    # The example is valid JSON with numeric ids, matching what the prompt
    # asks for and what app.py decodes with json.loads.
    '{"nodes": [{"id": 1, "name": "", "label": "material", "attributes": {}}], "edges": [{"source": 1, "target": 2, "type": ""}]}',
    "***Text***",
])

# System message sent with every request.
system_role = "You extract information from documents and return json objects"

# Chat model used for the extraction.
model = "gpt-3.5-turbo"
At lower graft density and larger molecular weight of the PHMA block, the nanocomposites exhibited simultaneous improvements in fracture toughness and tensile modulus. The PGMA epoxy compatible block also contributed to the improved fracture energy of the nanocomposites.' -------------------------------------------------------------------------------- /data/output/L141.json: -------------------------------------------------------------------------------- 1 | {"nodes": [{"id": 1, "name": "SiO2 nanoparticles", "label": "material", "attributes": {"diameter": "15 nm", "composition": "SiO2", "grafted_block_copolymer": "Yes"}}, {"id": 2, "name": "Block copolymer", "label": "composition", "attributes": {"inner_block": "5-20 nm rubbery polyhexylmethacrylate (PHMA)", "outer_block": "30 nm matrix compatible polyglycidylmethacrylate (PGMA)", "density": "0.07 to 0.7 chains/nm2", "molecular_weight": "20 to 80 kg/mol"}}, {"id": 3, "name": "Epoxy", "label": "material", "attributes": {}}, {"id": 4, "name": "Tensile behavior", "label": "property", "attributes": {}}, {"id": 5, "name": "Fracture toughness", "label": "property", "attributes": {}}, {"id": 6, "name": "Fatigue properties", "label": "property", "attributes": {}}, {"id": 7, "name": "Ductility", "label": "property", "attributes": {}}, {"id": 8, "name": "Modulus", "label": "property", "attributes": {}}, {"id": 9, "name": "Fatigue crack growth resistance", "label": "property", "attributes": {}}], "edges": [{"source": 1, "target": 2, "type": "has_material"}, {"source": 2, "target": 1, "type": "contains"}, {"source": 1, "target": 3, "type": "has_material"}, {"source": 3, "target": 1, "type": "contains"}, {"source": 2, "target": 3, "type": "has_material"}, {"source": 3, "target": 2, "type": "has_property"}, {"source": 7, "target": 3, "type": "has_property"}, {"source": 5, "target": 3, "type": "has_property"}, {"source": 9, "target": 3, "type": "has_property"}, {"source": 8, "target": 3, "type": "has_property"}, {"source": 1, 
"target": 7, "type": "has_property"}, {"source": 1, "target": 5, "type": "has_property"}, {"source": 1, "target": 9, "type": "has_property"}, {"source": 1, "target": 8, "type": "has_property"}, {"source": 2, "target": 7, "type": "has_value"}, {"source": 2, "target": 5, "type": "has_value"}, {"source": 2, "target": 9, "type": "has_value"}, {"source": 8, "target": 1, "type": "has_value"}, {"source": 2, "target": 7, "type": "has_value"}, {"source": 2, "target": 5, "type": "has_value"}, {"source": 2, "target": 9, "type": "has_value"}, {"source": 8, "target": 1, "type": "has_value"}]} -------------------------------------------------------------------------------- /data/output/L141.txt.json: -------------------------------------------------------------------------------- 1 | {"nodes": [{"id": 1, "name": "SiO2 nanoparticles", "label": "material", "attributes": {"diameter": "15 nm", "composition": "SiO2"}}, {"id": 2, "name": "block copolymer", "label": "composition", "attributes": {"inner block": "polyhexylmethacrylate (PHMA)", "outer block": "polyglycidylmethacrylate (PGMA)", "graft density": "0.07-0.7 chains/nm2", "block molecular weight": "20-80 kg/mol"}}, {"id": 3, "name": "tensile behavior", "label": "property", "attributes": {}}, {"id": 4, "name": "fracture toughness", "label": "property", "attributes": {}}, {"id": 5, "name": "fatigue properties", "label": "property", "attributes": {}}, {"id": 6, "name": "ductility", "label": "property", "attributes": {"improvement": "maximum 60%"}}, {"id": 7, "name": "fatigue crack growth resistance", "label": "property", "attributes": {"improvement": "undefined"}}, {"id": 8, "name": "modulus", "label": "property", "attributes": {"at loadings": "less than 2 vol% of silica core"}}, {"id": 9, "name": "plastic void growth", "label": "property", "attributes": {"induced by": "PHMA block"}}, {"id": 10, "name": "shear banding", "label": "property", "attributes": {"induced by": "PHMA block"}}, {"id": 11, "name": "improved fracture energy", 
"label": "property", "attributes": {"contributed by": "PGMA epoxy compatible block"}}, {"id": 12, "name": "nanocomposites", "label": "application", "attributes": {}}, {"id": 13, "name": "epoxy matrix", "label": "material", "attributes": {}}], "edges": [{"source": 1, "target": 2, "type": "has_composition"}, {"source": 2, "target": 12, "type": "has_application"}, {"source": 3, "target": 12, "type": "has_application"}, {"source": 4, "target": 12, "type": "has_application"}, {"source": 5, "target": 12, "type": "has_application"}, {"source": 2, "target": 6, "type": "has_property"}, {"source": 2, "target": 7, "type": "has_property"}, {"source": 2, "target": 8, "type": "has_property"}, {"source": 9, "target": 6, "type": "has_property"}, {"source": 10, "target": 4, "type": "has_property"}, {"source": 11, "target": 4, "type": "has_property"}, {"source": 13, "target": 12, "type": "contains"}, {"source": 1, "target": 13, "type": "contains"}]} -------------------------------------------------------------------------------- /neo4j.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/defnecirci/InsightGraph/7bb8c3667a610373b618f0195c407e9fbf6710fb/neo4j.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | neo4j 3 | openai 4 | networkx 5 | matplotlib 6 | --------------------------------------------------------------------------------