├── .gitignore ├── requirements.txt ├── .assets ├── neo4j_inspect_walls.gif └── simpleIfcAIAgent_demo.gif ├── .env ├── LICENSE ├── main.py ├── README.md ├── chatbot.py └── ifc_to_neo4j.py /.gitignore: -------------------------------------------------------------------------------- 1 | env/* 2 | __pycache__/* 3 | .env 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimitrovakulenko/simpleIfcAIAgentWithGraphRAG/HEAD/requirements.txt -------------------------------------------------------------------------------- /.assets/neo4j_inspect_walls.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimitrovakulenko/simpleIfcAIAgentWithGraphRAG/HEAD/.assets/neo4j_inspect_walls.gif -------------------------------------------------------------------------------- /.assets/simpleIfcAIAgent_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimitrovakulenko/simpleIfcAIAgentWithGraphRAG/HEAD/.assets/simpleIfcAIAgent_demo.gif -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | OPENAI_API_VERSION=2024-08-01-preview 2 | AZURE_OPENAI_ENDPOINT=https://your-api-endpoint.openai.azure.com 3 | AZURE_OPENAI_API_KEY=your_key -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Dmytro Vakulenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including 
without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from neo4j import GraphDatabase 3 | from chatbot import Chatbot 4 | from ifc_to_neo4j import process_ifc_file 5 | 6 | def connect_to_neo4j(uri, user, password): 7 | driver = GraphDatabase.driver(uri, auth=(user, password)) 8 | return driver 9 | 10 | async def main(): 11 | ifc_file_path = r"C:\Users\Public\Solibri\SOLIBRI\Samples\ifc\Solibri Building Structural.ifc" 12 | 13 | neo4j_uri = "bolt://localhost:7687" 14 | neo4j_user = "neo4j" 15 | neo4j_password = "password" 16 | 17 | database_name = "test2.db" 18 | 19 | fill_db = True 20 | 21 | driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password)) 22 | 23 | if fill_db: 24 | process_ifc_file(ifc_file_path, driver, database_name) 25 | 26 | bot = Chatbot() 27 | bot.initialize(driver, database_name) 28 | 29 | while True: 30 | user_input = input("\nYou: ") 31 | 32 | if user_input.lower() in {"exit", "quit", "bye"}: 33 | print("Exiting 
chat. Goodbye!") 34 | break 35 | 36 | try: 37 | bot_response = await bot.message(user_input) 38 | print(f"\nBot: {bot_response}") 39 | except Exception as e: 40 | print(f"An error occurred: {e}") 41 | 42 | if __name__ == "__main__": 43 | asyncio.run(main()) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple Ifc AI Agent with GraphRAG 2 | 3 | ## Overview 4 | 5 | `simpleIfcAIAgentWithGraphRAG` is a minimalistic project designed to showcase a simple AI agent that extracts data from an IFC file stored in a graph database (Neo4j). The project demonstrates the integration of AI, IFC file processing, and graph-based data storage and retrieval using modern tools such as Neo4j, LangGraph, and Azure GPT. 6 | 7 | ![Simple Ifc AI Agent with GraphRAG Demo](.assets/simpleIfcAIAgent_demo.gif) 8 | 9 | The project consists of two primary scripts: 10 | 1. **`ifc_to_neo4j.py`**: Parses an IFC file and stores its data in a Neo4j graph database. It organizes IFC entities as graph nodes and their relationships as graph edges. 11 | 2. **`chatbot.py`**: Implements an AI agent that answers queries about the IFC file by executing Cypher queries on the Neo4j database. 12 | 13 | This project is modular and can be adapted to other databases or in-memory graph representations with minor adjustments. 14 | 15 | ## How to run 16 | 17 | ### Prerequisites 18 | 19 | 1. Install **Neo4j Desktop** to set up a local Neo4j database. 20 | 2. Deploy a ChatGPT model instance via **Azure OpenAI Service**. 21 | 22 | ### Steps 23 | 24 | 1. **Set up the Neo4j project**: 25 | - Open **Neo4j Desktop** and create a new project. 26 | - Note the connection details (URI, username, password). 27 | 28 | 2. 
**Run the project**: 29 | - Clone this repository: 30 | ```bash 31 | git clone https://github.com/your-username/simpleIfcAIAgentWithGraphRAG.git 32 | cd simpleIfcAIAgentWithGraphRAG 33 | ``` 34 | - Create a virtual environment and install dependencies: 35 | ```bash 36 | python -m venv venv 37 | venv\Scripts\activate # On Linux/Mac: source venv/bin/activate 38 | pip install -r requirements.txt 39 | ``` 40 | - Update the configuration in `main.py`: 41 | - Set `neo4j_uri`, `neo4j_user`, and `neo4j_password` to your Neo4j instance details. 42 | - Set `ifc_file_path` to the path of your IFC file. 43 | - Adjust `database_name` as needed. 44 | 45 | - Run the application: 46 | ```bash 47 | python main.py 48 | ``` 49 | 50 | 3. **Interact with the chatbot**: 51 | - The chatbot will launch in a terminal-based interactive mode. 52 | - Type your query, and the chatbot will translate it into a database query and return the results. 53 | - To exit, type `exit`, `quit`, or `bye`. 54 | 55 | 4. **(Optional) Inspect your IFC database in Neo4j Desktop** 56 | 57 | ![Inspect your IFC database in Neo4j Desktop](.assets/neo4j_inspect_walls.gif) 58 | 59 | ## Limitations 60 | 61 | 1. Works well only with smaller IFC files; uploading larger IFC files to the database can take a very long time. 62 | 2. The LLM context window limit is easy to reach, depending on the questions asked 63 | (a solution for this problem is demonstrated here: https://www.linkedin.com/posts/dmytro-vakulenko-b2a86040_innovation-ifc-ai-agent-variables-activity-7285951367746080768-hRED/ ) 64 | 65 | ## License 66 | 67 | This project is licensed under the [MIT License](LICENSE). You are free to use, modify, and distribute this project as per the terms of the license. 
"""chatbot.py - chat agent that answers questions about an IFC file stored in Neo4j.

The agent is a two-node LangGraph workflow: an "agent" node that calls an
Azure OpenAI chat model, and a "tools" node that runs the Cypher queries the
model asks for. The two nodes alternate until the model answers without
requesting a tool call.
"""
from typing import Literal

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from langchain_openai import AzureChatOpenAI
from langgraph.graph import END, START, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
from neo4j import Driver

# Instructions sent to the model on every call. The sentences are joined with
# single spaces so they do not run together ("...file.You translate...").
SYSTEM_PROMPT = " ".join((
    "You are a chabot assistant answering questions about specific ifc file.",
    "You translate user questions to ifc terminology.",
    "You access ifc file via neo4j graph database.",
    "You have full access to that database and can execute any query on it using query_ifc_graph_database tool.",
    "If you didn't recieve the answer from the first try you can make more attempts with other queries.",
    "You can call that tool up to 20 times during the preparation of one answer.",
    "In the database all ifc entities of the ifc file correspond nodes, ifc entities attributes are nodes attributes.",
    "When ifc entity references another ifc entity - this is a link/relationship in the neo4j database.",
    "For example a node of type IfcAxis2Placement3D has a relationship 'Axis' to node of type IfcDirection.",
    "Database doesn't change during the chat session.",
    "Before making a query that requires a specific label, you can first execute a query that will check if that label exists.",
))


class Chatbot:
    """Stateful chat front end around the LangGraph agent.

    The full message history (user, model and tool messages) is kept in
    ``self.messages`` and replayed to the agent on every turn.
    """

    def __init__(self):
        self.driver = None    # Neo4j driver used by the query tool
        self.database = None  # name of the Neo4j database to query
        self.agent = None     # compiled LangGraph workflow, set by initialize()
        self.messages = []    # conversation history carried across turns

    def initialize(self, driver: Driver, database: str):
        """Bind the chatbot to a Neo4j driver/database and build the agent.

        Raises:
            ValueError: if ``driver`` or ``database`` is missing.
        """
        # Validate before assigning so a failed call leaves the instance untouched.
        if not driver or not database:
            raise ValueError("Driver and database must be provided for initialization.")

        self.driver = driver
        self.database = database
        self.agent = get_or_create_agent(self)

    async def message(self, user_message: str) -> str:
        """Send one user message to the agent and return the reply text.

        Errors raised while the agent runs are printed and returned as an
        ``"Error: ..."`` string instead of being propagated.
        """
        if not self.agent:
            return "Chatbot is not initialized."

        try:
            self.messages.append(HumanMessage(user_message))

            result = await self.agent.ainvoke(
                {"messages": self.messages},
                config={
                    "recursion_limit": 42,
                    "configurable": {"thread_id": "42"},
                },
            )

            # The returned state holds the whole conversation, including the
            # model and tool messages produced during this turn.
            self.messages = result["messages"]
            return self.messages[-1].content

        except Exception as e:
            print(f"Error: {str(e)}")
            return f"Error: {str(e)}"


# Tools for the LLM to prepare an answer

def create_query_ifc_graph_database(chatbot: Chatbot):
    """Build the Cypher query tool bound to ``chatbot``'s driver and database."""

    # NOTE: the docstring of the inner function is the tool description the
    # model sees, so it is kept short and unchanged.
    @tool
    async def query_ifc_graph_database(cypher_query: str):
        """Executes cypher_query in neo4j graph database storing ifc file."""
        print(f'Executes query: {cypher_query}')

        def run_query(tx):
            return [record.data() for record in tx.run(cypher_query)]

        try:
            with chatbot.driver.session(database=chatbot.database) as session:
                return session.execute_read(run_query)
        except Exception as e:
            print(f'Error: {str(e)}')
            # Hand the failure back to the model so it can correct the query
            # and retry, instead of receiving an uninformative None.
            return f"Error: {str(e)}"

    return query_ifc_graph_database


# LLM configuration

def create_call_model(llm_client, chatbot: Chatbot, max_tokens=128000, buffer_tokens=500):
    """Build the "agent" graph node that calls the chat model.

    ``chatbot``, ``max_tokens`` and ``buffer_tokens`` are accepted for
    interface compatibility; they are not used by the node.
    """

    async def call_model(state: MessagesState):
        print('LLM generates answer...')
        messages = state['messages']

        # The system prompt is not stored in the graph state, so it is
        # prepended on every call; otherwise the model loses its instructions
        # after the first tool round-trip and on every later chat turn.
        if messages and isinstance(messages[0], SystemMessage):
            prompt = list(messages)
        else:
            prompt = [SystemMessage(SYSTEM_PROMPT), *messages]

        response = await llm_client.ainvoke(prompt)

        # usage_metadata can be absent; do not let logging break the turn.
        usage = getattr(response, "usage_metadata", None) or {}
        print(f"Total tokens usage: {usage.get('total_tokens')}")

        # Return only the new message; the MessagesState reducer appends it.
        return {"messages": [response]}

    return call_model


def should_continue(state: MessagesState) -> Literal["tools", END]:
    """Route to the tools node while the last model message requests tool calls."""
    last_message = state['messages'][-1]

    if last_message.tool_calls:
        return "tools"

    return END


# Assemble agent

def get_or_create_agent(chatbot, deployment_name="gpt-4o-mini", temperature=0.3, max_tokens=4000):
    """Compile the LangGraph workflow (agent <-> tools loop) for ``chatbot``.

    Args:
        chatbot: Chatbot whose driver/database the query tool will use.
        deployment_name: Azure OpenAI deployment to call.
        temperature: sampling temperature passed to the model.
        max_tokens: completion token limit passed to the model.

    Returns:
        The compiled workflow. A new one is built on every call.
    """
    tools = [create_query_ifc_graph_database(chatbot)]

    llm_client = AzureChatOpenAI(
        deployment_name=deployment_name,
        temperature=temperature,
        max_tokens=max_tokens,
    ).bind_tools(tools)

    workflow = StateGraph(MessagesState)
    workflow.add_node("agent", create_call_model(llm_client, chatbot))
    workflow.add_node("tools", ToolNode(tools))
    workflow.add_edge(START, "agent")
    workflow.add_conditional_edges("agent", should_continue)
    workflow.add_edge("tools", "agent")

    return workflow.compile()
"""ifc_to_neo4j.py - load an IFC file into a Neo4j graph database.

Every IFC entity becomes a node labelled with its IFC type; every attribute
that references another entity (directly or through a flat collection of
entities) becomes a relationship named after the attribute, plus a
``REVERSE_<attribute>`` relationship in the opposite direction.
"""
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Iterable

import ifcopenshell
from neo4j import Driver

PROCESSING_BATCH_SIZE = 500


def _quote_identifier(name):
    """Return ``name`` as a backtick-quoted Cypher identifier (backticks doubled)."""
    return "`" + str(name).replace("`", "``") + "`"


def _is_neo4j_scalar(value):
    """Return True for values stored as node properties (str, int, float, bool)."""
    return isinstance(value, (str, int, float, bool))


def _iter_references(entity):
    """Yield ``(attribute_name, referenced_entity)`` for each entity reference of ``entity``.

    An attribute contributes references when it is an entity itself or a
    collection made up solely of entities; any other value is ignored.
    """
    for name, value in entity.get_info().items():
        if isinstance(value, ifcopenshell.entity_instance):
            yield name, value
        # Strings are iterable too, but can never hold entity references.
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            items = tuple(value)
            if all(isinstance(item, ifcopenshell.entity_instance) for item in items):
                for item in items:
                    yield name, item


def does_database_exist(driver, database_name):
    """Return True when ``SHOW DATABASES`` lists ``database_name``."""
    with driver.session(database="system") as session:
        result = session.run("SHOW DATABASES")
        return any(record["name"] == database_name for record in result)


def create_database(driver, database_name):
    """Create ``database_name`` and wait until the command has completed."""
    with driver.session(database="system") as session:
        # The name is quoted so dots and other special characters are taken
        # literally. WAIT makes the command block until the database has been
        # created, because the caller writes to it right afterwards.
        session.run(f"CREATE DATABASE {_quote_identifier(database_name)} WAIT").consume()
        print(f"Database '{database_name}' created successfully.")


def clean_database(driver, database_name):
    """Delete every node and relationship in ``database_name``."""
    with driver.session(database=database_name) as session:
        session.run("MATCH (n) DETACH DELETE n").consume()
        print(f"Database '{database_name}' has been cleaned.")


def create_nodes_in_batch(driver, batch, database):
    """Merge one node per entity in ``batch`` inside a single write transaction.

    The node label is the entity's IFC type. Scalar attributes become node
    properties; ``entity_id`` holds the entity's id in the IFC file.
    """
    def create_node(tx, entities):
        for entity in entities:
            properties = {
                key: value
                for key, value in entity.get_info().items()
                if _is_neo4j_scalar(value)
            }
            properties["entity_id"] = entity.id()

            # Properties travel as one map parameter, so attribute names can
            # neither collide with tx.run's own arguments nor end up inside
            # the query text.
            tx.run(
                f"MERGE (n:{_quote_identifier(entity.is_a())} {{ entity_id: $entity_id }}) "
                "SET n += $properties",
                entity_id=properties["entity_id"],
                properties=properties,
            )

    with driver.session(database=database) as session:
        session.execute_write(create_node, batch)


def create_relationships_in_batch(driver, batch, database):
    """Merge forward and ``REVERSE_`` relationships for every entity reference in ``batch``.

    Nodes for all entities must already exist (see ``create_nodes_in_batch``).
    """
    def create_relationship(tx, entities):
        for counter, entity in enumerate(entities, 1):
            start_label = _quote_identifier(entity.is_a())
            for rel_name, target in _iter_references(entity):
                # Matching on the label narrows the lookup to nodes of that
                # IFC type instead of scanning every node in the database.
                tx.run(
                    f"MATCH (a:{start_label} {{entity_id: $start_id}}) "
                    f"MATCH (b:{_quote_identifier(target.is_a())} {{entity_id: $end_id}}) "
                    f"MERGE (a)-[:{_quote_identifier(rel_name)}]->(b) "
                    f"MERGE (b)-[:{_quote_identifier('REVERSE_' + rel_name)}]->(a)",
                    start_id=entity.id(),
                    end_id=target.id(),
                )

            if counter % PROCESSING_BATCH_SIZE == 0:
                print(f"Processed {counter} nodes")

    with driver.session(database=database) as session:
        session.execute_write(create_relationship, batch)


def _run_batches(worker, driver, batches, database, kind):
    """Run ``worker`` over ``batches`` on a thread pool; return elapsed seconds.

    The first exception raised by a worker is re-raised here.
    """
    started = time.perf_counter()
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(worker, driver, batch, database) for batch in batches]
        for i, future in enumerate(as_completed(futures), 1):
            future.result()  # Wait for the batch and surface its exception, if any
            print(f"Processed batch {i}/{len(batches)} ({kind})")
    return time.perf_counter() - started


def parse_ifc_and_populate_neo4j(ifc_file_path, driver, database):
    """Open the IFC file and write all its nodes, then all its relationships."""
    ifc_file = ifcopenshell.open(ifc_file_path)

    entities = sorted(ifc_file, key=lambda e: e.id())
    total_entities = len(entities)
    print(f"Total entities to process: {total_entities}")

    batches = [
        entities[i:i + PROCESSING_BATCH_SIZE]
        for i in range(0, total_entities, PROCESSING_BATCH_SIZE)
    ]

    print("Processing nodes...")
    node_time = _run_batches(create_nodes_in_batch, driver, batches, database, "Nodes")
    print(f"Node creation completed in {node_time:.2f} seconds.")

    print("Processing relationships...")
    # TODO: come up with efficient batching, for now one batch
    relationship_time = _run_batches(
        create_relationships_in_batch, driver, [entities], database, "Relationships"
    )
    print(f"Relationship creation completed in {relationship_time:.2f} seconds.")

    total_time = node_time + relationship_time
    print(f"Total processing time: {total_time:.2f} seconds.")


def process_ifc_file(ifc_file_path, driver:Driver, db_name=None, clean_db=True):
    """
    Process an IFC file and populate a Neo4j database with the extracted data.
    All IFC entities will become nodes, all links between ifc entities will become links between nodes.

    The driver is owned by the caller and is NOT closed here, so it can be
    reused afterwards (for example by the chatbot).

    Args:
        ifc_file_path (str): Path to the IFC file to process.
        driver (Driver): Open Neo4j driver used for all database access.
        db_name (str, optional): Name of the Neo4j database to use. Defaults to a name derived from the IFC file.
        clean_db (bool, optional): Whether to clean an already existing database before populating it.

    Returns:
        str: The normalized database name that was populated.

    Raises:
        ValueError: If no usable database name is left after normalization.
    """

    database_name = db_name or os.path.splitext(os.path.basename(ifc_file_path))[0]
    # Keep only letters, digits and dots, lower-cased, with no leading or trailing dots.
    database_name = re.sub(r"[^A-Za-z0-9.]", ".", database_name).lower().strip(".")
    if not database_name:
        raise ValueError("Could not derive a valid database name; pass db_name explicitly.")

    if does_database_exist(driver, database_name):
        print(f"Database '{database_name}' already exists. Skipping creation.")
        if clean_db:
            clean_database(driver, database_name)
    else:
        create_database(driver, database_name)

    parse_ifc_and_populate_neo4j(ifc_file_path, driver, database_name)

    print(f"Finished populating the database '{database_name}'.")
    return database_name