├── .gitignore ├── LICENSE ├── README.md ├── agentic-graph-rag ├── agentic │ ├── README.md │ ├── agenticGraphRAG.py │ ├── asknews-finance-graph.cypherl │ ├── helpers │ │ ├── test_community_precompute.py │ │ ├── test_config.py │ │ └── test_schema.py │ ├── jupyter-notebook │ │ └── agentic_graph_RAG.ipynb │ ├── requirements.txt │ └── setup.sh ├── impact-analysis │ └── data │ │ ├── generating.py │ │ └── queries.md └── vector │ ├── README.md │ ├── requirements.txt │ ├── setup.sh │ └── vector_search.py ├── graph-rag ├── data │ └── memgraph-export-got.cypherl ├── graphRAG.ipynb └── processed_data.json ├── integrations ├── cognee │ └── cognee.ipynb ├── langchain │ ├── langchain-kg-creation.png │ └── langchain.ipynb ├── langgraph │ ├── memgraph-toolkit-chatbot │ │ ├── .codespellignore │ │ ├── .env.example │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── Makefile │ │ ├── README.md │ │ ├── langgraph.json │ │ ├── pyproject.toml │ │ ├── src │ │ │ └── agent │ │ │ │ ├── __init__.py │ │ │ │ └── graph.py │ │ ├── static │ │ │ ├── langgraph-studio-memgraph-schema.png │ │ │ ├── langgraph-studio-memgraph-toolkit.png │ │ │ └── studio_ui.png │ │ └── tests │ │ │ ├── conftest.py │ │ │ ├── integration_tests │ │ │ ├── __init__.py │ │ │ └── test_graph.py │ │ │ └── unit_tests │ │ │ ├── __init__.py │ │ │ └── test_configuration.py │ └── synonym-agent │ │ ├── agents.py │ │ ├── app.py │ │ ├── business_rules.yaml │ │ ├── requirements.txt │ │ └── workflows.py ├── llamaindex │ ├── agentic-rag-with-graph-tools │ │ └── agentic_rag_with_pagerank.ipynb │ ├── multi-agent-rag-system │ │ ├── data │ │ │ └── 2023_canadian_budget.pdf │ │ └── multi_agent_rag_system.ipynb │ ├── property-graph-index │ │ └── llamaindex.ipynb │ └── single-agent-rag-system │ │ ├── data │ │ └── 2023_canadian_budget.pdf │ │ └── single_agent_rag_system.ipynb └── mcp │ └── synonym-agent │ ├── .gitignore │ ├── .python-version │ ├── README.md │ ├── init.bash │ ├── pyproject.toml │ ├── server.py │ └── uv.lock ├── knowledge-graph-creation ├── catcher-in-the-rye │ ├── knowledge_graph.ipynb │ └── processed_data.json └── game-of-thrones │ └── game-of-thrones-kg.ipynb └── retrieval └── vector-search ├── chat-with-your-knowledge ├── .gitignore ├── README.md ├── app.py ├── controller.py ├── embeddings.py ├── memgraph_storage.py ├── requirements.txt ├── storage.py ├── wikipedia_detailed_processor.py └── wikipedia_processor.py ├── simple-example ├── README.md ├── requirements.txt └── vector.py └── vector_search_example.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Memgraph 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # :sparkles: Memgraph AI Demos :sparkles: 2 | 3 | There are various ways AI can be used to interact with a graph database 4 | such as Memgraph. Whether you're creating a knowledge graph from unstructured 5 | data, figuring out the best retrieval strategy, implementing GraphRAG or 6 | creating a fully autonomous agent - the possibilities are endless. 7 | 8 | This repository is a collection of our demos and examples, and we'll continue 9 | growing it as we learn. 10 | 11 | ## Table of Contents 12 | - [Knowledge graph creation](#knowledge-graph-creation) 13 | - [Retrieval](#retrieval) 14 | - [GraphRAG](#graphrag) 15 | - [Agentic GraphRAG](#agentic-graphrag) 16 | - [Integrations](#integrations) 17 | 18 | 19 | ## [Knowledge graph creation](./knowledge-graph-creation/) 20 | 21 | This directory contains demos focused on building knowledge 22 | graphs from unstructured data. These examples illustrate how to extract entities 23 | and relationships to form structured graphs, enhancing data comprehension and 24 | accessibility. 25 | 26 | **Contents:** 27 | - **:bulb: Demo: [Catcher in the Rye](./knowledge-graph-creation/catcher-in-the-rye/)** 28 | - This demo focuses on constructing a knowledge graph from the 29 | text of "Catcher in the Rye". It demonstrates the extraction of entities and 30 | relationships from the summary of the book, showcasing how to model and 31 | build context from the data within Memgraph. 32 | - **:mag_right: Key Features:** 33 | - Text preprocessing and entity recognition using [SpaCy](https://spacy.io/) (a minimal sketch follows these demos) 34 | - Building relationships from character interactions and plot developments 35 | - Visualization of the resulting knowledge graph to explore the story's 36 | dynamics using [Memgraph Lab](https://memgraph.com/docs/data-visualization). 37 | 38 | - **:bulb: Demo: [Game of Thrones](./knowledge-graph-creation/game-of-thrones/)** 39 | - This demo illustrates the process of building a knowledge 40 | graph from "Game of Thrones" data. It involves extracting key entities such 41 | as characters, houses, and locations, and mapping their complex 42 | relationships to provide insights into the intricate world of the series. 43 | - **:mag_right: Key Features:** 44 | - Extraction of entities like characters, houses, and locations 45 | - Mapping of relationships including alliances, rivalries, and family ties 46 | - Visualization of the complex network within the "Game of Thrones" universe using [Memgraph Lab](https://memgraph.com/docs/data-visualization) 47 |
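Both demos share the same core step: run the raw text through an NER pipeline and turn the recognized entities into graph nodes. A minimal sketch of that step, using the same SpaCy model these demos rely on (the sentence is just an example):

```python
import spacy

# Assumes the model was installed with: python -m spacy download en_core_web_md
nlp = spacy.load("en_core_web_md")

doc = nlp("Viserys Targaryen makes a marriage alliance with Khal Drogo.")
for ent in doc.ents:
    print(ent.text, ent.label_)  # e.g. "Viserys Targaryen PERSON"
```

Each `(text, label)` pair becomes a candidate node, and co-occurrence within a sentence becomes a candidate relationship.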
48 | **:book: Additional resources** 49 | - [Docs: More about knowledge graphs in Memgraph](https://memgraph.com/docs/data-modeling/knowledge-graph) 50 | - [Blog: How to Extract Entities and Build a Knowledge Graph with Memgraph and SpaCy](https://memgraph.com/blog/extract-entities-build-knowledge-graph-memgraph-spacy) 51 | - [YouTube: Knowledge Graph Creation by Entity Extraction in Memgraph](https://www.youtube.com/watch?v=HYYhtKC2jyA) 52 | 53 | 54 | ## [Retrieval](./retrieval/) 55 | 56 | This directory contains demos focused on various retrieval strategies to efficiently query and extract relevant information from a knowledge graph. These examples illustrate how to leverage Memgraph's capabilities to perform advanced searches and retrieve data based on specific criteria (a condensed sketch of the shared query pattern follows the demo list). 57 | 58 | **Contents:** 59 | - **:bulb: Demo: [Vector Search](./retrieval/vector-search/simple-example)** 60 | - This demo showcases the use of vector search in Memgraph to find semantically similar nodes based on embeddings. It highlights the process of encoding node properties and performing similarity searches to retrieve relevant data. 61 | - **:mag_right: Key Features:** 62 | - Encoding node properties into embeddings 63 | - Performing vector searches to find similar nodes 64 | - Advanced querying capabilities to explore the retrieved data 65 | 66 | - **:bulb: Demo: [Build a Movie Similarity Search Engine with Vector Search in Memgraph](./retrieval/vector-search/vector_search_example.ipynb)** 67 | - This demo, based on the blog post ["Build a Movie Similarity Search Engine with Vector Search in Memgraph"](https://memgraph.com/blog/build-movie-similarity-search-vector-search-memgraph), walks through the process of building a movie recommendation system using vector search. It uses OpenAI's embedding API to convert movie plot descriptions into high-dimensional vectors, stores them in Memgraph, and retrieves similar movies via vector similarity search. 68 | - **:mag_right: Key Features:** 69 | - Using OpenAI to generate embeddings from movie plot summaries 70 | - Storing and indexing embeddings in Memgraph 71 | - Performing vector similarity searches with Cypher queries 72 | - Building a basic recommendation system using semantic search 73 | - Visualizing and exploring the graph structure of movie relationships 74 | 75 | - **:bulb: Demo: [Vector Search: Turning Unstructured Text into Queryable Knowledge](./retrieval/vector-search/chat-with-your-knowledge)** 76 | - This demo illustrates how to transform unstructured text into a queryable knowledge graph using Memgraph's built-in vector search capabilities. By integrating vector embeddings with graph structures, it enables semantic search and interactive applications like Q&A interfaces and automatic quiz generators. 77 | - **:mag_right: Key Features:** 78 | - **Vector Indexing:** Creating vector indices on nodes to perform efficient similarity searches. 79 | - **Data Ingestion:** Transforming paragraphs from unstructured text into graph nodes with embeddings. 80 | - **Graph Traversal:** Linking paragraphs to maintain document structure and enable sequential navigation. 81 | - **Semantic Search:** Utilizing vector similarity to retrieve contextually relevant information. 82 | - **Interactive Applications:** Building tools like Q&A interfaces and quiz generators powered by LLMs and vector search. 83 |
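Condensed, the query pattern these demos share looks like this. It is a sketch: the index name `tag`, the embedding model, and the connection settings are the ones used elsewhere in this repo and may differ in your setup:

```python
from sentence_transformers import SentenceTransformer
import neo4j

model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
embedding = model.encode("molten gold crown").tolist()

driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", ""))
with driver.session() as session:
    # 'tag' is the vector index name; the 5 most similar nodes are returned
    result = session.run(
        "CALL vector_search.search('tag', 5, $embedding) YIELD * RETURN *",
        embedding=embedding,
    )
    for record in result:
        print(record["distance"], dict(record["node"]))
```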
84 | **:book: Additional resources** 85 | - [Blog: Build a Movie Similarity Search Engine with Vector Search in Memgraph](https://memgraph.com/blog/build-movie-similarity-search-vector-search-memgraph) 86 | - [Workshop: From Pixels to Knowledge: Vector Search & Knowledge Graph](https://github.com/revaddu/Weblica-Workshop-GraphRAG) 87 | - [Webinar: Vector Search in Memgraph: Turn Unstructured Text into Queryable Knowledge](https://memgraph.com/webinars/vector-search-in-memgraph) 88 | - [Blog: Vector Search Demo: Turning Unstructured Text into Queryable Knowledge](https://memgraph.com/blog/vector-search-memgraph-knowledge-graph-demo) 89 | 90 | 91 | ## [GraphRAG](./graph-rag/) 92 | 93 | This directory contains demos focused on building a Graph-based Retrieval-Augmented Generation (GraphRAG) system that uses Memgraph to perform knowledge graph-based question answering, illustrating how to build an end-to-end GraphRAG system. 94 | 95 | **Contents:** 96 | - **:bulb: Demo: [GraphRAG](./graph-rag/graphRAG.ipynb)** 97 | - This demo implements a GraphRAG system using a Game of Thrones dataset. It involves enriching the knowledge graph with unstructured data, performing vector searches, and using LLMs to answer questions based on the graph data. 98 | - **:mag_right: Key Features:** 99 | - Enriching the knowledge graph with unstructured data 100 | - Performing vector searches to find relevant nodes 101 | - Using LLMs to answer questions based on the graph data 102 | - Embedding the node properties and labels 103 | - Performing relevance expansion with Memgraph's BFS algorithm (sketched below) 104 |
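The retrieval core of the demo is a two-step pattern: vector search finds the pivot node, and a breadth-limited path expansion then collects its neighborhood as context for the LLM. A sketch of the expansion query (the hop count and pivot id are illustrative):

```
MATCH path = (n)-[*..2]-(m)
WHERE id(n) = $pivot_id
RETURN path;
```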
105 | ## [Agentic GraphRAG](./agentic-graph-rag/) 106 | 107 | This directory contains demos focused on building an autonomous agent using the GraphRAG system. These examples illustrate how to create an agent that can interact with a knowledge graph, retrieve relevant information, and generate responses based on the data. The agents are dataset-agnostic. 108 | 109 | **Contents:** 110 | - **:bulb: Demo: [Agentic GraphRAG](./agentic-graph-rag/agentic/agenticGraphRAG.py)** 111 | - This demo showcases the creation of an autonomous agent using the GraphRAG system. It highlights the process of integrating Memgraph, Sentence Transformers, and OpenAI's GPT models to build an agent that can answer questions and perform tasks based on the knowledge graph. 112 | - **:mag_right: Key Features:** 113 | - Building an autonomous agent using GraphRAG 114 | - Integrating Memgraph, Sentence Transformers, and OpenAI's GPT models 115 | - Advanced querying and response generation based on the knowledge graph 116 | 117 | **:book: Additional resources** 118 | - [Blog: How To Build Agentic GraphRAG?](https://memgraph.com/blog/build-agentic-graphrag-ai) 119 | - [Webinar: How to build Agentic GraphRAG?](https://memgraph.com/webinars/how-to-build-agentic-graphrag) 120 | 121 | ## [Integrations](./integrations/) 122 | 123 | This directory contains integrations that demonstrate how to 124 | connect and utilize third-party frameworks with Memgraph. These examples 125 | highlight the process of leveraging tools like LlamaIndex and LangChain to 126 | process unstructured data, extract entities and relationships and build 127 | knowledge graphs seamlessly within Memgraph. 128 | 129 | **LangChain** 130 | - **:bulb: Demo: [KG creation](./integrations/langchain/)** 131 | - This demo showcases the integration of LangChain with Memgraph 132 | to create a knowledge graph from unstructured data. It highlights the use of 133 | LangChain's framework to process text, extract entities and relationships, 134 | and store them within Memgraph for advanced querying and analysis. 135 | - **:mag_right: Key Features:** 136 | - Utilization of LangChain for text processing and entity extraction 137 | - Construction of a knowledge graph within Memgraph 138 | - Advanced querying capabilities to explore the structured data 139 | 140 | **LangGraph** 141 | - **:bulb: Demo: [Graph-Aware Agents with LangGraph and Memgraph AI Toolkit](./integrations/langgraph/memgraph-toolkit-chatbot)** 142 | - This demo showcases a simple agent built using the LangGraph framework and the [Memgraph AI Toolkit](https://github.com/memgraph/ai-toolkit) to demonstrate how to integrate graph-based tooling into your LLM stack. 143 | 144 | **LlamaIndex** 145 | - **:bulb: Demo: [KG creation and retrieval](./integrations/llamaindex/property-graph-index)** 146 | - This demo uses LlamaIndex with Memgraph to 147 | build a knowledge graph from unstructured data. It showcases the framework's 148 | ability to parse complex documents, extract meaningful entities and 149 | relationships and represent them as a knowledge graph in Memgraph. 150 | - **:mag_right: Key Features:** 151 | - Parsing of complex documents to extract entities and relationships 152 | - Integration with Memgraph to construct and store the knowledge graph 153 | - Visualization and querying of the graph to derive insights from the data 154 | using [Memgraph Lab](https://memgraph.com/docs/data-visualization) 155 | 156 | - **:bulb: Demo: [Single-agent RAG system with LlamaIndex](./integrations/llamaindex/single-agent-rag-system)** 157 | - This demo showcases how to build a Retrieval-Augmented Generation (RAG) system using LlamaIndex and Memgraph with a single-agent architecture. The agent retrieves relevant information from the knowledge graph and generates context-aware responses. 158 | - **:mag_right: Key Features:** 159 | - Implementation of a single-agent RAG system for intelligent data retrieval 160 | - Integration with Memgraph for storing and managing structured knowledge 161 | - Querying and analyzing the knowledge graph to generate insightful responses 162 | 163 | - **:bulb: Demo: [Multi-agent RAG System with LlamaIndex](./integrations/llamaindex/multi-agent-rag-system)** 164 | - This demo extends the RAG framework by utilizing a multi-agent architecture with LlamaIndex and Memgraph. Multiple agents collaborate to retrieve, process, and refine knowledge from the graph, enhancing response accuracy and depth. 165 | - **:mag_right: Key Features:** 166 | - Multi-agent system for distributed retrieval 167 | - Advanced knowledge graph construction and querying with Memgraph 168 | - Improved contextual understanding through agent collaboration 169 | 170 | - **:bulb: Demo: [Multi-agent RAG with Memgraph tools](./integrations/llamaindex/agentic-rag-with-graph-tools/agentic_rag_with_pagerank.ipynb)** 171 | - This demo shows how to integrate Memgraph procedures, such as 172 | PageRank, as tools within a multi-agent architecture using LlamaIndex (a sketch of such a tool follows these demos). The 173 | agents work collaboratively to retrieve data from the graph, process it, 174 | and perform calculations like summing the weight properties of nodes based 175 | on the PageRank algorithm. 176 | - **:mag_right: Key Features:** 177 | - Integration of PageRank as a tool in a multi-agent system 178 | - Execution of graph algorithms within agents for enhanced retrieval and 179 | computation 180 | - Multi-agent collaboration to process and analyze data retrieved from 181 | Memgraph 182 | - Dynamic query execution combining graph-based retrieval and computation 183 | tasks 184 |
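Such a graph tool typically boils down to a thin wrapper around a stored procedure call that the agent framework can invoke. A sketch, assuming the MAGE `pagerank` module is loaded and nodes carry a `name` property:

```python
import neo4j

def top_pagerank_nodes(limit: int = 10):
    """Return the highest-ranked nodes; the kind of callable an agent exposes as a tool."""
    driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", ""))
    with driver.session() as session:
        result = session.run(
            "CALL pagerank.get() YIELD node, rank "
            "RETURN node.name AS name, rank ORDER BY rank DESC LIMIT $limit",
            limit=limit,
        )
        return [(record["name"], record["rank"]) for record in result]
```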
185 | **Cognee** 186 | - **:bulb: Demo: [Cognee x Memgraph integration](./integrations/cognee)** 187 | - This demo showcases the integration of Cognee with Memgraph to build a 188 | semantically rich knowledge graph from unstructured natural language input. 189 | It illustrates how Cognee leverages large language models (LLMs) to extract 190 | concepts and relationships from raw text and store them in Memgraph for 191 | advanced querying and visualization. 192 | - **:mag_right: Key Features:** 193 | - Conversion of unstructured text into structured graph data using LLMs 194 | - Seamless connection between Cognee and Memgraph for storage and search 195 | - Semantic search capabilities to query the knowledge graph using natural 196 | language 197 | - Interactive graph visualization and exploration using [Memgraph 198 | Lab](https://memgraph.com/docs/data-visualization) 199 | 200 | **:book: Additional resources** 201 | - [Docs: AI Integrations](https://memgraph.com/docs/ai-ecosystem/integrations) 202 | - [Blog: Improved Knowledge Graph Creation with LangChain and LlamaIndex](https://memgraph.com/blog/improved-knowledge-graph-creation-langchain-llamaindex) 203 | - [Blog: How to build single-agent RAG system with LlamaIndex?](https://memgraph.com/blog/single-agent-rag-system) 204 | - [Blog: How to build multi-agent RAG system with LlamaIndex?](https://memgraph.com/blog/multi-agent-rag-system) 205 | - [Blog: How to build Agentic RAG with Pagerank using LlamaIndex?](https://memgraph.com/blog/agentic-rag-with-pagerank) 206 | - [Blog: Introducing the Memgraph MCP Server](https://memgraph.com/blog/introducing-memgraph-mcp-server) 207 | - [Webinar: How to build GenAI apps with LlamaIndex and Memgraph](https://memgraph.com/webinars/how-to-build-genai-apps-with-llamaindex-and-memgraph) 208 | -------------------------------------------------------------------------------- /agentic-graph-rag/agentic/README.md: -------------------------------------------------------------------------------- 1 | # Agentic GraphRAG 2 | 3 | The Agentic GraphRAG demo is designed to be dataset-agnostic. Still, you 4 | can use our example dataset to get started quickly. 5 | 6 | ## How to run 7 | 8 | ### 1. Prepare the dataset and run Memgraph 9 | 10 | The `setup.sh` script runs Memgraph and imports the dataset from the CYPHERL 11 | file. You can tweak the `setup.sh` script and update the path to point to your 12 | CYPHERL file. 13 | 14 | To run the `setup.sh` script, run the following in the terminal: 15 | 16 | ``` 17 | bash ./setup.sh 18 | ``` 19 | 20 | If you haven't updated the `setup.sh` script, it will load an example AskNews 21 | finance dataset. 22 | 23 | If you prefer not to use the `setup.sh` script, make sure you have Memgraph running 24 | with the dataset loaded and `schema-info-enabled` set to `True`. 25 | 26 | ### 2. Install dependencies 27 | 28 | Ensure you have all the required dependencies installed.
You can use `pip` to install them. 29 | 30 | ``` 31 | pip install -r requirements.txt 32 | ``` 33 | 34 | ### 3. Set up environment variables 35 | 36 | Create a `.env` file in the same directory as your script and add your OpenAI API 37 | key: 38 | 39 | ``` 40 | OPENAI_API_KEY=your_openai_api_key 41 | ``` 42 | 43 | or export the OpenAI API key: 44 | 45 | ``` 46 | export OPENAI_API_KEY=your_openai_api_key 47 | ``` 48 | 49 | ### 4. Run the script 50 | 51 | Use Streamlit to run the script. Open a terminal, navigate to the directory 52 | containing agenticGraphRAG.py, and run: 53 | 54 | ``` 55 | streamlit run agenticGraphRAG.py 56 | ``` 57 | 58 | Besides starting a local web server, the above command will calculate and store 59 | embeddings, communities and community summaries in Memgraph, which are then used 60 | by the retrieval techniques. 61 | 62 | ### 5. Access the application 63 | 64 | Open your web browser and go to the URL provided in the terminal (usually 65 | http://localhost:8501). 66 | 67 | This will launch the Streamlit application, where you can enter your questions 68 | and interact with the Agentic GraphRAG system. 69 | 70 | ## Example questions 71 | 72 | In case you started the app with the provided AskNews finance dataset, here are some questions you can ask: 73 | 74 | > [!NOTE] 75 | > The agentic GraphRAG might not act the same if you run the questions listed above. It runs autonomously and makes decisions along the way, which can lead to different decisions in subsequent runs. 76 | 77 | 1. **What can you tell me about this dataset?** -> expected question type: Database -> tools: 1. schema, 2. config 78 | 79 | 2. **What can you tell me about Coca cola?** -> expected question type: Retrieval -> tools: 1. Cypher, 2. Vector relevance expansion 80 | 81 | 3. **How is coca cola connected in the graph, and what is its relationship with other companies?** -> expected question type: Structure -> tools: 1. Cypher, 2. Vector relevance expansion 82 | 83 | 4. **What level of logging does Memgraph have enabled?** -> expected question type: Database -> tools: 1. config, 2. schema 84 | 85 | 5. **Is Coca Cola in the most important nodes?** -> expected question type: Global -> tools: 1. PageRank, 2. Community -> the LLM picks the 10 most important nodes and Coca Cola is not among them; **Is Coca Cola in the 1000 most important nodes?** -> Yes 86 | 87 |
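Before launching the app, you can sanity-check the setup from step 1: `SHOW SCHEMA INFO` only works when `schema-info-enabled` is `True`. A quick check, mirroring what `helpers/test_schema.py` does:

```python
import neo4j

driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", ""))
with driver.session() as session:
    # Raises an error if Memgraph was started without --schema-info-enabled=True
    schema = session.run("SHOW SCHEMA INFO").single().value()
    print(schema[:200])
```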
-------------------------------------------------------------------------------- /agentic-graph-rag/agentic/helpers/test_community_precompute.py: -------------------------------------------------------------------------------- 1 | import neo4j 2 | 3 | 4 | def format_community(): 5 | """Assign a community id to each node via MAGE community detection, then print every community as a set of paths.""" 6 | driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", "")) 7 | 8 | number_of_communities = 0 9 | with driver.session() as session: 10 | result = session.run(""" 11 | CALL community_detection.get() 12 | YIELD node, community_id 13 | SET node.community_id = community_id; 14 | """ 15 | ) 16 | 17 | result = session.run(""" 18 | MATCH (n) 19 | RETURN count(distinct n.community_id) as community_count; 20 | """ 21 | ) 22 | for record in result: 23 | number_of_communities = record['community_count'] 24 | print(f"Number of communities: {record['community_count']}") 25 | 26 | 27 | with driver.session() as session: 28 | for i in range(number_of_communities): 29 | result = session.run(f""" 30 | MATCH (start), (end) 31 | WHERE start.community_id = {i} AND end.community_id = {i} AND id(start) < id(end) 32 | MATCH p = (start)-[*..1]-(end) 33 | RETURN p; 34 | """) 35 | community_string = "" 36 | for record in result: 37 | path = record['p'] 38 | for rel in path.relationships: 39 | start_node = rel.start_node 40 | end_node = rel.end_node 41 | start_node_properties = {k: v for k, v in start_node.items() if k != 'embedding'} 42 | end_node_properties = {k: v for k, v in end_node.items() if k != 'embedding'} 43 | community_string += f"({start_node_properties})-[:{rel.type}]->({end_node_properties})\n" 44 | print(community_string) 45 | 46 | 47 | # Call the function to test it 48 | format_community() 49 | -------------------------------------------------------------------------------- /agentic-graph-rag/agentic/helpers/test_config.py: -------------------------------------------------------------------------------- 1 | import neo4j 2 | 3 | 4 | def format_config(): 5 | driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", "")) 6 | with driver.session() as session: 7 | config = session.run("SHOW CONFIG") 8 | config_str = "Configurations:\n" 9 | for record in config: 10 | config_str += f"Name: {record['name']} | Default Value: {record['default_value']} | Current Value: {record['current_value']} | Description: {record['description']}\n" 11 | return config_str 12 | 13 | 14 | # Call the function to test it 15 | config_str = format_config() 16 | print(config_str) 17 | -------------------------------------------------------------------------------- /agentic-graph-rag/agentic/helpers/test_schema.py: -------------------------------------------------------------------------------- 1 | import json 2 | import neo4j 3 | import tiktoken 4 | 5 | 6 | def format_schema(): 7 | driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", "")) 8 | with driver.session() as session: 9 | schema = session.run("SHOW SCHEMA INFO") 10 | schema_info = json.loads(schema.single().value()) 11 | nodes = schema_info["nodes"] 12 | edges = schema_info["edges"] 13 | node_indexes = schema_info["node_indexes"] 14 | edge_indexes = schema_info["edge_indexes"] 15 | 16 | schema_str = "Nodes:\n" 17 | for node in nodes: 18 | properties = ", ".join( 19 | f"{prop['key']}: {', '.join(t['type'] for t in prop['types'])}" 20 | for prop in node["properties"] 21 | ) 22 |
schema_str += f"Labels: {node['labels']} | Properties: {properties}\n" 23 | 24 | schema_str += "\nEdges:\n" 25 | for edge in edges: 26 | properties = ", ".join( 27 | f"{prop['key']}: {', '.join(t['type'] for t in prop['types'])}" 28 | for prop in edge["properties"] 29 | ) 30 | schema_str += f"Type: {edge['type']} | Start Node Labels: {edge['start_node_labels']} | End Node Labels: {edge['end_node_labels']} | Properties: {properties}\n" 31 | 32 | schema_str += "\nNode Indexes:\n" 33 | for index in node_indexes: 34 | schema_str += ( 35 | f"Labels: {index['labels']} | Properties: {index['properties']}\n" 36 | ) 37 | 38 | schema_str += "\nEdge Indexes:\n" 39 | for index in edge_indexes: 40 | schema_str += f"Type: {index['type']} | Properties: {index['properties']}\n" 41 | 42 | return schema_str 43 | 44 | 45 | # Call the function to test it 46 | schema_str = format_schema() 47 | print(schema_str) 48 | 49 | # Count the schema string with tiktoken 50 | encoding = tiktoken.get_encoding("cl100k_base") 51 | token_count = len(encoding.encode(schema_str)) 52 | print(f"Token count: {token_count}") 53 | -------------------------------------------------------------------------------- /agentic-graph-rag/agentic/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit==1.41.1 2 | neo4j==5.26.0 3 | sentence-transformers==3.3.1 4 | openai==1.58.1 5 | python-dotenv==1.0.1 6 | tiktoken==0.8.0 7 | -------------------------------------------------------------------------------- /agentic-graph-rag/agentic/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dataset_path="${1:-asknews-finance-graph.cypherl}" 4 | 5 | echo "Starting Memgraph and importing the $dataset_path dataset for testing..." 6 | docker run -d --name memgraph_graphRAG -p 7687:7687 -p 7444:7444 memgraph/memgraph-mage:3.2 --log-level=TRACE --also-log-to-stderr --schema-info-enabled=True 7 | sleep 10 8 | 9 | echo "Importing the dataset into Memgraph..." 10 | lines=$(wc -l < $dataset_path) 11 | batch_size=100 12 | start=1 13 | while [ $start -le $lines ]; do 14 | sed -n "${start},$((start + batch_size - 1))p" $dataset_path | cat | docker run -i memgraph/mgconsole:latest --host host.docker.internal 15 | start=$((start + batch_size)) 16 | done 17 | # NOTE: cat data | mgconsole doesn't work because mgconsole can take a limited amount/size of queries 18 | # TODO: Fix mgconsole so that it can take file of any size. 19 | # cat "$dataset_path" | docker run -i memgraph/mgconsole:latest --host host.docker.internal 20 | 21 | # Wait for user to press Ctrl+C. 22 | echo "Press Ctrl+C to stop the Memgraph container..." 23 | trap 'echo "Stopping Memgraph container..."; docker stop memgraph_graphRAG; echo "Removing Memgraph container..."; docker rm memgraph_graphRAG; exit' SIGINT 24 | 25 | # Keep the script running. 26 | while true; do 27 | sleep 1 28 | done 29 | 30 | docker stop memgraph_graphRAG 31 | 32 | echo "Removing Memgraph container..." 
33 | docker rm memgraph_graphRAG 34 | -------------------------------------------------------------------------------- /agentic-graph-rag/impact-analysis/data/generating.py: -------------------------------------------------------------------------------- 1 | import random 2 | from time import sleep 3 | from neo4j import GraphDatabase 4 | 5 | def generate_and_ingest_iot_graph(uri="bolt://localhost:7687", user="", password="", num_graphs=3, limit=100, ingest=True, filename="iot_graph.cypherl"): 6 | driver = GraphDatabase.driver(uri, auth=(user, password)) 7 | cypher_statements = [] 8 | 9 | # Add indexes 10 | index_statements = [ 11 | "CREATE INDEX ON :Router(id);", 12 | "CREATE INDEX ON :AccessPoint(id);", 13 | "CREATE INDEX ON :WirelessDevice(id);", 14 | "CREATE INDEX ON :Controller(id);", 15 | "CREATE INDEX ON :Sensor(id);", 16 | "CREATE INDEX ON :Actuator(id);", 17 | "CREATE INDEX ON :Power(id);", 18 | "CREATE INDEX ON :PowerHub(id);", 19 | "CREATE INDEX ON :Hub(id);" 20 | ] 21 | 22 | router_device_type = ["Asus", "TP-Link", "Netgear", "Linksys"] 23 | router_status = ["Online", "Offline"] 24 | access_point_device_type = ["Cisco", "D-Link", "Ubiquiti"] 25 | access_point_status = ["Online", "Offline"] 26 | wireless_device_type = ["Smartphone", "Laptop", "Tablet", "IoTDevice"] 27 | controller_device_type = ["Zigbee", "Z-Wave", "PLC", "MicroController"] 28 | controller_status = ["Online", "Offline"] 29 | sensor_device_type = ["Temperature", "Humidity", "Motion"] 30 | actuator_device_type = ["Light", "Thermostat", "Lock", "Camera"] 31 | 32 | # Counters for each label 33 | power_id = 0 34 | router_id = 0 35 | powerhub_id = 0 36 | hub_id = 0 37 | access_point_id = 0 38 | wireless_device_id = 0 39 | controller_id = 0 40 | sensor_id = 0 41 | actuator_id = 0 42 | 43 | # Create power nodes, routers, power hubs 44 | for _ in range(min(8, limit)): # Prevent exceeding limit 45 | power_id += 1 46 | cypher_statements.append(f"CREATE (:Power {{id: {power_id}}});") 47 | 48 | for _ in range(min(10, limit)): # Prevent exceeding limit 49 | router_id += 1 50 | device_type = random.choice(router_device_type) 51 | status = random.choice(router_status) 52 | cypher_statements.append(f"CREATE (:Router {{id: {router_id}, device_type: '{device_type}', status: '{status}'}});") 53 | 54 | for _ in range(min(3, limit)): # Prevent exceeding limit 55 | powerhub_id += 1 56 | cypher_statements.append(f"CREATE (:PowerHub {{id: {powerhub_id}}});") 57 | cypher_statements.append(f"MATCH (p:Power) WITH p ORDER BY rand() LIMIT 1 MATCH (ph:PowerHub {{id: {powerhub_id}}}) CREATE (p)-[:SUPPLIES]->(ph);") 58 | 59 | # Create hubs, access points, and wireless devices 60 | for router in range(router_id): 61 | num_hubs = random.randint(2, 4) 62 | for _ in range(num_hubs): 63 | hub_id += 1 64 | cypher_statements.append(f"CREATE (:Hub {{id: {hub_id}}});") 65 | cypher_statements.append(f"MATCH (r:Router {{id: {router + 1}}}), (h:Hub {{id: {hub_id}}}) CREATE (r)-[:CONNECTS]->(h);") 66 | 67 | num_access_points = random.randint(1, 3) 68 | for _ in range(num_access_points): 69 | access_point_id += 1 70 | device_type = random.choice(access_point_device_type) 71 | status = random.choice(access_point_status) 72 | cypher_statements.append(f"CREATE (:AccessPoint {{id: {access_point_id}, device_type: '{device_type}', status: '{status}'}});") 73 | cypher_statements.append(f"MATCH (h:Hub {{id: {hub_id}}}), (ap:AccessPoint {{id: {access_point_id}}}) CREATE (ap)-[:CONNECTS]->(h);") 74 | 75 | num_devices = random.randint(1, 3) 76 | for _ in 
range(num_devices): 77 | wireless_device_id += 1 78 | device_type = random.choice(wireless_device_type) 79 | cypher_statements.append(f"CREATE (:WirelessDevice {{id: {wireless_device_id}, device_type: '{device_type}'}});") 80 | cypher_statements.append(f"MATCH (ap:AccessPoint {{id: {access_point_id}}}), (d:WirelessDevice {{id: {wireless_device_id}}}) CREATE (d)-[:CONNECTS]->(ap);") 81 | 82 | num_controllers = random.randint(2, 3) 83 | for _ in range(num_controllers): 84 | controller_id += 1 85 | device_type = random.choice(controller_device_type) 86 | status = random.choice(controller_status) 87 | cypher_statements.append(f"CREATE (:Controller {{id: {controller_id}, device_type: '{device_type}', status: '{status}'}});") 88 | cypher_statements.append(f"MATCH (h:Hub {{id: {hub_id}}}), (c:Controller {{id: {controller_id}}}) CREATE (c)-[:CONNECTS]->(h);") 89 | 90 | 91 | num_sensors = random.randint(1, 2) 92 | for _ in range(num_sensors): 93 | sensor_id += 1 94 | device_type = random.choice(sensor_device_type) 95 | cypher_statements.append(f"CREATE (:Sensor {{id: {sensor_id}, device_type: '{device_type}'}});") 96 | cypher_statements.append(f"MATCH (c:Controller {{id: {controller_id}}}), (s:Sensor {{id: {sensor_id}}}) CREATE (c)-[:HAS]->(s);") 97 | 98 | num_actuators = random.randint(1, 2) 99 | for _ in range(num_actuators): 100 | actuator_id += 1 101 | device_type = random.choice(actuator_device_type) 102 | cypher_statements.append(f"CREATE (:Actuator {{id: {actuator_id}, device_type: '{device_type}'}});") 103 | cypher_statements.append(f"MATCH (c:Controller {{id: {controller_id}}}), (a:Actuator {{id: {actuator_id}}}) CREATE (c)-[:HAS]->(a);") 104 | 105 | cypher_statements.append(f"MATCH (ph:PowerHub) WITH ph ORDER BY rand() LIMIT 1 MATCH (c:Controller {{id: {controller_id}}}) CREATE (ph)-[:POWERS]->(c);") 106 | 107 | def execute_query(tx, query): 108 | tx.run(query) 109 | 110 | if ingest: 111 | # Clear the graph once, then create the indexes (outside of explicit transactions) 112 | with driver.session() as session: 113 | session.run("MATCH (n) DETACH DELETE n") 114 | sleep(2) 115 | for index_statement in index_statements: 116 | session.run(index_statement) 117 | 118 | for statement in cypher_statements: 119 | with driver.session() as session: 120 | session.execute_write(lambda tx: tx.run(statement)) 121 | driver.close() 122 | print("IoT graph dataset ingested into Memgraph") 123 | 124 | with open(filename, "w") as f: 125 | f.write("\n".join(index_statements)) 126 | f.write("\n") 127 | f.write("\n".join(cypher_statements)) 128 | 129 | print(f"Cypher script saved to {filename}") 130 | 131 | 132 | generate_and_ingest_iot_graph(limit=100, ingest=True) 133 | 134 | -------------------------------------------------------------------------------- /agentic-graph-rag/impact-analysis/data/queries.md: -------------------------------------------------------------------------------- 1 | ### Get the whole graph 2 | MATCH path=()-[]-() RETURN path; 3 | 4 | 5 | ### Get the router nodes 6 | MATCH (n:Router) RETURN n; 7 | 8 | 9 | ### Get a particular router 10 | MATCH path=(n:Router{id:9})-[*2]-() RETURN path; 11 | 12 | 13 | ### Get the controllers under a particular router 14 | MATCH (r:Router {id: 9})-[:CONNECTS*]-(c:Controller) 15 | RETURN DISTINCT c; 16 | 17 | ### Get the end devices under a particular router 18 | MATCH (r:Router {id: 9})-[:CONNECTS*]-(c:Controller)-[:HAS]->(d) 19 | RETURN DISTINCT d; 20 | 21 | 22 | ### PageRank 23 | CALL pagerank.get() 24 | YIELD node, rank 25 | RETURN node, rank 26 | ORDER BY rank DESC;
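### Impact of a power hub outage
An added sketch that assumes the schema produced by `generating.py` (Power -SUPPLIES-> PowerHub -POWERS-> Controller -HAS-> Sensor/Actuator); the hub id is illustrative.
MATCH (ph:PowerHub {id: 1})-[:POWERS]->(c:Controller)-[:HAS]->(d)
RETURN c, collect(d) AS affected_devices;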
-------------------------------------------------------------------------------- /agentic-graph-rag/vector/README.md: -------------------------------------------------------------------------------- 1 | # Vector Search 2 | 3 | ## How to run 4 | 5 | Follow these steps to run the `vector_search.py` script: 6 | 7 | ### 1. Install dependencies 8 | 9 | Ensure you have all the required dependencies installed. You can use `pip` to install them. 10 | 11 | ``` 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | ### 2. Set up environment variables 16 | 17 | Create a `.env` file in the same directory as your script and add your OpenAI API key: 18 | 19 | ``` 20 | OPENAI_API_KEY=your_openai_api_key 21 | ``` 22 | 23 | ### 3. Configure the Memgraph connection 24 | 25 | Depending on where your Memgraph instance is running, adjust the client IP and port in `vector_search.py`. 26 | 27 | ### 4. Run the script 28 | 29 | Open a terminal, navigate to the directory containing `vector_search.py`, and run: 30 | 31 | ``` 32 | python vector_search.py 33 | ``` -------------------------------------------------------------------------------- /agentic-graph-rag/vector/requirements.txt: -------------------------------------------------------------------------------- 1 | neo4j==5.26.0 2 | sentence-transformers==3.2.1 3 | ollama==0.3.3 4 | openai==1.54.0 5 | spacy==3.8.2 6 | spacy-llm==0.7.2 7 | -------------------------------------------------------------------------------- /agentic-graph-rag/vector/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Starting Memgraph with vector search from custom image for testing on GOT dataset..." 4 | # docker run -d --name memgraph_vector -p 7687:7687 -p 7444:7444 memgraph/memgraph-mage:exp-vector-1 --log-level=DEBUG --also-log-to-stderr --telemetry-enabled=False --experimental-vector-indexes='tag__Entity__embedding__{"dimension":128,"limit":10000}' 5 | docker run -d --name memgraph_vector -p 7687:7687 -p 7444:7444 memgraph/memgraph-mage --log-level=TRACE --also-log-to-stderr --telemetry-enabled=False 6 | if [ -f "dataset.cypherl.gz" ]; then 7 | echo "Unzipping the dataset.cypherl.gz file..." 8 | gunzip dataset.cypherl.gz 9 | else 10 | echo "dataset.cypherl.gz file not found!" 11 | echo "Probably the dataset.cypherl file is already unzipped." 12 | fi 13 | 14 | sleep 3 15 | 16 | echo "Importing the dataset into Memgraph..." 17 | cat ../memgraph-export-embeddings-label.cypherl | docker run -i memgraph/mgconsole:latest --host host.docker.internal 18 | 19 | 20 | # Wait for user to press Ctrl+C 21 | echo "Press Ctrl+C to stop the Memgraph container..." 22 | trap 'echo "Stopping Memgraph container..."; docker stop memgraph_vector; echo "Removing Memgraph container..."; docker rm memgraph_vector; exit' SIGINT 23 | 24 | # Keep the script running 25 | while true; do 26 | sleep 1 27 | done 28 | 29 | docker stop memgraph_vector 30 | 31 | echo "Removing Memgraph container..."
32 | docker rm memgraph_vector 33 | 34 | # Example of an exported node with an embedding property: 35 | # { 36 | # "id": 2, 37 | # "labels": [ 38 | # "Character" 39 | # ], 40 | # "properties": { 41 | # "embedding": [ 42 | # 0.8713272213935852, 43 | # 1.1093124151229858 44 | # ], 45 | # "name": "Viserys Targaryen" 46 | # }, 47 | # "type": "node" 48 | # } -------------------------------------------------------------------------------- /agentic-graph-rag/vector/vector_search.py: -------------------------------------------------------------------------------- 1 | from sentence_transformers import SentenceTransformer 2 | from dotenv import load_dotenv 3 | import neo4j 4 | import asyncio 5 | import ollama 6 | from openai import AsyncOpenAI 7 | import os 8 | import spacy 9 | from spacy_llm.util import assemble 10 | import json 11 | from collections import Counter 12 | from pathlib import Path 13 | 14 | 15 | def compute_embeddings_based_on_node(driver, model): 16 | with driver.session() as session: 17 | 18 | # Create an embedding for each node from its labels and properties 19 | # Retrieve all nodes 20 | result = session.run("MATCH (n) RETURN n") 21 | 22 | for record in result: 23 | node = record["n"] 24 | # Combine node labels and properties into a single string 25 | node_data = ( 26 | " ".join(node.labels) 27 | + " " 28 | + " ".join(f"{k}: {v}" for k, v in node.items()) 29 | ) 30 | 31 | node_embedding = model.encode(node_data) 32 | 33 | # Store the embedding back into the node 34 | session.run( 35 | f"MATCH (n) WHERE id(n) = {node.element_id} SET n.embedding = {node_embedding.tolist()}" 36 | ) 37 | 38 | session.run("MATCH (n) SET n:Entity") 39 | 40 | 41 | def find_most_similar_node(driver, question_embedding): 42 | 43 | with driver.session() as session: 44 | result = session.run( 45 | f"CALL vector_search.search('tag', 10, {question_embedding.tolist()}) YIELD * RETURN *;" 46 | ) 47 | nodes_data = [] 48 | for record in result: 49 | node = record["node"] 50 | properties = {k: v for k, v in node.items() if k != "embedding"} 51 | node_data = { 52 | "distance": record["distance"], 53 | "id": node.element_id, 54 | "labels": list(node.labels), 55 | "properties": properties, 56 | } 57 | nodes_data.append(node_data) 58 | print("All similar nodes:") 59 | for node in nodes_data: 60 | print(node) 61 | 62 | return nodes_data[0] if nodes_data else None 63 | 64 | 65 | def clean_path_nodes(path): 66 | """Return the nodes of a path without their embedding property.""" 67 | # Helper kept for completeness; main() uses get_relevant_data() instead. 68 | cleaned_nodes = [] 69 | for node in path.nodes: 70 | # Drop the (large) embedding vector so printed output stays readable 71 | properties = {k: v for k, v in node.items() if k != "embedding"} 72 | node_data = { 73 | "id": node.element_id, 74 | "labels": list(node.labels), 75 | "properties": properties, 76 | } 77 | cleaned_nodes.append(node_data) 78 | 79 | 80 | 81 | 82 | return cleaned_nodes 83 | 84 | 85 | def get_relevant_data(driver, node, hops): 86 | with driver.session() as session: 87 | query = ( 88 | f"MATCH path=((n)-[r*..{hops}]-(m)) WHERE id(n) = {node['id']} RETURN path" 89 | ) 90 | result = session.run(query) 91 | paths = [] 92 | for record in result: 93 | path_data = [] 94 | for segment in record["path"]: 95 | 96 | # Process start node without 'embedding' property 97 | start_node_data = { 98 | k: v for k, v in segment.start_node.items() if k != "embedding" 99 | } 100 | 101 | # Process relationship data (copy the relationship's own properties) 102 | relationship_data = { 103 | "type": segment.type, 104 | "properties": {k: v for k, v in segment.items()}, 105 | } 106 | 107 | # Process end node without 'embedding' property 108 | end_node_data = { 109 | k: v for k, v in
segment.end_node.items() if k != "embedding" 110 | } 111 | 112 | # Add to path_data as a tuple (start_node, relationship, end_node) 113 | path_data.append((start_node_data, relationship_data, end_node_data)) 114 | 115 | paths.append(path_data) 116 | 117 | return paths 118 | 119 | 120 | def RAG_prompt(question, relevance_expansion_data): 121 | prompt = f""" 122 | You are an AI language model. I will provide you with a question and a set of data obtained through a relevance expansion process in a graph database. The relevance expansion process finds nodes connected to a target node within a specified number of hops and includes the relationships between these nodes. 123 | 124 | Question: {question} 125 | 126 | Relevance Expansion Data: 127 | {relevance_expansion_data} 128 | 129 | Based on the provided data, please answer the question. Base your answer only on the provided data, and add context describing which data you based your answer on. If you do not have enough information to answer the question, please state that you do not have enough information to answer the question. 130 | """ 131 | return prompt 132 | 133 | 134 | def question_prompt(question): 135 | prompt = f""" 136 | You are an AI language model. I will provide you with a question. 137 | Extract the key information from the question, i.e., the information that is required to answer it. 138 | 139 | Question: {question} 140 | 141 | The output format should be like this: 142 | Key Information: [key information 1], [key information 2], ... 143 | """ 144 | return prompt 145 | 146 | 147 | async def get_response(client, prompt): 148 | response = await client.chat.completions.create( 149 | model="gpt-3.5-turbo", messages=[{"role": "user", "content": prompt}] 150 | ) 151 | return response.choices[0].message.content 152 | 153 | 154 | # Split document into sentences 155 | def split_document_sent(text, nlp): 156 | doc = nlp(text) 157 | return [sent.text.strip() for sent in doc.sents] 158 | 159 | 160 | def process_text(text, nlp, verbose=False): 161 | doc = nlp(text) 162 | if verbose: 163 | print(f"Text: {doc.text}") 164 | print(f"Entities: {[(ent.text, ent.label_) for ent in doc.ents]}") 165 | return doc 166 | 167 | 168 | # Pipeline to run entity extraction 169 | def extract_entities(text, nlp, verbose=False): 170 | processed_data = [] 171 | entity_counts = Counter() 172 | 173 | sentences = split_document_sent(text, nlp) 174 | for sent in sentences: 175 | doc = process_text(sent, nlp, verbose) 176 | entities = [(ent.text, ent.label_) for ent in doc.ents] 177 | 178 | # Store processed data for each sentence 179 | processed_data.append({"text": doc.text, "entities": entities}) 180 | 181 | # Update counters 182 | entity_counts.update([ent[1] for ent in entities]) 183 | 184 | # Export to JSON 185 | with open("processed_data.json", "w") as f: 186 | json.dump(processed_data, f) 187 | 188 | 189 | def generate_cypher_queries(nodes, relationships): 190 | queries = [] 191 | 192 | # Create nodes 193 | for node in nodes: 194 | query = f""" 195 | MERGE (n:{node['type']}:Entity {{name: '{node['name']}'}}) 196 | ON CREATE SET n.id={node['id']} 197 | ON MATCH SET n.id={node['id']} 198 | """ 199 | queries.append(query) 200 | 201 | # Create relationships 202 | for rel in relationships: 203 | query = ( 204 | f"MATCH (a {{id: {rel['source']}}}), (b {{id: {rel['target']}}}) " 205 | f"CREATE (a)-[:{rel['relationship']}]->(b)" 206 | ) 207 | queries.append(query) 208 | 209 | return queries 210 | 211 | 212 | def main():
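    # Overview of the flow below:
    #  1. Ask an LLM to pull the key information out of the question.
    #  2. Embed that key information and find the most similar node via vector search.
    #  3. Expand the pivot node's neighborhood (relevance expansion) and answer with a RAG prompt.
    #  4. Separately, extract entities from a sample summary and generate Cypher to grow the graph.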
213 | # Create a Neo4j driver; adjust the host and port to wherever your Memgraph instance runs 214 | driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", "")) 215 | 216 | # compute_embeddings_based_on_node( 217 | # driver, SentenceTransformer("paraphrase-MiniLM-L6-v2") 218 | # ) 219 | 220 | load_dotenv() 221 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 222 | 223 | client = AsyncOpenAI() 224 | 225 | # question = ( 226 | # "In what episode was Robb Stark the victim, and what is the name of the episode?" 227 | # ) 228 | # question = "Who killed Viserys Targaryen in Game of thrones?" 229 | # question = "Who is Viserys Targaryen?" 230 | # question = "In which episode was Viserys Targaryen killed?" 231 | # question = "How was Viserys Targaryen killed in Game of Thrones?" 232 | # question = "What weapon was used to kill Viserys Targaryen in Game of Thrones?" 233 | # question = "Who betrayed Viserys Targaryen in Game of Thrones?" 234 | # question = "What was the method used to kill Viserys Targaryen in Game of Thrones?" 235 | # question = "To whom was Viserys Targaryen loyal?" 236 | question = "Is Khal Drogo married?" 237 | 238 | prompt = question_prompt(question) 239 | response = asyncio.run(get_response(client, prompt)) 240 | print(response) 241 | 242 | key_information = response.split("Key Information:")[-1].strip() 243 | 244 | model = SentenceTransformer("paraphrase-MiniLM-L6-v2") 245 | question_embedding = model.encode(key_information) 246 | 247 | node = find_most_similar_node(driver, question_embedding) 248 | if node: 249 | print("The most similar node is:") 250 | print(node) 251 | 252 | relevant_data = get_relevant_data(driver, node, hops=1) 253 | 254 | print("The relevant data is:") 255 | print(relevant_data) 256 | 257 | prompt = RAG_prompt(question, relevant_data) 258 | 259 | response = asyncio.run(get_response(client, prompt)) 260 | print("The response is:") 261 | print(response) 262 | 263 | # Load the spaCy model 264 | nlp = spacy.load("en_core_web_md") 265 | 266 | # Sample text summary for processing 267 | summary = """ 268 | Viserys Targaryen is the last living son of the former king, Aerys II Targaryen (the 'Mad King'). 269 | As one of the last known Targaryen heirs, Viserys Targaryen is obsessed with reclaiming the Iron Throne and 270 | restoring his family’s rule over Westeros. Ambitious and arrogant, he often treats his younger sister, Daenerys Targaryen, 271 | as a pawn, seeing her only as a means to gain power. His ruthless ambition leads him to make a marriage alliance with 272 | Khal Drogo, a powerful Dothraki warlord, hoping Khal Drogo will give him the army he needs. 273 | However, Viserys Targaryen’s impatience and disrespect toward the Dothraki culture lead to his downfall; 274 | he is ultimately killed by Khal Drogo in a brutal display of 'a crown for a king' – having molten gold poured over his head. 275 | """ 276 | 277 | extract_entities(summary, nlp) 278 | 279 | # Load processed data from JSON 280 | json_path = Path("processed_data.json") 281 | with open(json_path, "r") as f: 282 | processed_data = json.load(f) 283 | 284 | # Prepare nodes and relationships 285 | nodes = [] 286 | relationships = [] 287 | 288 | # Formulate a prompt for the LLM
" 292 | "For entities that have meaningful connections, define 'relationships' as dictionaries with 'source' (source node id), " 293 | "'target' (target node id), and 'relationship' (type of connection). Create max 30 nodes, format relationships in the format of capital letters and _ inbetween words and format the entire response in the JSON output containing only variables nodes and relationships without any text inbetween. Use following labels for nodes: Character, Title, Location, House, Death, Event, Allegiance and following relationship types: HAPPENED_IN, SIBLING_OF, PARENT_OF, MARRIED_TO, HEALED_BY, RULES, KILLED, LOYAL_TO, BETRAYED_BY. Make sure the entire JSON file fits in the output" 294 | "JSON data:\n" 295 | f"{json.dumps(processed_data)}" 296 | ) 297 | 298 | response = asyncio.run(get_response(client, prompt)) 299 | 300 | structured_data = json.loads(response) # Assuming GPT-4 outputs structured JSON 301 | 302 | # Populate nodes and relationships lists 303 | nodes.extend(structured_data.get("nodes", [])) 304 | relationships.extend(structured_data.get("relationships", [])) 305 | 306 | cypher_queries = generate_cypher_queries(nodes, relationships) 307 | with driver.session() as session: 308 | for query in cypher_queries: 309 | try: 310 | session.run(query) 311 | print(f"Executed query: {query}") 312 | except Exception as e: 313 | print(f"Error executing query: {query}. Error: {e}") 314 | 315 | driver.close() 316 | 317 | 318 | if __name__ == "__main__": 319 | main() 320 | -------------------------------------------------------------------------------- /graph-rag/processed_data.json: -------------------------------------------------------------------------------- 1 | [{"text": "Viserys Targaryen is the last living son of the former king, Aerys II Targaryen (the 'Mad King').", "entities": [["Viserys Targaryen", "PERSON"], ["Aerys II Targaryen", "PERSON"]]}, {"text": "As one of the last known Targaryen heirs, Viserys Targaryen is obsessed with reclaiming the Iron Throne and \n restoring his family\u2019s rule over Westeros.", "entities": [["one", "CARDINAL"], ["Targaryen", "PERSON"], ["Viserys Targaryen", "PERSON"], ["the Iron Throne", "FAC"], ["Westeros", "GPE"]]}, {"text": "Ambitious and arrogant, he often treats his younger sister, Daenerys Targaryen, \n as a pawn, seeing her only as a means to gain power.", "entities": [["Daenerys Targaryen", "PERSON"]]}, {"text": "His ruthless ambition leads him to make a marriage alliance with \n Khal Drogo, a powerful Dothraki warlord, hoping Khal Drogo will give him the army he needs.", "entities": [["Khal Drogo", "PERSON"], ["Dothraki", "PERSON"], ["Khal Drogo", "PERSON"]]}, {"text": "However, Viserys Targaryen\u2019s impatience and disrespect toward the Dothraki culture lead to his downfall;\n he is ultimately killed by Khal Drogo in a brutal display of 'a crown for a king' \u2013 having molten gold poured over his head.", "entities": [["Viserys Targaryen", "PERSON"], ["Dothraki", "GPE"], ["Khal Drogo", "PERSON"]]}] -------------------------------------------------------------------------------- /integrations/cognee/cognee.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Cognee x Memgraph integration\n", 8 | "\n", 9 | "This notebook demonstrates how to integrate\n", 10 | "[Cognee](https://github.com/cognee-ai/cognee) with\n", 11 | "[Memgraph](https://memgraph.com), a graph database platform, to 
automatically\n", 12 | "convert unstructured text into a semantically searchable knowledge graph using\n", 13 | "Large Language Models (LLMs). \n", 14 | "\n", 15 | "Cognee is an AI-powered toolkit for cognitive search and graph-based knowledge\n", 16 | "representation. It uses LLMs to break down natural language into structured\n", 17 | "concepts and relationships, storing them as graphs in Memgraph for further\n", 18 | "querying and visualization.\n", 19 | "\n", 20 | "This notebook demonstrates how to convert Hacker News threadsinto a live\n", 21 | "semantic knowledge graph using LLM-powered processing via Cognee and real-time\n", 22 | "graph storage via Memgraph.\n", 23 | "\n", 24 | "\n", 25 | "## Prerequisites\n", 26 | "\n", 27 | "To follow along, you'll need:\n", 28 | "\n", 29 | "1. **Docker**: Ensure [Docker](https://www.docker.com/) is installed and running\n", 30 | " in the background. \n", 31 | "\n", 32 | "2. **Memgraph**: The easiest way to run Memgraph is using the following\n", 33 | " commands:\n", 34 | "\n", 35 | "For Linux/macOS: `curl https://install.memgraph.com | sh`\n", 36 | "\n", 37 | "For Windows: `iwr https://windows.memgraph.com | iex`\n", 38 | "\n", 39 | "This will launch Memgraph at `localhost:3000`.\n", 40 | "\n", 41 | "3. **Python 3.10+**: For our pipeline\n", 42 | "\n", 43 | "4. **OpenAI API Key**: For LLM processing\n", 44 | "\n", 45 | "5. **Neccessary dependencies**: To install, open your terminal and run:" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "%pip install cognee dlt requests python-dateutil neo4j python-dotenv" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Environment setup\n", 62 | "\n", 63 | "We'll load the environment variables used to configure the LLM and graph\n", 64 | "database providers. These will be pulled from a `.env` file (which you must\n", 65 | "create securely — don’t share API keys!). 
In this example, we're using OpenAI.\n", 66 | "\n", 67 | "Create a file named `.env` in your project root with the following content:\n", 68 | "\n", 69 | "```\n", 70 | "# LLM Configuration\n", 71 | "LLM_API_KEY=sk-your-openai-api-key\n", 72 | "LLM_MODEL=openai/gpt-4o-mini\n", 73 | "LLM_PROVIDER=openai\n", 74 | "EMBEDDING_PROVIDER=openai\n", 75 | "EMBEDDING_MODEL=openai/text-embedding-3-large\n", 76 | "\n", 77 | "# Memgraph Configuration\n", 78 | "GRAPH_DATABASE_PROVIDER=memgraph\n", 79 | "GRAPH_DATABASE_URL=bolt://localhost:7687\n", 80 | "GRAPH_DATABASE_USERNAME=\"\"\n", 81 | "GRAPH_DATABASE_PASSWORD=\"\"\n", 82 | "\n", 83 | "# Hacker News API\n", 84 | "HN_API_BASE=https://hacker-news.firebaseio.com/v0\n", 85 | "```\n", 86 | "\n", 87 | "## Building the pipeline\n", 88 | "\n", 89 | "Let's first load our environment in the notebook:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 1, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "from dotenv import load_dotenv\n", 99 | "import os\n", 100 | "\n", 101 | "load_dotenv()\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "### Data extraction from Hacker News\n", 109 | "\n", 110 | "Our pipeline starts by extracting data from the Hacker News API:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 2, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "import dlt\n", 120 | "import requests\n", 121 | "from typing import Iterator, Dict, Any\n", 122 | "from datetime import datetime\n", 123 | "import time\n", 124 | "\n", 125 | "HN_API_BASE = \"https://hacker-news.firebaseio.com/v0\"\n", 126 | "\n", 127 | "@dlt.resource(table_name=\"posts\", write_disposition=\"merge\", primary_key=\"id\")\n", 128 | "def get_posts_incremental(\n", 129 | " updated_at=dlt.sources.incremental(\"time\", initial_value=0)\n", 130 | ") -> Iterator[Dict[str, Any]]:\n", 131 | " \"\"\"Extract posts from Hacker News API with incremental loading\"\"\"\n", 132 | "\n", 133 | " # Get latest stories\n", 134 | " top_stories_response = requests.get(f\"{HN_API_BASE}/topstories.json\")\n", 135 | " top_story_ids = top_stories_response.json()\n", 136 | "\n", 137 | " new_stories_response = requests.get(f\"{HN_API_BASE}/newstories.json\")\n", 138 | " new_story_ids = new_stories_response.json()\n", 139 | "\n", 140 | " all_story_ids = list(set(top_story_ids + new_story_ids))[:20]\n", 141 | "\n", 142 | " print(f\"Total story IDs to check: {len(all_story_ids)}\")\n", 143 | "\n", 144 | " for story_id in all_story_ids:\n", 145 | " try:\n", 146 | " item_response = requests.get(f\"{HN_API_BASE}/item/{story_id}.json\")\n", 147 | " if item_response.status_code == 200:\n", 148 | " item = item_response.json()\n", 149 | "\n", 150 | " if item and item.get('type') == 'story':\n", 151 | " item_time = item.get('time', 0)\n", 152 | " if item_time > updated_at.last_value:\n", 153 | " # Prepare data for Cognee processing\n", 154 | " item['created_at'] = datetime.fromtimestamp(item['time'])\n", 155 | " item['extracted_at'] = datetime.now()\n", 156 | " item['content_for_cognee'] = f\"Title: {item.get('title', '')}. 
{item.get('text', '')}\"\n", 157 | " \n", 158 | " print(f\"Yielding post ID {item['id']} titled: {item.get('title', '')}\")\n", 159 | "\n", 160 | " yield item\n", 161 | " else:\n", 162 | " print(f\"Skipping post {item.get('id')} (old)\")\n", 163 | "\n", 164 | " time.sleep(0.1) # Rate limiting\n", 165 | "\n", 166 | " except Exception as e:\n", 167 | " print(f\"Error fetching story {story_id}: {e}\")\n", 168 | " continue\n" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "### Cognee integration for knowledge graph generation\n", 176 | "\n", 177 | "Now, you may integrate Cognee to process the extracted text and build your\n", 178 | "knowledge graph:" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "import cognee\n", 188 | "import asyncio\n", 189 | "from dotenv import load_dotenv\n", 190 | "\n", 191 | "load_dotenv()\n", 192 | "\n", 193 | "class CogneeMemgraphProcessor:\n", 194 | " def __init__(self):\n", 195 | " pass\n", 196 | "\n", 197 | " async def process_posts(self, posts_data):\n", 198 | " \"\"\"Process posts through Cognee to build knowledge graph\"\"\"\n", 199 | "\n", 200 | " for post in posts_data:\n", 201 | " try:\n", 202 | " # Add post content to Cognee\n", 203 | " content = post.get('content_for_cognee', '')\n", 204 | " if content:\n", 205 | " print(f\"Adding post to Cognee: {post.get('title', '')}\")\n", 206 | " await cognee.add(content, dataset_name=\"hackernews_posts\")\n", 207 | "\n", 208 | " # Add metadata as structured data\n", 209 | " metadata = {\n", 210 | " \"post_id\": post.get('id'),\n", 211 | " \"author\": post.get('by'),\n", 212 | " \"score\": post.get('score', 0),\n", 213 | " \"url\": post.get('url'),\n", 214 | " \"created_at\": post.get('created_at')\n", 215 | " }\n", 216 | " metadata_str = \". 
\".join(f\"{k}: {v}\" for k, v in metadata.items())\n", 217 | " await cognee.add(metadata, dataset_name=\"hackernews_metadata\")\n", 218 | "\n", 219 | " except Exception as e:\n", 220 | " print(f\"Error processing post {post.get('id')}: {e}\")\n", 221 | "\n", 222 | " # Build the knowledge graph\n", 223 | " print(\"Building knowledge graph with Cognee...\")\n", 224 | " await cognee.cognify()\n", 225 | " print(\"Knowledge graph construction completed!\")\n", 226 | "\n", 227 | " async def search_knowledge_graph(self, query: str):\n", 228 | " \"\"\"Perform semantic search on the knowledge graph\"\"\"\n", 229 | " results = await cognee.search(query_text=query)\n", 230 | " return results\n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Visualize your data in Memgraph\n", 238 | "\n", 239 | "Now that the graph is created, we can explore it in the UI by visiting\n", 240 | "`http://localhost:3000/`.\n", 241 | "\n", 242 | "### Explore the graph\n", 243 | "\n", 244 | "Use Cypher queries to explore your knowledge graph:\n", 245 | "\n", 246 | "```\n", 247 | "-- View the entire graph structure\n", 248 | "MATCH p=()-[]-() RETURN p LIMIT 100;\n", 249 | "\n", 250 | "-- Find all entities related to \"AI\" or \"artificial intelligence\"\n", 251 | "MATCH (n)\n", 252 | "WHERE n.name CONTAINS \"AI\" OR n.name CONTAINS \"artificial intelligence\"\n", 253 | "RETURN n;\n", 254 | "\n", 255 | "-- Discover relationships between programming languages\n", 256 | "MATCH (lang1)-[r]-(lang2)\n", 257 | "WHERE lang1.type = \"programming_language\" AND lang2.type = \"programming_language\"\n", 258 | "RETURN lang1, r, lang2;\n", 259 | "\n", 260 | "-- Find the most connected entities (high centrality)\n", 261 | "MATCH (n)-[r]-()\n", 262 | "RETURN n.name, count(r) as connections\n", 263 | "ORDER BY connections DESC\n", 264 | "LIMIT 10;\n", 265 | "```\n", 266 | "\n", 267 | "\n", 268 | "## Conclusion\n", 269 | "\n", 270 | "This integration demonstrates how fast-moving online discussions, like those on Hacker News, can be transformed into queryable knowledge graphs using Cognee and Memgraph.\n", 271 | "\n", 272 | "By combining the Hacker News API with AI-based semantic understanding and high-performance graph database technology, you’ve created a system that can:\n", 273 | "\n", 274 | "- **Automatically understand** the semantic content of discussions\n", 275 | "- **Discover hidden relationships** between concepts and entities\n", 276 | "- **Enable smart search** that goes beyond keyword matching\n", 277 | "- **Provide visual insights** through graph exploration\n", 278 | "- **Scale to handle** large volumes of real-time data\n", 279 | "\n", 280 | "The future of knowledge management lies in systems that can think, reason, and discover insights the way humans. With this integration, you’re one step closer to that reality!" 
281 | ] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 3.10.16 ('cognee_test')", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.10.16" 301 | }, 302 | "orig_nbformat": 4, 303 | "vscode": { 304 | "interpreter": { 305 | "hash": "e20c0ce903d62983291697e6c8a0c04c6dc5f192d75a0a9d78908ad8de17c071" 306 | } 307 | } 308 | }, 309 | "nbformat": 4, 310 | "nbformat_minor": 2 311 | } 312 | -------------------------------------------------------------------------------- /integrations/langchain/langchain-kg-creation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/integrations/langchain/langchain-kg-creation.png -------------------------------------------------------------------------------- /integrations/langchain/langchain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# LangChain\n", 8 | "\n", 9 | "LangChain is a framework for developing applications powered by large language\n", 10 | "models (LLMs). Currently, Memgraph's LangChain integration supports\n", 11 | "creating a knowledge graph from unstructured data and querying with natural\n", 12 | "language. You can follow the example on [LangChain\n", 13 | "docs](https://python.langchain.com/docs/integrations/graphs/memgraph/) or go\n", 14 | "through the quick start below.\n", 15 | "\n", 16 | "## Installation\n", 17 | "\n", 18 | "To install all the required packages, run:\n", 19 | "\n", 20 | "```shell\n", 21 | "pip install langchain langchain-openai langchain-experimental neo4j --user\n", 22 | "```\n", 23 | "\n", 24 | "## Environment setup \n", 25 | "\n", 26 | "Before you get started, make sure you have [Memgraph](/getting-started) running\n", 27 | "in the background.\n", 28 | "\n", 29 | "Then, instantiate `MemgraphGraph` in your Python code. This object holds the\n", 30 | "connection to the running Memgraph instance. Make sure to set up all the\n", 31 | "environment variables properly." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import os\n", 41 | "\n", 42 | "from langchain_community.chains.graph_qa.memgraph import MemgraphQAChain\n", 43 | "from langchain_community.graphs import MemgraphGraph\n", 44 | "from langchain_openai import ChatOpenAI\n", 45 | "\n", 46 | "url = os.environ.get(\"MEMGRAPH_URI\", \"bolt://localhost:7687\")\n", 47 | "username = os.environ.get(\"MEMGRAPH_USERNAME\", \"\")\n", 48 | "password = os.environ.get(\"MEMGRAPH_PASSWORD\", \"\")\n", 49 | "\n", 50 | "graph = MemgraphGraph(\n", 51 | "    url=url, username=username, password=password, refresh_schema=False\n", 52 | ")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "\n", 60 | "`refresh_schema` is initially set to `False` because there is still no data in\n", 61 | "the database and we want to avoid unnecessary database calls.\n", 62 | "\n", 63 | "To interact with the LLM, you must configure it. 
Here is how you can set the API key as an\n", 64 | "environment variable for OpenAI:" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "os.environ[\"OPENAI_API_KEY\"] = \"your-key-here\"" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Graph construction\n", 81 | "\n", 82 | "For the dataset, we'll use the following text about Charles Darwin:" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "text = \"\"\"\n", 92 | "    Charles Robert Darwin was an English naturalist, geologist, and biologist,\n", 93 | "    widely known for his contributions to evolutionary biology. His proposition that\n", 94 | "    all species of life have descended from a common ancestor is now generally\n", 95 | "    accepted and considered a fundamental scientific concept. In a joint\n", 96 | "    publication with Alfred Russel Wallace, he introduced his scientific theory that\n", 97 | "    this branching pattern of evolution resulted from a process he called natural\n", 98 | "    selection, in which the struggle for existence has a similar effect to the\n", 99 | "    artificial selection involved in selective breeding. Darwin has been\n", 100 | "    described as one of the most influential figures in human history and was\n", 101 | "    honoured by burial in Westminster Abbey.\n", 102 | "\"\"\"" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "To construct the graph, first initialize `LLMGraphTransformer` from the desired\n", 110 | "LLM and convert the document to the graph structure." 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "from langchain_core.documents import Document\n", "from langchain_experimental.graph_transformers import LLMGraphTransformer\n", "\n", "llm = ChatOpenAI(temperature=0, model_name=\"gpt-4-turbo\")\n", 120 | "llm_transformer = LLMGraphTransformer(llm=llm)\n", 121 | "documents = [Document(page_content=text)]\n", 122 | "graph_documents = llm_transformer.convert_to_graph_documents(documents)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "The graph structure in the `GraphDocument` format can be forwarded to the\n", 130 | "`add_graph_documents()` procedure to import it into Memgraph:" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# Make sure the database is empty\n", 140 | "graph.query(\"STORAGE MODE IN_MEMORY_ANALYTICAL\")\n", 141 | "graph.query(\"DROP GRAPH\")\n", 142 | "graph.query(\"STORAGE MODE IN_MEMORY_TRANSACTIONAL\")\n", 143 | "\n", 144 | "# Create KG\n", 145 | "graph.add_graph_documents(graph_documents)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "The `add_graph_documents()` procedure transforms the list of `graph_documents`\n", 153 | "into appropriate Cypher queries and executes them in Memgraph.\n", 154 | "\n", 155 | "In the image below, you can see how the text was transformed into a knowledge\n", 156 | "graph and stored in Memgraph.\n", 157 | "\n", 158 | "![langchain-kg](langchain-kg-creation.png)\n", 159 | "\n", 160 | "For additional options, check the [full\n", 161 | "guide](https://python.langchain.com/docs/integrations/graphs/memgraph/#additional-options)\n", 162 | "on the LangChain docs. 
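The notebook imports `MemgraphQAChain` but never uses it; once the graph is built, it can answer natural-language questions over it. A minimal sketch, based on the LangChain docs (parameter names may differ slightly across versions):

```python
# Refresh the schema now that the database contains data, then ask a question.
# MemgraphQAChain generates Cypher from the question, runs it, and phrases the answer.
graph.refresh_schema()

chain = MemgraphQAChain.from_llm(
    ChatOpenAI(temperature=0, model_name="gpt-4-turbo"),
    graph=graph,
    allow_dangerous_requests=True,  # required to let the chain execute generated Cypher
)
response = chain.invoke("Who introduced the theory of natural selection?")
print(response["result"])
```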
" 163 | ] 164 | } 165 | ], 166 | "metadata": { 167 | "language_info": { 168 | "name": "python" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 2 173 | } 174 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/.codespellignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/integrations/langgraph/memgraph-toolkit-chatbot/.codespellignore -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/.env.example: -------------------------------------------------------------------------------- 1 | # To separate your traces from other application 2 | LANGSMITH_PROJECT=new-agent 3 | 4 | # Add API keys for connecting to LLM providers, data sources, and other integrations here 5 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | uv.lock 164 | .langgraph_api/ 165 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LangChain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests test_watch integration_tests docker_tests help extended_tests 2 | 3 | # Default target executed when no arguments are given to make. 
4 | all: help 5 | 6 | # Define a variable for the test file path. 7 | TEST_FILE ?= tests/unit_tests/ 8 | 9 | test: 10 | python -m pytest $(TEST_FILE) 11 | 12 | integration_tests: 13 | python -m pytest tests/integration_tests 14 | 15 | test_watch: 16 | python -m ptw --snapshot-update --now . -- -vv tests/unit_tests 17 | 18 | test_profile: 19 | python -m pytest -vv tests/unit_tests/ --profile-svg 20 | 21 | extended_tests: 22 | python -m pytest --only-extended $(TEST_FILE) 23 | 24 | 25 | ###################### 26 | # LINTING AND FORMATTING 27 | ###################### 28 | 29 | # Define a variable for Python and notebook files. 30 | PYTHON_FILES=src/ 31 | MYPY_CACHE=.mypy_cache 32 | lint format: PYTHON_FILES=. 33 | lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$|\.ipynb$$') 34 | lint_package: PYTHON_FILES=src 35 | lint_tests: PYTHON_FILES=tests 36 | lint_tests: MYPY_CACHE=.mypy_cache_test 37 | 38 | lint lint_diff lint_package lint_tests: 39 | python -m ruff check . 40 | [ "$(PYTHON_FILES)" = "" ] || python -m ruff format $(PYTHON_FILES) --diff 41 | [ "$(PYTHON_FILES)" = "" ] || python -m ruff check --select I $(PYTHON_FILES) 42 | [ "$(PYTHON_FILES)" = "" ] || python -m mypy --strict $(PYTHON_FILES) 43 | [ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && python -m mypy --strict $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) 44 | 45 | format format_diff: 46 | ruff format $(PYTHON_FILES) 47 | ruff check --select I --fix $(PYTHON_FILES) 48 | 49 | spell_check: 50 | codespell --toml pyproject.toml 51 | 52 | spell_fix: 53 | codespell --toml pyproject.toml -w 54 | 55 | ###################### 56 | # HELP 57 | ###################### 58 | 59 | help: 60 | @echo '----' 61 | @echo 'format - run code formatters' 62 | @echo 'lint - run linters' 63 | @echo 'test - run unit tests' 64 | @echo 'tests - run unit tests' 65 | @echo 'test TEST_FILE= - run all tests in file' 66 | @echo 'test_watch - run unit tests in watch mode' 67 | 68 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/README.md: -------------------------------------------------------------------------------- 1 | # Graph-Aware Agent with LangGraph and Memgraph AI Toolkit 2 | 3 | > [!NOTE] 4 | > This app was built with the [new LangGraph project template](https://github.com/langchain-ai/new-langgraph-project) and by following the 5 | > [Quickstart instructions](https://langchain-ai.github.io/langgraph/tutorials/langgraph-platform/local-server/) from the LangGraph documentation to 6 | > create a local LangGraph server. 7 | 8 | In this directory, you can find code for a simple agent built using the LangGraph framework and the [Memgraph AI Toolkit](https://github.com/memgraph/ai-toolkit) to demonstrate how to integrate graph-based tooling into your LLM stack. LangGraph helps define structured workflows for language agents, while Memgraph provides powerful graph querying capabilities. Together, they make a compelling combination for building intelligent, context-aware applications. 9 | 10 | ![langgraph-studio-memgraph-toolkit](./static/langgraph-studio-memgraph-toolkit.png) 11 | 12 | ## Prerequisite 13 | 14 | The agent invokes tools that execute queries against Memgraph database, meaning that you need a running Memgraph instance. In the example, Memgraph should be running on `localhost:7687`. 
To start Memgraph MAGE, run the following command in your terminal: 15 | 16 | ``` 17 | docker run -p 7687:7687 \ 18 |   --name memgraph \ 19 |   memgraph/memgraph-mage:latest \ 20 |   --schema-info-enabled=true 21 | ``` 22 | 23 | Once Memgraph is running, load the data. In this example, the Game of Thrones dataset is loaded from [Memgraph Lab](https://memgraph.com/docs/memgraph-lab). 24 | 25 | ## Run the app 26 | 27 | To run the app, first install the LangGraph CLI: 28 | 29 | ``` 30 | # Python >= 3.11 is required. 31 | 32 | pip install --upgrade "langgraph-cli[inmem]" 33 | ``` 34 | 35 | Then, install the dependencies: 36 | 37 | ``` 38 | pip install -e . 39 | ``` 40 | 41 | Finally, create a `.env` file. Copy the contents of `.env.example` provided in the directory, and update it with your API keys. Your `.env` might look like this: 42 | 43 | ``` 44 | # To separate your traces from other application 45 | LANGSMITH_PROJECT=new-agent 46 | 47 | # Add API keys for connecting to LLM providers, data sources, and other integrations here 48 | OPENAI_API_KEY="" 49 | LANGSMITH_TRACING="" 50 | LANGSMITH_API_KEY="" 51 | ``` 52 | 53 | A [LangSmith API](https://docs.smith.langchain.com/administration/how_to_guides/organization_management/create_account_api_key) key can be generated on their site. 54 | 55 | 56 | To test your agent, launch the LangGraph development server with: 57 | 58 | ``` 59 | langgraph dev 60 | ``` 61 | 62 | This will start a local server and open LangGraph Studio in your browser. 63 | To ask a question, add the following JSON to the input: 64 | 65 | ``` 66 | [ 67 |   { 68 |     "role": "user", 69 |     "content": "Can you tell me more about my schema?" 70 |   } 71 | ] 72 | ``` 73 | 74 | Then click the Submit button. You'll see how the agent invokes the `show_schema_info()` tool to provide the necessary details. 75 | 76 | ![langgraph-studio-memgraph-schema](./static/langgraph-studio-memgraph-schema.png) 77 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "graphs": { 3 | "agent": "./src/agent/graph.py:graph" 4 | }, 5 | "dependencies": [ 6 | "." 7 | ], 8 | "env": ".env" 9 | } 10 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "agent" 3 | version = "0.0.1" 4 | description = "Starter template for making a new LangGraph agent." 
5 | authors = [ 6 | { name = "William Fu-Hinthorn", email = "13333726+hinthornw@users.noreply.github.com" }, 7 | ] 8 | readme = "README.md" 9 | license = { text = "MIT" } 10 | requires-python = ">=3.9" 11 | dependencies = [ 12 | "langgraph>=0.2.6", 13 | "python-dotenv>=1.0.1", 14 | "langchain-memgraph==0.1.3", 15 | "langchain==0.3.25", 16 | "langchain-openai==0.3.17" 17 | ] 18 | 19 | 20 | [project.optional-dependencies] 21 | dev = ["mypy>=1.11.1", "ruff>=0.6.1"] 22 | 23 | [build-system] 24 | requires = ["setuptools>=73.0.0", "wheel"] 25 | build-backend = "setuptools.build_meta" 26 | 27 | [tool.setuptools] 28 | packages = ["langgraph.templates.agent", "agent"] 29 | [tool.setuptools.package-dir] 30 | "langgraph.templates.agent" = "src/agent" 31 | "agent" = "src/agent" 32 | 33 | 34 | [tool.setuptools.package-data] 35 | "*" = ["py.typed"] 36 | 37 | [tool.ruff] 38 | lint.select = [ 39 | "E", # pycodestyle 40 | "F", # pyflakes 41 | "I", # isort 42 | "D", # pydocstyle 43 | "D401", # First line should be in imperative mood 44 | "T201", 45 | "UP", 46 | ] 47 | lint.ignore = [ 48 | "UP006", 49 | "UP007", 50 | # We actually do want to import from typing_extensions 51 | "UP035", 52 | # Relax the convention by _not_ requiring documentation for every function parameter. 53 | "D417", 54 | "E501", 55 | ] 56 | [tool.ruff.lint.per-file-ignores] 57 | "tests/*" = ["D", "UP"] 58 | [tool.ruff.lint.pydocstyle] 59 | convention = "google" 60 | 61 | [dependency-groups] 62 | dev = [ 63 | "anyio>=4.7.0", 64 | "langgraph-cli[inmem]>=0.2.8", 65 | "mypy>=1.13.0", 66 | "pytest>=8.3.5", 67 | "ruff>=0.8.2", 68 | ] 69 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/src/agent/__init__.py: -------------------------------------------------------------------------------- 1 | """New LangGraph Agent. 2 | 3 | This module defines a custom graph. 4 | """ 5 | 6 | from agent.graph import graph 7 | 8 | __all__ = ["graph"] 9 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/src/agent/graph.py: -------------------------------------------------------------------------------- 1 | """LangGraph single-node graph template. 2 | 3 | Returns a predefined response. Replace logic and configuration as needed. 
4 | """ 5 | 6 | from typing import Annotated, TypedDict 7 | 8 | from langgraph.graph import StateGraph 9 | from langgraph.graph.message import add_messages 10 | from langchain.chat_models import init_chat_model 11 | from langgraph.prebuilt import ToolNode, tools_condition 12 | from langgraph.graph import StateGraph, START 13 | from langchain_memgraph.graphs.memgraph import MemgraphLangChain 14 | from langchain_memgraph import MemgraphToolkit 15 | 16 | 17 | url = "bolt://localhost:7687" 18 | username = "memgraph" 19 | password = "memgraph" 20 | 21 | db = MemgraphLangChain( 22 | url=url, username=username, password=password, refresh_schema=False 23 | ) 24 | 25 | llm = init_chat_model("openai:gpt-4.1") 26 | toolkit = MemgraphToolkit(db=db, llm=llm) 27 | tools = toolkit.get_tools() 28 | llm_with_tools = llm.bind_tools(tools) 29 | 30 | 31 | class State(TypedDict): 32 | messages: Annotated[list, add_messages] 33 | 34 | 35 | def chatbot(state: State): 36 | return {"messages": [llm_with_tools.invoke(state["messages"])]} 37 | 38 | 39 | graph_builder = StateGraph(State) 40 | graph_builder.add_node("chatbot", chatbot) 41 | 42 | tool_node = ToolNode(tools=tools, name="tools") 43 | graph_builder.add_node("tools", tool_node) 44 | graph_builder.add_conditional_edges( 45 | "chatbot", 46 | tools_condition, 47 | ) 48 | # Any time a tool is called, we return to the chatbot to decide the next step 49 | graph_builder.add_edge("tools", "chatbot") 50 | graph_builder.add_edge(START, "chatbot") 51 | graph = graph_builder.compile() 52 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/static/langgraph-studio-memgraph-schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/integrations/langgraph/memgraph-toolkit-chatbot/static/langgraph-studio-memgraph-schema.png -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/static/langgraph-studio-memgraph-toolkit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/integrations/langgraph/memgraph-toolkit-chatbot/static/langgraph-studio-memgraph-toolkit.png -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/static/studio_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/integrations/langgraph/memgraph-toolkit-chatbot/static/studio_ui.png -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(scope="session") 5 | def anyio_backend(): 6 | return "asyncio" 7 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Define any integration tests you want in this directory.""" 2 | 
-------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/tests/integration_tests/test_graph.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from agent import graph 4 | 5 | pytestmark = pytest.mark.anyio 6 | 7 | 8 | @pytest.mark.langsmith 9 | async def test_agent_simple_passthrough() -> None: 10 | inputs = {"changeme": "some_val"} 11 | res = await graph.ainvoke(inputs) 12 | assert res is not None 13 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Define any unit tests you may want in this directory.""" 2 | -------------------------------------------------------------------------------- /integrations/langgraph/memgraph-toolkit-chatbot/tests/unit_tests/test_configuration.py: -------------------------------------------------------------------------------- 1 | from langgraph.pregel import Pregel 2 | 3 | from agent.graph import graph 4 | 5 | 6 | def test_placeholder() -> None: 7 | # TODO: You can add actual unit tests 8 | # for your graph and other logic here. 9 | assert isinstance(graph, Pregel) 10 | -------------------------------------------------------------------------------- /integrations/langgraph/synonym-agent/agents.py: -------------------------------------------------------------------------------- 1 | import os 2 | from langchain_openai import ChatOpenAI 3 | from langchain_core.prompts import PromptTemplate 4 | from langchain_community.graphs import MemgraphGraph 5 | from langchain_community.chains.graph_qa.prompts import ( 6 | MEMGRAPH_GENERATION_PROMPT, 7 | ) 8 | import yaml 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "") 14 | URI = os.getenv("MEMGRAPH_URI", "bolt://localhost:7687") 15 | USER = os.getenv("MEMGRAPH_USER", "") 16 | PASSWORD = os.getenv("MEMGRAPH_PASSWORD", "") 17 | 18 | class BusinessSynonymRule: 19 | def __init__(self, label: str, prop: str, explanation: str): 20 | self.label = label 21 | self.prop = prop 22 | self.explanation = explanation 23 | 24 | def __repr__(self): 25 | return f"For label :{self.label}, property {self.prop} -> {self.explanation}" 26 | 27 | 28 | def load_business_rules(yaml_file="business_rules.yaml"): 29 | """ 30 | Loads business synonym rules from a YAML file. 31 | 32 | :param yaml_file: Path to the YAML file containing business rules. 33 | :return: A list of BusinessSynonymRule objects. 
34 |     """ 35 |     if not os.path.exists(yaml_file): 36 |         raise FileNotFoundError(f"Error: The file '{yaml_file}' does not exist.") 37 | 38 |     try: 39 |         with open(yaml_file, "r", encoding="utf-8") as file: 40 |             data = yaml.safe_load(file) 41 |             rules_data = data.get("configuration", {}).get("business_rules", []) 42 | 43 |             # Convert YAML data to BusinessSynonymRule objects 44 |             business_rules = [ 45 |                 BusinessSynonymRule(rule["label"], rule["prop"], rule["explanation"]) 46 |                 for rule in rules_data 47 |             ] 48 |             return business_rules 49 | 50 |     except yaml.YAMLError as e: 51 |         raise ValueError(f"Error parsing YAML file '{yaml_file}': {e}") 52 |     except KeyError as e: 53 |         raise ValueError(f"Missing key in YAML file: {e}") 54 | 55 | 56 | # Load the business rules 57 | business_rules = load_business_rules() 58 | 59 | 60 | def clean_cypher_query(cypher_query: str) -> str: 61 |     """Cleans LLM-generated Cypher query by removing markdown code block markers.""" 62 |     return cypher_query.replace("```", "").replace("cypher\n", "") 63 | 64 | 65 | def initialize_graph_context(state): 66 |     """Agent provides the state with the schema.""" 67 |     graph = MemgraphGraph(url=URI, username=USER, password=PASSWORD) 68 |     return {"schema": graph.get_schema} 69 | 70 | 71 | def generate_cypher_query(state): 72 |     """Agent for generating Cypher queries.""" 73 |     question = state["question"] 74 |     schema = state["schema"] 75 | 76 |     llm = ChatOpenAI(temperature=0, model="gpt-4o", openai_api_key=OPENAI_API_KEY) 77 |     cypher_query = llm.invoke( 78 |         MEMGRAPH_GENERATION_PROMPT.format(schema=schema, question=question) 79 |     ) 80 |     cleaned_cypher_query = clean_cypher_query(cypher_query.content) 81 | 82 |     return {"cypher_query": cleaned_cypher_query, "initial_query": cleaned_cypher_query} 83 | 84 | 85 | def business_synonym_reasoning(state): 86 |     """Rewrites the Cypher query based on predefined business synonym rules.""" 87 |     cypher_query = state["cypher_query"] 88 |     schema = state["schema"] 89 |     rules = "\n".join([str(x) for x in business_rules]) 90 | 91 |     BUSINESS_SYNONYM_PROMPT = """Your task is to analyze and, if necessary, rewrite 92 |     the given Cypher query based on the following business synonym rules. 93 |     If none of the rules apply, return the query unchanged. 94 | 95 |     Given the following schema of the graph: 96 |     {schema} 97 | 98 |     The query needs to be rewritten if any of the following rules apply: 99 |     {rules} 100 | 101 |     Given the following Cypher query: 102 |     {cypher_query} 103 | 104 |     Return the rewritten query (if modified) or the original query if no changes were needed. 105 |     Return only the Cypher query itself, with no apologies or additional text; omit everything else. 
106 |     """ 107 | 108 |     llm = ChatOpenAI(temperature=0, model="gpt-4o", openai_api_key=OPENAI_API_KEY) 109 | 110 |     # Inject business rules into the prompt (to be defined separately) 111 |     formatted_prompt = BUSINESS_SYNONYM_PROMPT.format( 112 |         schema=schema, 113 |         rules=rules, 114 |         cypher_query=cypher_query, 115 |     ) 116 | 117 |     revised_query = llm.invoke(formatted_prompt) 118 |     cleaned_cypher_query = clean_cypher_query(revised_query.content) 119 | 120 |     return {"cypher_query": cleaned_cypher_query, "business_reasoning": rules} 121 | 122 | 123 | def execute_cypher_query(state): 124 |     """Executes the Cypher query on Memgraph.""" 125 |     cypher_query = state["cypher_query"] 126 |     try: 127 |         graph = MemgraphGraph(url=URI, username=USER, password=PASSWORD) 128 |         # result = graph.query(cypher_query) 129 |         data, _, _ = graph._driver.execute_query( 130 |             cypher_query, 131 |             database_=graph._database, 132 |             parameters_={}, 133 |         ) 134 |         # json_data = [r.data() for r in data] 135 |         return {"query_result": data} 136 |     except Exception as e: 137 |         return {"query_result": f"Error: {str(e)}"} 138 | 139 | 140 | def generate_human_readable_response(state): 141 |     """Generates a human-readable response from the query result.""" 142 |     question = state["question"] 143 |     context = state["query_result"] 144 | 145 |     MEMGRAPH_QA_TEMPLATE = """Your task is to form nice, human-understandable 146 |     answers. The information part contains the provided 147 |     information that you must use to construct an answer. 148 |     The provided information is authoritative; you must never doubt it or try to 149 |     use your internal knowledge to correct it. Make the answer sound like a 150 |     response to the question. Do not mention that you based the result on the 151 |     given information. Here is an example: 152 | 153 |     Question: Which managers own Neo4j stocks? 154 |     Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC] 155 |     Helpful Answer: CTL LLC, JANE STREET GROUP LLC own Neo4j stocks. 156 | 157 |     Follow this example when generating answers. If the provided information is 158 |     empty, say that you don't know the answer. If anything is in the context, meaning 159 |     that Memgraph returned some of the results, please try to generate a meaningful answer. 160 |     If there is really nothing in the context, then you can say that you don't know 161 |     the answer. 
162 | 163 | Information: 164 | {context} 165 | 166 | Question: {question} 167 | Helpful Answer:""" 168 | 169 | qa_prompt = PromptTemplate( 170 | input_variables=["context", "question"], template=MEMGRAPH_QA_TEMPLATE 171 | ) 172 | 173 | llm = ChatOpenAI(temperature=0, model="gpt-4o", openai_api_key=OPENAI_API_KEY) 174 | final_answer = llm.invoke(qa_prompt.format(context=context, question=question)) 175 | return {"final_answer": final_answer.content} 176 | -------------------------------------------------------------------------------- /integrations/langgraph/synonym-agent/app.py: -------------------------------------------------------------------------------- 1 | from workflows import run_workflow 2 | import streamlit as st 3 | import neo4j 4 | from streamlit_agraph import agraph, Node, Edge, Config 5 | 6 | 7 | # Function to render a graph using streamlit-agraph from Neo4j query results 8 | def render_graph(query_result): 9 | nodes = [] 10 | edges = [] 11 | 12 | # Extract nodes and relationships from query result 13 | for record in query_result: 14 | for key, value in record.items(): 15 | if isinstance(value, neo4j.graph.Node): 16 | nodes.append( 17 | Node( 18 | id=str(value["id"]), 19 | label=value["name"], 20 | size=25, 21 | selectable=True, 22 | ) 23 | ) 24 | elif isinstance(value, neo4j.graph.Relationship): 25 | edges.append( 26 | Edge( 27 | source=str(value["start"]), 28 | target=str(value["end"]), 29 | label=value.get("type", ""), 30 | ) 31 | ) 32 | 33 | # Only render if there are nodes or edges 34 | if nodes or edges: 35 | config = Config( 36 | width=800, 37 | height=600, 38 | directed=True, 39 | physics=True, 40 | hierarchical=False, 41 | selectable=True, 42 | interaction={"dragNodes": True, "dragView": True, "zoomView": True}, 43 | ) 44 | agraph(nodes=nodes, edges=edges, config=config) 45 | else: 46 | st.warning("No graph data found in query result.") 47 | 48 | 49 | def generate_frontend(): 50 | # Streamlit App Title 51 | st.title("Memgraph LangGraph Chatbot") 52 | 53 | # Text Input for User Question 54 | user_question = st.text_input("Enter your question:") 55 | 56 | # Checkboxes for display options 57 | plain_answer = st.checkbox("Plain Answer", value=True) 58 | query_answer = st.checkbox("Query Answer", value=True) 59 | graph_view = st.checkbox("Graph View", value=True) 60 | 61 | # Run Workflow on Button Click 62 | if st.button("Generate Answer"): 63 | if user_question: 64 | result = run_workflow(user_question) 65 | 66 | # Displaying Question 67 | st.markdown(f"**Question:** {result['question']}") 68 | st.markdown(f"**Business reasoning:** {result['business_reasoning']}") 69 | 70 | # Displaying based on checkbox selection 71 | if plain_answer: 72 | st.markdown(f"**Final Answer:** {result['final_answer']}") 73 | if query_answer: 74 | st.markdown( 75 | f"**Query Result:**\n```json\n{result['query_result']}\n```" 76 | ) 77 | if graph_view: 78 | render_graph(result["query_result"]) 79 | 80 | # Always show the generated query 81 | st.markdown( 82 | f"**Initial Cypher Query:**\n```cypher\n{result['initial_query']}\n```" 83 | ) 84 | st.markdown( 85 | f"**Final generated Cypher Query:**\n```cypher\n{result['cypher_query']}\n```" 86 | ) 87 | else: 88 | st.warning("Please enter a question.") 89 | 90 | 91 | if __name__ == "__main__": 92 | generate_frontend() 93 | -------------------------------------------------------------------------------- /integrations/langgraph/synonym-agent/business_rules.yaml: -------------------------------------------------------------------------------- 1 | 
configuration: 2 | business_rules: 3 | - label: "Person" 4 | prop: "true_id" 5 | explanation: "This is the property for ID. Whenever a user wants to specify to retrieve the person by id, you will use the true_id property." 6 | -------------------------------------------------------------------------------- /integrations/langgraph/synonym-agent/requirements.txt: -------------------------------------------------------------------------------- 1 | Package Version 2 | ------------------------- ----------- 3 | aiohappyeyeballs 2.6.1 4 | aiohttp 3.11.13 5 | aiosignal 1.3.2 6 | altair 5.5.0 7 | annotated-types 0.7.0 8 | anyio 4.8.0 9 | async-timeout 4.0.3 10 | attrs 25.3.0 11 | blinker 1.9.0 12 | cachetools 5.5.2 13 | certifi 2025.1.31 14 | charset-normalizer 3.4.1 15 | click 8.1.8 16 | colorama 0.4.6 17 | dataclasses-json 0.6.7 18 | distro 1.9.0 19 | exceptiongroup 1.2.2 20 | frozenlist 1.5.0 21 | gitdb 4.0.12 22 | GitPython 3.1.44 23 | greenlet 3.1.1 24 | h11 0.14.0 25 | httpcore 1.0.7 26 | httpx 0.28.1 27 | httpx-sse 0.4.0 28 | idna 3.10 29 | isodate 0.7.2 30 | Jinja2 3.1.6 31 | jiter 0.9.0 32 | jsonpatch 1.33 33 | jsonpointer 3.0.0 34 | jsonschema 4.23.0 35 | jsonschema-specifications 2024.10.1 36 | langchain 0.3.20 37 | langchain-community 0.3.19 38 | langchain-core 0.3.45 39 | langchain-openai 0.3.8 40 | langchain-text-splitters 0.3.6 41 | langgraph 0.3.11 42 | langgraph-checkpoint 2.0.20 43 | langgraph-prebuilt 0.1.3 44 | langgraph-sdk 0.1.57 45 | langsmith 0.3.15 46 | MarkupSafe 3.0.2 47 | marshmallow 3.26.1 48 | msgpack 1.1.0 49 | multidict 6.1.0 50 | mypy-extensions 1.0.0 51 | narwhals 1.30.0 52 | neo4j 5.28.1 53 | networkx 3.4.2 54 | numpy 2.2.3 55 | openai 1.66.3 56 | orjson 3.10.15 57 | packaging 24.2 58 | pandas 2.2.3 59 | pillow 11.1.0 60 | pip 22.0.2 61 | propcache 0.3.0 62 | protobuf 5.29.3 63 | pyarrow 19.0.1 64 | pydantic 2.10.6 65 | pydantic_core 2.27.2 66 | pydantic-settings 2.8.1 67 | pydeck 0.9.1 68 | pyparsing 3.2.1 69 | python-dateutil 2.9.0.post0 70 | python-dotenv 1.0.1 71 | pytz 2025.1 72 | PyYAML 6.0.2 73 | rdflib 7.1.3 74 | referencing 0.36.2 75 | regex 2024.11.6 76 | requests 2.32.3 77 | requests-toolbelt 1.0.0 78 | rpds-py 0.23.1 79 | setuptools 59.6.0 80 | six 1.17.0 81 | smmap 5.0.2 82 | sniffio 1.3.1 83 | SQLAlchemy 2.0.39 84 | streamlit 1.43.2 85 | streamlit-agraph 0.0.45 86 | tenacity 9.0.0 87 | tiktoken 0.9.0 88 | toml 0.10.2 89 | tornado 6.4.2 90 | tqdm 4.67.1 91 | typing_extensions 4.12.2 92 | typing-inspect 0.9.0 93 | tzdata 2025.1 94 | urllib3 2.3.0 95 | watchdog 6.0.0 96 | yarl 1.18.3 97 | zstandard 0.23.0 98 | -------------------------------------------------------------------------------- /integrations/langgraph/synonym-agent/workflows.py: -------------------------------------------------------------------------------- 1 | from colorama import Fore, Style 2 | from langchain_core.runnables import RunnableLambda 3 | from langgraph.graph import StateGraph 4 | from agents import ( 5 | business_synonym_reasoning, 6 | execute_cypher_query, 7 | generate_cypher_query, 8 | generate_human_readable_response, 9 | initialize_graph_context, 10 | ) 11 | from typing import TypedDict 12 | 13 | 14 | class WorkflowState(TypedDict): 15 | question: str 16 | schema: str 17 | initial_query: str 18 | business_reasoning: str 19 | cypher_query: str 20 | query_result: str 21 | final_answer: str 22 | 23 | 24 | def get_business_reasoning_workflow(): 25 | # Define the state graph 26 | graph_workflow = StateGraph(WorkflowState) 27 | graph_workflow.add_node( 28 | 
"initialize_graph_context", RunnableLambda(initialize_graph_context) 29 | ) 30 | graph_workflow.add_node("generate_cypher", RunnableLambda(generate_cypher_query)) 31 | graph_workflow.add_node( 32 | "synonym_reasoning", RunnableLambda(business_synonym_reasoning) 33 | ) 34 | graph_workflow.add_node("execute_cypher", RunnableLambda(execute_cypher_query)) 35 | graph_workflow.add_node( 36 | "generate_response", RunnableLambda(generate_human_readable_response) 37 | ) 38 | 39 | graph_workflow.add_edge("initialize_graph_context", "generate_cypher") 40 | graph_workflow.add_edge("generate_cypher", "synonym_reasoning") 41 | graph_workflow.add_edge("synonym_reasoning", "execute_cypher") 42 | graph_workflow.add_edge("execute_cypher", "generate_response") 43 | 44 | graph_workflow.set_entry_point("initialize_graph_context") 45 | workflow = graph_workflow.compile() 46 | 47 | return workflow 48 | 49 | 50 | def get_basic_cypher_workflow(): 51 | # Define the state graph 52 | graph_workflow = StateGraph(WorkflowState) 53 | graph_workflow.add_node( 54 | "initialize_graph_context", RunnableLambda(initialize_graph_context) 55 | ) 56 | graph_workflow.add_node("generate_cypher", RunnableLambda(generate_cypher_query)) 57 | graph_workflow.add_node("execute_cypher", RunnableLambda(execute_cypher_query)) 58 | graph_workflow.add_node( 59 | "generate_response", RunnableLambda(generate_human_readable_response) 60 | ) 61 | 62 | graph_workflow.add_edge("initialize_graph_context", "generate_cypher") 63 | graph_workflow.add_edge("generate_cypher", "execute_cypher") 64 | graph_workflow.add_edge("execute_cypher", "generate_response") 65 | 66 | graph_workflow.set_entry_point("initialize_graph_context") 67 | workflow = graph_workflow.compile() 68 | 69 | return workflow 70 | 71 | 72 | def pick_tool(user_question): 73 | return get_business_reasoning_workflow() 74 | 75 | 76 | def run_workflow(user_question): 77 | initial_state = {"question": user_question} 78 | workflow = pick_tool(user_question) 79 | final_output = workflow.invoke(initial_state) 80 | 81 | print( 82 | f"{Fore.YELLOW}Question:{Style.RESET_ALL} {final_output['question']}\n" 83 | f"{Fore.BLUE}Initial query:{Style.RESET_ALL} {final_output['initial_query']}\n" 84 | f"{Fore.BLUE}Query:{Style.RESET_ALL} {final_output['cypher_query']}\n" 85 | f"{Fore.GREEN}Answer:{Style.RESET_ALL} {final_output['final_answer']}\n" 86 | ) 87 | print() 88 | 89 | return final_output 90 | 91 | 92 | if __name__ == "__main__": 93 | run_workflow("How many nodes are there in the graph?") 94 | -------------------------------------------------------------------------------- /integrations/llamaindex/agentic-rag-with-graph-tools/agentic_rag_with_pagerank.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Integrating PageRank as a tool in a multi-agent workflow\n", 8 | "\n", 9 | "In this example, we'll create a multi-agent workflow using LlamaIndex and\n", 10 | "Memgraph to perform graph-based querying and computation. 
We'll explore how to:\n", 11 | "\n", 12 | "- Set up [**Memgraph**](https://memgraph.com/) as a graph store and create a\n", 13 | "  sample dataset.\n", 14 | "- Use [**LlamaIndex**](https://www.llamaindex.ai/) to define function agents for\n", 15 | "  retrieval and arithmetic operations.\n", 16 | "- Implement a **retriever agent** to run the\n", 17 | "  [**PageRank**](https://memgraph.com/docs/advanced-algorithms/available-algorithms/pagerank)\n", 18 | "  algorithm and extract ranked nodes.\n", 19 | "- Use a **calculator agent** to process numerical data from retrieved nodes.\n", 20 | "- Design an **AgentWorkflow** that integrates retrieval and computation for\n", 21 | "  automated query execution.\n", 22 | "\n", 23 | "By the end, we'll have a system capable of retrieving graph-based data and\n", 24 | "performing calculations dynamically.\n", 25 | "\n", 26 | "## Prerequisites\n", 27 | "\n", 28 | "1. Make sure you have [Docker](https://www.docker.com/) running in the\n", 29 | "   background. \n", 30 | "\n", 31 | "2. Run Memgraph\n", 32 | "\n", 33 | "The easiest way to run Memgraph is using the following commands:\n", 34 | "\n", 35 | "For Linux/macOS: `curl https://install.memgraph.com | sh`\n", 36 | "\n", 37 | "For Windows: `iwr https://windows.memgraph.com | iex`\n", 38 | "\n", 39 | "3. Install necessary dependencies:\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "%pip install llama-index llama-index-graph-stores-memgraph python-dotenv neo4j" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Environment setup\n", 56 | "\n", 57 | "Create a `.env` file that contains your OpenAI API key and the values of\n", 58 | "environment variables necessary to connect to your Memgraph instance. If no\n", 59 | "user has been created, the default values are empty strings:\n", 60 | "\n", 61 | "`OPENAI_API_KEY=sk-proj-...` \n", 62 | "`URI=bolt://localhost:7687` \n", 63 | "`AUTH_USER=\"\"` \n", 64 | "`AUTH_PASS=\"\"`\n", 65 | "\n", 66 | "## Create the script\n", 67 | "\n", 68 | "Let's first load our `.env` file and set the LLM model we want to use. In this\n", 69 | "example, we're using OpenAI's GPT-4 model.\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "from dotenv import load_dotenv\n", 79 | "load_dotenv()\n", 80 | "from llama_index.llms.openai import OpenAI\n", 81 | "from llama_index.core import Settings\n", 82 | "\n", 83 | "# settings\n", 84 | "Settings.llm = OpenAI(model=\"gpt-4\", temperature=0)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### Connect to Memgraph\n", 92 | "\n", 93 | "In this section, we'll establish a connection to Memgraph using the environment\n", 94 | "variables for authentication and connection details.\n", 95 | "\n", 96 | "1. **Retrieve Environment Variables** \n", 97 | "   The script fetches the `URI`, `AUTH_USER`, and `AUTH_PASS` values from the\n", 98 | "   environment using `os.getenv()`. These values determine how the script\n", 99 | "   connects to the Memgraph database.\n", 100 | "\n", 101 | "2. **Set Up Authentication** \n", 102 | "   The credentials (`AUTH_USER`, `AUTH_PASS`) are combined into a tuple (`AUTH`)\n", 103 | "   to be used for authentication.\n", 104 | "\n", 105 | "3. 
**Create a Memgraph Connection** \n", 106 | " A connection to Memgraph is established using `GraphDatabase.driver(URI,\n", 107 | " auth=AUTH)`. \n", 108 | "\n", 109 | "\n", 110 | "This setup ensures that the script can interact with your Memgraph instance." 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "import os\n", 120 | "from neo4j import GraphDatabase\n", 121 | "from llama_index.graph_stores.memgraph import MemgraphPropertyGraphStore\n", 122 | "\n", 123 | "URI = os.getenv(\"URI\")\n", 124 | "AUTH_USER = os.getenv(\"AUTH_USER\")\n", 125 | "AUTH_PASS = os.getenv(\"AUTH_PASS\")\n", 126 | "\n", 127 | "AUTH = (AUTH_USER, AUTH_PASS)\n", 128 | "\n", 129 | "driver = GraphDatabase.driver(URI, auth=AUTH)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Define calculator tools\n", 137 | "\n", 138 | "Next, define addition and subtraction tools for calculations and a calculator\n", 139 | "agent. The role of the agent in this case will be to perform basic arithmetic\n", 140 | "operations with access to the defined tools." 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "from llama_index.core.tools import FunctionTool\n", 150 | "from llama_index.core.agent.workflow import FunctionAgent\n", 151 | "\n", 152 | "def add(a: int, b: int) -> int:\n", 153 | " \"\"\"Add two numbers.\"\"\"\n", 154 | " return a + b\n", 155 | "\n", 156 | "def subtract(a: int, b: int) -> int:\n", 157 | " \"\"\"Subtract two numbers.\"\"\"\n", 158 | " return a - b\n", 159 | "\n", 160 | "# Create agent configs\n", 161 | "calculator_agent = FunctionAgent(\n", 162 | " name=\"calculator\",\n", 163 | " description=\"Performs basic arithmetic operations\",\n", 164 | " system_prompt=\"You are a calculator assistant.\",\n", 165 | " tools=[\n", 166 | " FunctionTool.from_defaults(fn=add),\n", 167 | " FunctionTool.from_defaults(fn=subtract),\n", 168 | " ],\n", 169 | " llm=OpenAI(model=\"gpt-4\"),\n", 170 | ")" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "Next, define a function to execute Cypher queries and implement a PageRank\n", 178 | "retrieval tool. The retriever agent is responsible for running the PageRank\n", 179 | "algorithm and retrieving ranked nodes using the defined tool." 
180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "def execute_query(query: str):\n", 189 | " \"\"\"Runs a given Cypher query inside a session.\"\"\"\n", 190 | " with driver.session() as session:\n", 191 | " return session.execute_read(lambda tx: list(tx.run(query)))\n", 192 | "\n", 193 | "def run_pagerank():\n", 194 | " \"\"\"Executes the PageRank algorithm.\"\"\"\n", 195 | " query = \"CALL pagerank.get() YIELD node, rank RETURN node, rank ORDER BY rank DESC LIMIT 5\"\n", 196 | " return execute_query(query)\n", 197 | "\n", 198 | "pagerank_tool = FunctionTool.from_defaults(\n", 199 | " fn=run_pagerank,\n", 200 | " name=\"pagerank_tool\",\n", 201 | " description=\"Runs the PageRank algorithm and retrieves ranked nodes.\"\n", 202 | ")\n", 203 | "\n", 204 | "retriever_agent = FunctionAgent(\n", 205 | " name=\"retriever\",\n", 206 | " description=\"Manages data retrieval\",\n", 207 | " system_prompt=\"You have the ability to run the PageRank algorithm.\",\n", 208 | " tools=[\n", 209 | " pagerank_tool,\n", 210 | " ],\n", 211 | " llm=OpenAI(model=\"gpt-4\"),\n", 212 | " memory=None\n", 213 | ")" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "### Create the dataset \n", 221 | "\n", 222 | "Now, let's create a small dataset in Memgraph consisting of 10 nodes, each with\n", 223 | "a weight property. The nodes are connected through `LINKS_TO` relationships,\n", 224 | "forming a structured graph. To create your graph, run the following Cypher query\n", 225 | "in your Memgraph instance:\n", 226 | "\n", 227 | "`CREATE (n1:Node {id: 1, weight: 1.2}), (n2:Node {id: 2, weight: 2.5}), (n3:Node\n", 228 | "{id: 3, weight: 0.8}), (n4:Node {id: 4, weight: 1.7}), (n5:Node {id: 5, weight:\n", 229 | "3.0}), (n6:Node {id: 6, weight: 2.2}), (n7:Node {id: 7, weight: 1.0}), (n8:Node\n", 230 | "{id: 8, weight: 2.8}), (n9:Node {id: 9, weight: 1.5}), (n10:Node {id: 10,\n", 231 | "weight: 2.0}), (n1)-[:LINKS_TO]->(n2), (n1)-[:LINKS_TO]->(n3),\n", 232 | "(n2)-[:LINKS_TO]->(n4), (n3)-[:LINKS_TO]->(n4), (n4)-[:LINKS_TO]->(n5),\n", 233 | "(n5)-[:LINKS_TO]->(n6), (n6)-[:LINKS_TO]->(n7), (n7)-[:LINKS_TO]->(n8),\n", 234 | "(n8)-[:LINKS_TO]->(n9), (n9)-[:LINKS_TO]->(n10), (n10)-[:LINKS_TO]->(n1),\n", 235 | "(n3)-[:LINKS_TO]->(n6), (n4)-[:LINKS_TO]->(n9), (n7)-[:LINKS_TO]->(n2),\n", 236 | "(n8)-[:LINKS_TO]->(n5);`\n", 237 | "\n", 238 | "### Memgraph graph store\n", 239 | "\n", 240 | "We'll now establish a connection to **Memgraph**, using\n", 241 | "`MemgraphPropertyGraphStore` from LlamaIndex. This allows us to store and\n", 242 | "retrieve structured data efficiently, enabling **graph-based querying** for\n", 243 | "retrieval-augmented generation (RAG) pipelines." 
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {},
250 |    "outputs": [],
251 |    "source": [
252 |     "from llama_index.graph_stores.memgraph import MemgraphPropertyGraphStore\n",
253 |     "\n",
254 |     "graph_store = MemgraphPropertyGraphStore(\n",
255 |     "    username=\"\",  # Your Memgraph username, default is \"\"\n",
256 |     "    password=\"\",  # Your Memgraph password, default is \"\"\n",
257 |     "    url=\"bolt://localhost:7687\"  # Connection URL for Memgraph\n",
258 |     ")"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "metadata": {},
264 |    "source": [
265 |     "### Creating and running the workflow\n",
266 |     "\n",
267 |     "Finally, let's create an **AgentWorkflow** that ties together the previously\n",
268 |     "defined agents, including the **calculator** and **retriever** agents. The\n",
269 |     "workflow runs the PageRank algorithm, retrieves nodes, and sums their weight\n",
270 |     "properties using the addition tool.\n",
271 |     "\n",
272 |     "We define an **async function** to execute the workflow, sending a user query\n",
273 |     "that asks the agents to run the PageRank algorithm and then, using the addition\n",
274 |     "tool, sum the weight properties of the returned nodes."
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": null,
280 |    "metadata": {},
281 |    "outputs": [],
282 |    "source": [
283 |     "\n",
284 |     "from llama_index.core.agent.workflow import (\n",
285 |     "    AgentWorkflow,\n",
286 |     "    FunctionAgent,\n",
287 |     "    ReActAgent,\n",
288 |     ")\n",
289 |     "import asyncio\n",
    |     "import nest_asyncio\n",
290 |     "\n",
    |     "# asyncio.run() fails inside Jupyter's already-running event loop,\n",
    |     "# so allow nested event loops first\n",
    |     "nest_asyncio.apply()\n",
    |     "\n",
291 |     "# Create and run the workflow\n",
292 |     "workflow = AgentWorkflow(\n",
293 |     "    agents=[calculator_agent, retriever_agent], root_agent=\"retriever\"\n",
294 |     ")\n",
295 |     "\n",
296 |     "# Define an async function to run the workflow\n",
297 |     "async def run_workflow():\n",
298 |     "    response = await workflow.run(user_msg=\"Run the PageRank algorithm and, using the addition tool, add up the weight properties of the returned nodes.\")\n",
299 |     "    print(response)\n",
300 |     "\n",
301 |     "# Run the async function using asyncio\n",
302 |     "asyncio.run(run_workflow())"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "markdown",
307 |    "metadata": {},
308 |    "source": [
309 |     "## Conclusion\n",
310 |     "\n",
311 |     "This notebook provides a simple example of how to create and use **Memgraph procedures as tools** when implementing an **Agentic RAG system** with LlamaIndex. By integrating graph algorithms like **PageRank** into agents, we enable more powerful and context-aware data retrieval and computation.\n",
312 |     "\n",
313 |     "This is just the beginning: Memgraph supports a wide range of graph algorithms and procedures that can be leveraged in multi-agent workflows. You can explore more built-in algorithms and create custom ones using [MAGE (Memgraph Advanced Graph Extensions)](https://memgraph.com/docs/advanced-algorithms/available-algorithms) to further enhance your system's capabilities. The possibilities are endless!"
314 | ] 315 | } 316 | ], 317 | "metadata": { 318 | "kernelspec": { 319 | "display_name": "Python 3.10.16 ('llama_examples')", 320 | "language": "python", 321 | "name": "python3" 322 | }, 323 | "language_info": { 324 | "name": "python", 325 | "version": "3.10.16" 326 | }, 327 | "orig_nbformat": 4, 328 | "vscode": { 329 | "interpreter": { 330 | "hash": "42d147008be9a222f6757cc3d1527f7d3e48d8ff31a8ceb9f319427f25b07d46" 331 | } 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 2 336 | } 337 | -------------------------------------------------------------------------------- /integrations/llamaindex/multi-agent-rag-system/data/2023_canadian_budget.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/integrations/llamaindex/multi-agent-rag-system/data/2023_canadian_budget.pdf -------------------------------------------------------------------------------- /integrations/llamaindex/multi-agent-rag-system/multi_agent_rag_system.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Building multi-agent GraphRAG system with LlamaIndex and Memgraph\n", 8 | "\n", 9 | "In this example, we build a multi-agent GraphRAG system using LlamaIndex and\n", 10 | "Memgraph, integrating retrieval-augmented generation (RAG) with graph-based\n", 11 | "querying and tool-using agents. We'll explore how to:\n", 12 | "\n", 13 | "- Set up **Memgraph** as a graph store for structured knowledge retrieval.\n", 14 | "- Use **LlamaIndex** to create a Property Graph Index and perform Memgraph's\n", 15 | " **vector search** on embedded data.\n", 16 | "- Implement function agents for both arithmetic operations and semantic\n", 17 | " retrieval.\n", 18 | "- Design an **AgentWorkflow** that combines retrieval and computation to answer\n", 19 | " complex queries.\n", 20 | "\n", 21 | "By the end, we'll have a fully functional GraphRAG pipeline capable of answering\n", 22 | "structured queries while performing calculations on retrieved data.\n", 23 | "\n", 24 | "## Prerequisites\n", 25 | "\n", 26 | "1. Make sure you have [Docker](https://www.docker.com/) running in the\n", 27 | " background. \n", 28 | "\n", 29 | "2. Run Memgraph\n", 30 | "\n", 31 | "The easiest way to run Memgraph is by using the following commands:\n", 32 | "\n", 33 | "For Linux/macOS: `curl https://install.memgraph.com | sh`\n", 34 | "\n", 35 | "For Windows: `iwr https://windows.memgraph.com | iex`\n", 36 | "\n", 37 | "3. Install necessary dependencies:" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "%pip install llama-index llama-index-graph-stores-memgraph python-dotenv" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Environment setup\n", 54 | "\n", 55 | "Create `.env` file that contains your OpenAI API key:\n", 56 | "\n", 57 | "`OPENAI_API_KEY=sk-proj-...`\n", 58 | "\n", 59 | "## Create the script\n", 60 | "\n", 61 | "Let's first load our `.env` file and set the LLM model we want to use. In this\n", 62 | "example, we're using OpenAI's gpt-4 model." 
63 |    ]
64 |   },
65 |   {
66 |    "cell_type": "code",
67 |    "execution_count": null,
68 |    "metadata": {},
69 |    "outputs": [],
70 |    "source": [
71 |     "from dotenv import load_dotenv\n",
72 |     "load_dotenv()\n",
73 |     "from llama_index.llms.openai import OpenAI\n",
74 |     "from llama_index.core import Settings\n",
75 |     "\n",
76 |     "# settings\n",
77 |     "Settings.llm = OpenAI(model=\"gpt-4\", temperature=0)"
78 |    ]
79 |   },
80 |   {
81 |    "cell_type": "markdown",
82 |    "metadata": {},
83 |    "source": [
84 |     "### Define calculator tools\n",
85 |     "\n",
86 |     "Next, we define addition and subtraction tools for calculations, along with a\n",
87 |     "calculator agent. The role of the agent, in this case, will be to perform basic\n",
88 |     "arithmetic operations with access to the defined tools."
89 |    ]
90 |   },
91 |   {
92 |    "cell_type": "code",
93 |    "execution_count": null,
94 |    "metadata": {},
95 |    "outputs": [],
96 |    "source": [
97 |     "from llama_index.core.tools import FunctionTool\n",
98 |     "from llama_index.core.agent.workflow import FunctionAgent\n",
99 |     "\n",
100 |     "def add(a: int, b: int) -> int:\n",
101 |     "    \"\"\"Add two numbers.\"\"\"\n",
102 |     "    return a + b\n",
103 |     "\n",
104 |     "\n",
105 |     "def subtract(a: int, b: int) -> int:\n",
106 |     "    \"\"\"Subtract two numbers.\"\"\"\n",
107 |     "    return a - b\n",
108 |     "\n",
109 |     "# Create agent configs\n",
110 |     "calculator_agent = FunctionAgent(\n",
111 |     "    name=\"calculator\",\n",
112 |     "    description=\"Performs basic arithmetic operations\",\n",
113 |     "    system_prompt=\"You are a calculator assistant.\",\n",
114 |     "    tools=[\n",
115 |     "        FunctionTool.from_defaults(fn=add),\n",
116 |     "        FunctionTool.from_defaults(fn=subtract),\n",
117 |     "    ],\n",
118 |     "    llm=OpenAI(model=\"gpt-4\"),\n",
119 |     ")"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "metadata": {},
125 |    "source": [
126 |     "### Load the dataset \n",
127 |     "\n",
128 |     "Besides the basic operations, we also want to create a RAG pipeline and perform\n",
129 |     "retrieval operations on the dataset of our choice. In this example, we're using\n",
130 |     "a PDF file about the Canadian budget for 2023, which is stored in the `data`\n",
131 |     "directory. Let's load that dataset:"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": null,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "from llama_index.core import SimpleDirectoryReader\n",
141 |     "\n",
142 |     "documents = SimpleDirectoryReader(\"./data\").load_data()"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "### Memgraph graph store\n",
150 |     "\n",
151 |     "We'll now establish a connection to **Memgraph**, using\n",
152 |     "`MemgraphPropertyGraphStore` from LlamaIndex. This allows us to store and\n",
153 |     "retrieve structured data efficiently, enabling **graph-based querying** for\n",
154 |     "retrieval-augmented generation (RAG) pipelines."
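    |     ,"\n",
    |     "Before connecting, it can help to sanity-check what `SimpleDirectoryReader` actually loaded. An optional peek (the exact output depends on your copy of the PDF):"
    |    ]
    |   },
    |   {
    |    "cell_type": "code",
    |    "execution_count": null,
    |    "metadata": {},
    |    "outputs": [],
    |    "source": [
    |     "# Optional sanity check on the loaded documents (output varies with the file).\n",
    |     "print(f\"Loaded {len(documents)} document chunk(s)\")\n",
    |     "print(documents[0].text[:300])  # preview the start of the first chunk"
    |    ]
    |   },
    |   {
    |    "cell_type": "markdown",
    |    "metadata": {},
    |    "source": [
    |     "Now, the connection itself:"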
155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "from llama_index.graph_stores.memgraph import MemgraphPropertyGraphStore\n", 164 | "\n", 165 | "graph_store = MemgraphPropertyGraphStore(\n", 166 | " username=\"\", # Your Memgraph username, default is \"\"\n", 167 | " password=\"\", # Your Memgraph password, default is \"\"\n", 168 | " url=\"bolt://localhost:7687\" # Connection URL for Memgraph\n", 169 | ")" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "### Create a knowledge graph in Memgraph\n", 177 | "\n", 178 | "This section builds a **Property Graph Index** using `PropertyGraphIndex` from\n", 179 | "LlamaIndex. This index allows us to store and retrieve structured knowledge in a\n", 180 | "**graph database (Memgraph)** while leveraging OpenAI embeddings for semantic\n", 181 | "search." 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "from llama_index.core import PropertyGraphIndex\n", 191 | "from llama_index.core.indices.property_graph import SchemaLLMPathExtractor\n", 192 | "from llama_index.embeddings.openai import OpenAIEmbedding\n", 193 | "\n", 194 | "index = PropertyGraphIndex.from_documents(\n", 195 | " documents,\n", 196 | " embed_model=OpenAIEmbedding(model_name=\"text-embedding-ada-002\"),\n", 197 | " kg_extractors=[\n", 198 | " SchemaLLMPathExtractor(\n", 199 | " llm=OpenAI(model=\"gpt-4\", temperature=0.0)\n", 200 | " )\n", 201 | " ],\n", 202 | " property_graph_store=graph_store,\n", 203 | " show_progress=True,\n", 204 | ")" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "### RAG Pipeline: query engine and retrieval agent\n", 212 | "\n", 213 | "Let's now set up a **Retrieval-Augmented Generation (RAG) pipeline** using\n", 214 | "LlamaIndex's `QueryEngineTool` and `FunctionAgent`. The pipeline enables\n", 215 | "efficient data retrieval from a structured knowledge base (Memgraph) and\n", 216 | "provides contextual responses using OpenAI's GPT-4.\n", 217 | "\n", 218 | "First, we convert the **Property Graph Index** into a **query engine**, allowing\n", 219 | "structured queries over the indexed data." 
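    |     ,"\n",
    |     "As an optional detour, you can also peek at what the extractor wrote into Memgraph. A small sketch, assuming the graph store's `structured_query` method from LlamaIndex's property graph store interface (the labels you see will depend on what the LLM extracted):"
    |    ]
    |   },
    |   {
    |    "cell_type": "code",
    |    "execution_count": null,
    |    "metadata": {},
    |    "outputs": [],
    |    "source": [
    |     "# Optional: inspect the knowledge graph created above.\n",
    |     "rows = graph_store.structured_query(\n",
    |     "    \"MATCH (n) RETURN labels(n) AS labels, count(*) AS cnt ORDER BY cnt DESC LIMIT 10\"\n",
    |     ")\n",
    |     "for row in rows:\n",
    |     "    print(row)"
    |    ]
    |   },
    |   {
    |    "cell_type": "markdown",
    |    "metadata": {},
    |    "source": [
    |     "Back to the pipeline: here are the query engine tool and the retrieval agent that uses it:"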
220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "from llama_index.core.tools import QueryEngineTool\n", 229 | "\n", 230 | "query_engine = index.as_query_engine()\n", 231 | "\n", 232 | "# rag pipeline as a tool\n", 233 | "budget_tool = QueryEngineTool.from_defaults(\n", 234 | " query_engine, \n", 235 | " name=\"canadian_budget_2023\",\n", 236 | " description=\"A RAG engine with some basic facts about the 2023 Canadian federal budget.\"\n", 237 | ")\n", 238 | "\n", 239 | "retriever_agent = FunctionAgent(\n", 240 | " name=\"retriever\",\n", 241 | " description=\"Manages data retrieval\",\n", 242 | " system_prompt=\"You are a retrieval assistant.\",\n", 243 | " tools=[\n", 244 | " budget_tool,\n", 245 | " ],\n", 246 | " llm=OpenAI(model=\"gpt-4\"),\n", 247 | ")" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "### Creating and running the workflow\n", 255 | "\n", 256 | "Finally, and most importantly, let's create an **AgentWorkflow** that ties together\n", 257 | "the previously defined agents, including the **calculator** and **retriever**\n", 258 | "agents. This workflow enables us to run a sequence of operations involving both\n", 259 | "data retrieval and arithmetic computations, allowing the agents to interact with\n", 260 | "one another.\n", 261 | "\n", 262 | "We define an **async function** to execute the workflow, sending a user query\n", 263 | "that asks for both the total amount of the 2023 Canadian federal budget and an\n", 264 | "additional calculation (adding 3 billion)." 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "from llama_index.core.agent.workflow import (\n", 274 | " AgentWorkflow,\n", 275 | " FunctionAgent,\n", 276 | " ReActAgent,\n", 277 | ")\n", 278 | "import asyncio\n", 279 | "\n", 280 | "# Create and run the workflow\n", 281 | "workflow = AgentWorkflow(\n", 282 | " agents=[calculator_agent, retriever_agent], root_agent=\"calculator\"\n", 283 | ")\n", 284 | "\n", 285 | "# Define an async function to run the workflow\n", 286 | "async def run_workflow():\n", 287 | " response = await workflow.run(user_msg=\"What is the total amount of the 2023 Canadian federal budget? 
Add 3 billion to that budget using tools\")\n",
288 |     "    print(response)\n",
289 |     "\n",
290 |     "# Run the async function using asyncio\n",
    |     "# (nest_asyncio allows asyncio.run() inside Jupyter's running event loop)\n",
    |     "import nest_asyncio\n",
    |     "nest_asyncio.apply()\n",
291 |     "asyncio.run(run_workflow())"
292 |    ]
293 |   }
294 |  ],
295 |  "metadata": {
296 |   "kernelspec": {
297 |    "display_name": "Python 3.9.13 64-bit (microsoft store)",
298 |    "language": "python",
299 |    "name": "python3"
300 |   },
301 |   "language_info": {
302 |    "name": "python",
303 |    "version": "3.9.13"
304 |   },
305 |   "orig_nbformat": 4,
306 |   "vscode": {
307 |    "interpreter": {
308 |     "hash": "289d8ae9ac585fcc15d0d9333c941ae27bdf80d3e799883224b20975f2046730"
309 |    }
310 |   }
311 |  },
312 |  "nbformat": 4,
313 |  "nbformat_minor": 2
314 | }
315 | 
--------------------------------------------------------------------------------
/integrations/llamaindex/property-graph-index/llamaindex.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Memgraph Property Graph Index\n",
8 |     "\n",
9 |     "In this example, we're using Memgraph's integration with\n",
10 |     "[LlamaIndex](https://www.llamaindex.ai/) to build a **Property Graph Index**\n",
11 |     "from a Paul Graham essay and use it to retrieve structured insights. \n",
12 |     "\n",
13 |     "- We start by **downloading** the essay and preparing the text for processing. \n",
14 |     "- Next, we **connect to Memgraph**, a graph database, to store and manage our\n",
15 |     "  structured data. \n",
16 |     "- We then **create a Property Graph Index**, transforming the unstructured text\n",
17 |     "  into a structured graph using OpenAI’s embedding and language models. \n",
18 |     "- Finally, we **query the graph** using both a retriever and a query engine to\n",
19 |     "  extract meaningful relationships from the text. \n",
20 |     "\n",
21 |     "This notebook demonstrates how to turn raw text into a **queryable knowledge\n",
22 |     "graph**, making it easier to analyze and retrieve insights from documents.\n",
23 |     "\n",
24 |     "## Prerequisites\n",
25 |     "\n",
26 |     "1. **Run Memgraph**\n",
27 |     "Before running Memgraph, ensure you have [Docker](https://www.docker.com/)\n",
28 |     "running in the background. The quickest way to try out Memgraph Platform\n",
29 |     "(Memgraph database + MAGE library + Memgraph Lab) for the first time is running\n",
30 |     "the following command:\n",
31 |     "\n",
32 |     "For Linux/macOS:\n",
33 |     "`curl https://install.memgraph.com | sh`\n",
34 |     "\n",
35 |     "For Windows:\n",
36 |     "`iwr https://windows.memgraph.com | iex`\n",
37 |     "\n",
38 |     "From here, you can check Memgraph's visual tool, [Memgraph\n",
39 |     "Lab](https://memgraph.com/docs/data-visualization) at\n",
40 |     "`http://localhost:3000/` or the [desktop version](https://memgraph.com/download)\n",
41 |     "of the app.\n",
42 |     "\n",
43 |     "2. **Install necessary dependencies**"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "code",
48 |    "execution_count": null,
49 |    "metadata": {},
50 |    "outputs": [],
51 |    "source": [
52 |     "%pip install llama-index llama-index-graph-stores-memgraph python-dotenv"
53 |    ]
54 |   },
55 |   {
56 |    "cell_type": "markdown",
57 |    "metadata": {},
58 |    "source": [
59 |     "## Create the script\n",
60 |     "\n",
61 |     "First, let's create an `.env` file that contains your OpenAI API key:\n",
62 |     "\n",
63 |     "`OPENAI_API_KEY=sk-proj-...`\n",
64 |     "\n",
65 |     "We then load our `.env` file and set the LLM model we want to use. In this\n",
66 |     "example, we're using OpenAI's gpt-4 model.\n"
67 |    ]
68 |   },
69 |   {
70 |    "cell_type": "code",
71 |    "execution_count": null,
72 |    "metadata": {},
73 |    "outputs": [],
74 |    "source": [
75 |     "from dotenv import load_dotenv\n",
76 |     "load_dotenv()\n",
    |     "\n",
    |     "from llama_index.llms.openai import OpenAI\n",
    |     "from llama_index.core import Settings\n",
    |     "\n",
    |     "# Set gpt-4 as the default LLM, as described above\n",
    |     "Settings.llm = OpenAI(model=\"gpt-4\", temperature=0)"
77 |    ]
78 |   },
79 |   {
80 |    "cell_type": "markdown",
81 |    "metadata": {},
82 |    "source": [
83 |     "Next, create the data directory and download the Paul Graham essay we'll be\n",
84 |     "using as the input data for this example."
85 |    ]
86 |   },
87 |   {
88 |    "cell_type": "code",
89 |    "execution_count": null,
90 |    "metadata": {},
91 |    "outputs": [],
92 |    "source": [
93 |     "import urllib.request\n",
94 |     "import os\n",
95 |     "\n",
96 |     "os.makedirs(\"data/paul_graham/\", exist_ok=True)\n",
97 |     "\n",
98 |     "url = \"https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt\"\n",
99 |     "output_path = \"data/paul_graham/paul_graham_essay.txt\"\n",
100 |     "urllib.request.urlretrieve(url, output_path)"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "### Load the dataset\n",
108 |     "\n",
109 |     "Using LlamaIndex's `SimpleDirectoryReader`, we're loading the textual data from\n",
110 |     "our defined data directory. This prepares the document for further processing,\n",
111 |     "such as indexing."
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": null,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": [
120 |     "import nest_asyncio\n",
121 |     "from llama_index.core import SimpleDirectoryReader\n",
122 |     "\n",
123 |     "nest_asyncio.apply()\n",
124 |     "\n",
    |     "# Re-write the downloaded file as clean UTF-8 (a no-op if it already is)\n",
125 |     "with open(output_path, \"r\", encoding=\"utf-8\") as file:\n",
126 |     "    content = file.read()\n",
127 |     "\n",
128 |     "with open(output_path, \"w\", encoding=\"utf-8\") as file:\n",
129 |     "    file.write(content)\n",
130 |     "\n",
131 |     "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "metadata": {},
137 |    "source": [
138 |     "### Connect to Memgraph\n",
139 |     "\n",
140 |     "To establish a connection with Memgraph, set up the `MemgraphPropertyGraphStore`\n",
141 |     "class by providing your database credentials. You need to specify the username,\n",
142 |     "password, and connection URL (e.g., `bolt://localhost:7687`). \n",
143 |     "\n",
144 |     "Once initialized, this `graph_store` object will allow you to interact with\n",
145 |     "Memgraph and store or retrieve graph-based data efficiently."
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": null,
151 |    "metadata": {},
152 |    "outputs": [],
153 |    "source": [
154 |     "from llama_index.graph_stores.memgraph import MemgraphPropertyGraphStore\n",
155 |     "\n",
156 |     "username = \"\"  # Enter your Memgraph username (default \"\")\n",
157 |     "password = \"\"  # Enter your Memgraph password (default \"\")\n",
158 |     "url = \"\"  # Specify the connection URL, e.g., 'bolt://localhost:7687'\n",
159 |     "\n",
160 |     "graph_store = MemgraphPropertyGraphStore(\n",
161 |     "    username=username,\n",
162 |     "    password=password,\n",
163 |     "    url=url,\n",
164 |     ")"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "### Create a Property Graph Index \n",
172 |     "\n",
173 |     "Next, we build a **Property Graph Index** using the documents we previously\n",
174 |     "loaded. This index will help structure and store our data efficiently in\n",
175 |     "Memgraph. \n",
176 |     "\n",
177 |     "- We use `OpenAIEmbedding` to generate vector embeddings for the text. 
\n",
178 |     "- We configure `SchemaLLMPathExtractor`, which utilizes an OpenAI model\n",
179 |     "  (`gpt-4`) to extract structured knowledge from the documents. \n",
180 |     "- The index is stored in Memgraph using the `graph_store` connection. \n",
181 |     "\n",
182 |     "By running this, we transform unstructured text into a structured property\n",
183 |     "graph, making it easier to query and analyze relationships within the data.\n"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": null,
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": [
192 |     "from llama_index.core import PropertyGraphIndex\n",
193 |     "from llama_index.embeddings.openai import OpenAIEmbedding\n",
194 |     "from llama_index.llms.openai import OpenAI\n",
195 |     "from llama_index.core.indices.property_graph import SchemaLLMPathExtractor\n",
196 |     "\n",
197 |     "index = PropertyGraphIndex.from_documents(\n",
198 |     "    documents,\n",
199 |     "    embed_model=OpenAIEmbedding(model_name=\"text-embedding-ada-002\"),\n",
200 |     "    kg_extractors=[\n",
201 |     "        SchemaLLMPathExtractor(\n",
202 |     "            llm=OpenAI(model=\"gpt-4\", temperature=0.0)\n",
203 |     "        )\n",
204 |     "    ],\n",
205 |     "    property_graph_store=graph_store,\n",
206 |     "    show_progress=True,\n",
207 |     ")"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "Now that the graph is created, we can explore it in the UI by visiting `http://localhost:3000/`.\n",
215 |     "\n",
216 |     "The easiest way to visualize the entire graph is by running a Cypher command similar to this:\n",
217 |     "\n",
218 |     "`MATCH p=()-[]-() RETURN p;`\n",
219 |     "\n",
220 |     "This command matches all of the possible paths in the graph and returns the entire graph.\n",
221 |     "\n",
222 |     "To visualize the schema of the graph, visit the Graph schema tab and generate a new schema based on the newly created graph.\n",
223 |     "\n",
224 |     "To delete the entire graph, use:\n",
225 |     "\n",
226 |     "`MATCH (n) DETACH DELETE n;`\n",
227 |     "\n",
228 |     "### Querying & retrieval \n",
229 |     "\n",
230 |     "Now that we have structured our data into a property graph, we can retrieve\n",
231 |     "relevant information using two different approaches: \n",
232 |     "\n",
233 |     "1. **Retriever-based Search:** \n",
234 |     "   - We convert the index into a retriever (`as_retriever`), which allows us to\n",
235 |     "     fetch relevant nodes related to a query. \n",
236 |     "   - In this example, we query, *\"What happened at Interleaf and Viaweb?\"*, and\n",
237 |     "     print the retrieved nodes. \n",
238 |     "\n",
239 |     "2. **Query Engine:** \n",
240 |     "   - We convert the index into a query engine (`as_query_engine`), which\n",
241 |     "     provides a more detailed response by leveraging the structured graph. \n",
242 |     "   - The response includes a more comprehensive answer based on the extracted\n",
243 |     "     relationships. 
\n", 244 | "\n", 245 | "This step allows us to interact with our graph and extract meaningful insights\n", 246 | "from the indexed data.\n" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "retriever = index.as_retriever(include_text=False)\n", 256 | "\n", 257 | "# Example query: \"What happened at Interleaf and Viaweb?\"\n", 258 | "nodes = retriever.retrieve(\"What happened at Interleaf and Viaweb?\")\n", 259 | "\n", 260 | "# Output results\n", 261 | "print(\"Query Results:\")\n", 262 | "for node in nodes:\n", 263 | " print(node.text)\n", 264 | "\n", 265 | "# Alternatively, using a query engine\n", 266 | "query_engine = index.as_query_engine(include_text=True)\n", 267 | "\n", 268 | "# Perform a query and print the detailed response\n", 269 | "response = query_engine.query(\"What happened at Interleaf and Viaweb?\")\n", 270 | "print(\"\\nDetailed Query Response:\")\n", 271 | "print(str(response))" 272 | ] 273 | } 274 | ], 275 | "metadata": { 276 | "kernelspec": { 277 | "display_name": "Python 3.9.13 64-bit (microsoft store)", 278 | "language": "python", 279 | "name": "python3" 280 | }, 281 | "language_info": { 282 | "name": "python", 283 | "version": "3.9.13" 284 | }, 285 | "vscode": { 286 | "interpreter": { 287 | "hash": "289d8ae9ac585fcc15d0d9333c941ae27bdf80d3e799883224b20975f2046730" 288 | } 289 | } 290 | }, 291 | "nbformat": 4, 292 | "nbformat_minor": 2 293 | } 294 | -------------------------------------------------------------------------------- /integrations/llamaindex/single-agent-rag-system/data/2023_canadian_budget.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/integrations/llamaindex/single-agent-rag-system/data/2023_canadian_budget.pdf -------------------------------------------------------------------------------- /integrations/llamaindex/single-agent-rag-system/single_agent_rag_system.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Building a single-agent GraphRAG system with LlamaIndex and Memgraph\n", 8 | "\n", 9 | "In this example, we build a single-agent GraphRAG system using LlamaIndex and\n", 10 | "Memgraph, integrating retrieval-augmented generation (RAG) with graph-based\n", 11 | "querying and tool-using agents. We'll explore how to:\n", 12 | "\n", 13 | "- Set up **Memgraph** as a graph store for structured knowledge retrieval.\n", 14 | "- Use **LlamaIndex** to create a Property Graph Index and perform Memgraph's\n", 15 | " **vector search** on embedded data.\n", 16 | "- Implement an agent that uses tools for both arithmetic operations and semantic\n", 17 | " retrieval.\n", 18 | " \n", 19 | "\n", 20 | "## Prerequisites\n", 21 | "\n", 22 | "1. Make sure you have [Docker](https://www.docker.com/) running in the\n", 23 | " background. \n", 24 | "\n", 25 | "2. Run Memgraph\n", 26 | "\n", 27 | "The easiest way to run Memgraph is by using the following commands:\n", 28 | "\n", 29 | "For Linux/macOS: `curl https://install.memgraph.com | sh`\n", 30 | "\n", 31 | "For Windows: `iwr https://windows.memgraph.com | iex`\n", 32 | "\n", 33 | "3. 
Install necessary dependencies:\n"
34 |    ]
35 |   },
36 |   {
37 |    "cell_type": "code",
38 |    "execution_count": null,
39 |    "metadata": {},
40 |    "outputs": [],
41 |    "source": [
42 |     "%pip install llama-index llama-index-graph-stores-memgraph python-dotenv"
43 |    ]
44 |   },
45 |   {
46 |    "cell_type": "markdown",
47 |    "metadata": {},
48 |    "source": [
49 |     "## Environment setup\n",
50 |     "\n",
51 |     "Create a `.env` file that contains your OpenAI API key:\n",
52 |     "\n",
53 |     "`OPENAI_API_KEY=sk-proj-...`\n",
54 |     "\n",
55 |     "## Create the script\n",
56 |     "\n",
57 |     "Let's first load our `.env` file and set the LLM model we want to use. In this\n",
58 |     "example, we're using OpenAI's gpt-4 model.\n"
59 |    ]
60 |   },
61 |   {
62 |    "cell_type": "code",
63 |    "execution_count": null,
64 |    "metadata": {},
65 |    "outputs": [],
66 |    "source": [
67 |     "from dotenv import load_dotenv\n",
68 |     "load_dotenv()\n",
69 |     "\n",
70 |     "from llama_index.llms.openai import OpenAI\n",
71 |     "from llama_index.core import Settings\n",
72 |     "\n",
73 |     "# settings\n",
74 |     "Settings.llm = OpenAI(model=\"gpt-4\", temperature=0)"
75 |    ]
76 |   },
77 |   {
78 |    "cell_type": "markdown",
79 |    "metadata": {},
80 |    "source": [
81 |     "### Define calculator tools\n",
82 |     "\n",
83 |     "Next, define addition and multiplication tools for calculations and wrap them\n",
84 |     "in the `FunctionTool` class."
85 |    ]
86 |   },
87 |   {
88 |    "cell_type": "code",
89 |    "execution_count": null,
90 |    "metadata": {},
91 |    "outputs": [],
92 |    "source": [
93 |     "from llama_index.core.tools import FunctionTool\n",
94 |     "\n",
95 |     "# function tools\n",
96 |     "def multiply(a: float, b: float) -> float:\n",
97 |     "    \"\"\"Multiply two numbers and return the product\"\"\"\n",
98 |     "    return a * b\n",
99 |     "\n",
100 |     "multiply_tool = FunctionTool.from_defaults(fn=multiply)\n",
101 |     "\n",
102 |     "def add(a: float, b: float) -> float:\n",
103 |     "    \"\"\"Add two numbers and return the sum\"\"\"\n",
104 |     "    return a + b\n",
105 |     "\n",
106 |     "add_tool = FunctionTool.from_defaults(fn=add)"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "### Load the dataset \n",
114 |     "\n",
115 |     "Besides the basic operations, we also want to create a RAG pipeline and perform\n",
116 |     "retrieval operations on the dataset of our choice. In this example, we're using\n",
117 |     "a PDF file about the Canadian budget for 2023, which is stored in the `data`\n",
118 |     "directory. Let's load that dataset:"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "from llama_index.core import SimpleDirectoryReader\n",
128 |     "\n",
129 |     "documents = SimpleDirectoryReader(\"./data\").load_data()"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "metadata": {},
135 |    "source": [
136 |     "### Memgraph graph store\n",
137 |     "\n",
138 |     "We'll now establish a connection to **Memgraph**, using\n",
139 |     "`MemgraphPropertyGraphStore` from LlamaIndex. This allows us to store and\n",
140 |     "retrieve structured data efficiently, enabling **graph-based querying** for\n",
141 |     "retrieval-augmented generation (RAG) pipelines."
142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "from llama_index.graph_stores.memgraph import MemgraphPropertyGraphStore\n", 151 | "\n", 152 | "graph_store = MemgraphPropertyGraphStore(\n", 153 | " username=\"\", # Your Memgraph username, default is \"\"\n", 154 | " password=\"\", # Your Memgraph password, default is \"\"\n", 155 | " url=\"bolt://localhost:7687\" # Connection URL for Memgraph\n", 156 | ")\n" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "### Create a knowledge graph in Memgraph\n", 164 | "\n", 165 | "This section builds a **Property Graph Index** using `PropertyGraphIndex` from\n", 166 | "LlamaIndex. This index allows us to store and retrieve structured knowledge in a\n", 167 | "**graph database (Memgraph)** while leveraging OpenAI embeddings for semantic\n", 168 | "search." 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "from llama_index.core import PropertyGraphIndex\n", 178 | "from llama_index.core.indices.property_graph import SchemaLLMPathExtractor\n", 179 | "from llama_index.embeddings.openai import OpenAIEmbedding\n", 180 | "\n", 181 | "index = PropertyGraphIndex.from_documents(\n", 182 | " documents,\n", 183 | " embed_model=OpenAIEmbedding(model_name=\"text-embedding-ada-002\"),\n", 184 | " kg_extractors=[\n", 185 | " SchemaLLMPathExtractor(\n", 186 | " llm=OpenAI(model=\"gpt-4\", temperature=0.0)\n", 187 | " )\n", 188 | " ],\n", 189 | " property_graph_store=graph_store,\n", 190 | " show_progress=True,\n", 191 | ")" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "### RAG Pipeline: query engine and retrieval agent\n", 199 | "\n", 200 | "Let's now set up a **Retrieval-Augmented Generation (RAG) pipeline**. The\n", 201 | "pipeline enables efficient data retrieval from a structured knowledge base\n", 202 | "(Memgraph) and provides contextual responses using OpenAI's GPT-4.\n", 203 | "\n", 204 | "First, we convert the **Property Graph Index** into a **query engine**, allowing\n", 205 | "structured queries over the indexed data." 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "query_engine = index.as_query_engine()\n", 215 | "\n", 216 | "# smoke test\n", 217 | "response = query_engine.query(\n", 218 | " \"What was the total amount of the 2023 Canadian federal budget?\"\n", 219 | ")\n", 220 | "print(response)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "### Creating and running the agent\n", 228 | "\n", 229 | "Let's now create a **RAG agent** that can retrieve budget data and perform\n", 230 | "calculations. First, we define `budget_tool`, which provides facts about the\n", 231 | "2023 Canadian federal budget. Then, we create a `ReActAgent` that combines this\n", 232 | "tool with calculation tools, allowing it to both fetch information and handle\n", 233 | "math operations. Finally, we ask the agent: \"What is the total amount of the\n", 234 | "2023 Canadian federal budget multiplied by 3?\" and print the response to see it\n", 235 | "work step by step." 
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": null,
241 |    "metadata": {},
242 |    "outputs": [],
243 |    "source": [
244 |     "from llama_index.core.agent import ReActAgent\n",
245 |     "from llama_index.core.tools import QueryEngineTool\n",
246 |     "\n",
247 |     "# RAG pipeline as a tool\n",
248 |     "budget_tool = QueryEngineTool.from_defaults(\n",
249 |     "    query_engine,\n",
250 |     "    name=\"canadian_budget_2023\",\n",
251 |     "    description=\"A RAG engine with some basic facts about the 2023 Canadian federal budget.\"\n",
252 |     ")\n",
253 |     "\n",
254 |     "# Create the agent with tools\n",
255 |     "agent = ReActAgent.from_tools([multiply_tool, add_tool, budget_tool], verbose=True)\n",
256 |     "\n",
257 |     "# Query the agent\n",
258 |     "response = agent.chat(\"What is the total amount of the 2023 Canadian federal budget multiplied by 3? Go step by step, using a tool to do any math.\")\n",
259 |     "\n",
260 |     "print(response)"
261 |    ]
262 |   }
263 |  ],
264 |  "metadata": {
265 |   "kernelspec": {
266 |    "display_name": "Python 3.9.13 64-bit (microsoft store)",
267 |    "language": "python",
268 |    "name": "python3"
269 |   },
270 |   "language_info": {
271 |    "name": "python",
272 |    "version": "3.9.13"
273 |   },
274 |   "orig_nbformat": 4,
275 |   "vscode": {
276 |    "interpreter": {
277 |     "hash": "289d8ae9ac585fcc15d0d9333c941ae27bdf80d3e799883224b20975f2046730"
278 |    }
279 |   }
280 |  },
281 |  "nbformat": 4,
282 |  "nbformat_minor": 2
283 | }
284 | 
--------------------------------------------------------------------------------
/integrations/mcp/synonym-agent/.gitignore:
--------------------------------------------------------------------------------
1 | business_rules.yaml
2 | 
--------------------------------------------------------------------------------
/integrations/mcp/synonym-agent/.python-version:
--------------------------------------------------------------------------------
1 | 3.13
2 | 
--------------------------------------------------------------------------------
/integrations/mcp/synonym-agent/README.md:
--------------------------------------------------------------------------------
1 | ## Quick Start
2 | 
3 | ```
4 | # 1. Run Memgraph with a dataset loaded.
5 | # 2. `business_rules.yaml` should contain data in the following format:
6 | #    configuration:
7 | #      business_rules:
8 | #        - label: "label"
9 | #          prop: "property"
10 | #          explanation: "Explain to LLM when and how to use the label+property pair."
11 | # 3.
12 | export OPENAI_API_KEY=...
13 | export PYTHONPATH={{script_dir}}/../../langgraph/synonym-agent
14 | ./init.bash
15 | ```
16 | 
17 | At the time of implementation (2025-03-16), MCP did NOT have framework support
19 | -------------------------------------------------------------------------------- /integrations/mcp/synonym-agent/init.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -LsSf https://astral.sh/uv/install.sh | sh 4 | 5 | uv init || true 6 | uv add mcp[cli] 7 | uv add ollama 8 | uv add colorama 9 | uv add langchain_core 10 | uv add langgraph 11 | uv add langchain_openai 12 | uv add langchain_community 13 | uv add neo4j 14 | uv add black 15 | 16 | uv run mcp dev server.py 17 | 18 | # NOTE: uv run black *.py 19 | -------------------------------------------------------------------------------- /integrations/mcp/synonym-agent/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "synonym-agent" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.13" 7 | dependencies = [ 8 | "black>=25.1.0", 9 | "colorama>=0.4.6", 10 | "langchain-community>=0.3.19", 11 | "langchain-core>=0.3.45", 12 | "langchain-openai>=0.3.8", 13 | "langgraph>=0.3.11", 14 | "mcp[cli]>=1.4.1", 15 | "neo4j>=5.28.1", 16 | "ollama>=0.4.7", 17 | ] 18 | -------------------------------------------------------------------------------- /integrations/mcp/synonym-agent/server.py: -------------------------------------------------------------------------------- 1 | import ollama 2 | from mcp.server.fastmcp import FastMCP 3 | 4 | # NOTE: Add/export {{path_to_project}}/integrations/langgraph/synonym-agent/ to 5 | # PYTHONPATH. 6 | from workflows import run_workflow 7 | 8 | 9 | def prompt_llama3(content): 10 | response = ollama.chat( 11 | model="llama3", 12 | messages=[ 13 | { 14 | "role": "user", 15 | "content": content, 16 | }, 17 | ], 18 | ) 19 | return response["message"]["content"] 20 | 21 | 22 | mcp = FastMCP("Synonym Agent") 23 | 24 | 25 | # Simple example of a prompt under MCP. 26 | @mcp.prompt() 27 | def prompt_llama3_via_mcp(question: str) -> str: 28 | return prompt_llama3(question) 29 | 30 | 31 | # NOTE: At the time of implementation (2025-03-16), MCP does NOT support 32 | # complex workflows or tree of thoughts -> all reasoning logic has to be 33 | # implemented under a single prompt. Using langgraph workflow example. 34 | @mcp.prompt() 35 | def answer_business_specific_question(question: str) -> str: 36 | return run_workflow(question)["final_answer"] 37 | -------------------------------------------------------------------------------- /knowledge-graph-creation/catcher-in-the-rye/processed_data.json: -------------------------------------------------------------------------------- 1 | [{"text": "'The Catcher in the Rye' by J.D. Salinger follows Holden Caulfield, a troubled teenager who narrates his experiences over a few days after being expelled from his elite boarding school, Pencey Prep.", "entities": [["J.D. 
Salinger", "PERSON"], ["Holden Caulfield", "PERSON"], ["a few days", "DATE"], ["Pencey", "GPE"]]}, {"text": "Set in post-World War II New York City, the story revolves around Holden\u2019s encounters with various characters, reflecting his disillusionment with the adult world and his search for identity and meaning.", "entities": [["post-World War II", "EVENT"], ["New York City", "GPE"], ["Holden", "PERSON"]]}, {"text": "The novel begins with Holden being expelled due to poor academic performance, which sets the stage for his wandering through New York City.", "entities": [["Holden", "PERSON"], ["New York City", "GPE"]]}, {"text": "His isolation becomes a central theme, symbolizing his struggle with mental health and alienation.", "entities": []}, {"text": "Throughout the book, Holden interacts with multiple characters, including teachers, former classmates, strangers, and his younger sister, Phoebe.", "entities": [["Holden", "PERSON"], ["Phoebe", "PERSON"]]}, {"text": "Each interaction reveals his distrust of adults and his disdain for what he calls phoniness.", "entities": []}, {"text": "He idolizes Phoebe as a symbol of innocence and sincerity, which stands in contrast to his views on the rest of society.", "entities": [["Phoebe", "PERSON"]]}, {"text": "Holden\u2019s fixation on preserving innocence is symbolized by his dream of being the catcher in the rye, a protector who saves children from losing their innocence.", "entities": [["Holden", "PERSON"]]}, {"text": "Key symbols also include his red hunting hat, which represents Holden's uniqueness and desire for protection, and the Museum of Natural History, a place he values for its permanence in contrast to life\u2019s constant change and unpredictability.", "entities": [["Holden", "PERSON"], ["the Museum of Natural History", "ORG"]]}, {"text": "Holden\u2019s narrative reveals symptoms of depression and lingering trauma from the death of his younger brother, Allie, which complicates his ability to cope with the challenges of adulthood.", "entities": [["Holden", "PERSON"], ["Allie", "PERSON"]]}, {"text": "His internal struggles suggest unresolved grief and a fear of growing up.", "entities": []}, {"text": "The climax of the story occurs when Holden, overwhelmed, plans to run away but has a meaningful encounter with Phoebe that changes his mind.", "entities": [["Holden", "PERSON"], ["Phoebe", "PERSON"]]}, {"text": "Her innocence and love provide him with a sense of purpose, grounding him and encouraging him to continue facing his reality.", "entities": []}, {"text": "By the novel\u2019s end, Holden reluctantly begins to accept life\u2019s imperfections and complexities.", "entities": [["Holden", "PERSON"]]}, {"text": "The main characters include Holden Caulfield, who is marked by cynicism, vulnerability, and compassion; Phoebe Caulfield, his younger sister who represents innocence and serves as an emotional anchor for Holden; Mr. 
Antolini, a former teacher who offers him guidance and represents an adult Holden partially trusts; and Allie Caulfield, Holden\u2019s deceased younger brother, whose memory profoundly impacts him.", "entities": [["Holden Caulfield", "PERSON"], ["Phoebe Caulfield", "PERSON"], ["Holden", "PERSON"], ["Antolini", "PERSON"], ["Holden", "PERSON"], ["Allie Caulfield", "PERSON"], ["Holden", "PERSON"]]}, {"text": "The novel is set primarily in New York City, with scenes at Pencey Prep and various urban locations, emphasizing Holden's sense of disorientation and social critique.", "entities": [["New York City", "GPE"], ["Pencey Prep", "ORG"], ["Holden", "PERSON"]]}, {"text": "Themes of alienation, innocence, identity, and the challenges of adolescence permeate the novel, creating a poignant exploration of a young person grappling with mental health and the transition to adulthood.", "entities": []}]
--------------------------------------------------------------------------------
/knowledge-graph-creation/game-of-thrones/game-of-thrones-kg.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "## Expanding the knowledge\n",
8 |     "\n",
9 |     "Let's say that now we want to expand our existing knowledge graph with\n",
10 |     "additional information to enrich the dataset, provide more context and retrieve\n",
11 |     "more relevant data. \n",
12 |     "\n",
13 |     "In this example, we will take **unstructured data**, such as the\n",
14 |     "character description summary provided below, extract entities from that\n",
15 |     "summary, generate triplets to build the knowledge graph, create queries and\n",
16 |     "eventually execute those queries in Memgraph to incorporate them into the existing\n",
17 |     "graph. \n",
18 |     "\n",
19 |     "\n",
20 |     "This highlights the possibility of loading unstructured data into Memgraph. \n",
21 |     "\n",
22 |     "Here is an example of unstructured data: "
23 |    ]
24 |   },
25 |   {
26 |    "cell_type": "code",
27 |    "execution_count": null,
28 |    "metadata": {},
29 |    "outputs": [],
30 |    "source": [
31 |     "# Sample text summary for processing\n",
32 |     "summary = \"\"\"\n",
33 |     "    Viserys Targaryen is the last living son of the former king, Aerys II Targaryen (the 'Mad King').\n",
34 |     "    As one of the last known Targaryen heirs, Viserys Targaryen is obsessed with reclaiming the Iron Throne and \n",
35 |     "    restoring his family’s rule over Westeros. Ambitious and arrogant, he often treats his younger sister, Daenerys Targaryen, \n",
36 |     "    as a pawn, seeing her only as a means to gain power. His ruthless ambition leads him to make a marriage alliance with \n",
37 |     "    Khal Drogo, a powerful Dothraki warlord, hoping Khal Drogo will give him the army he needs. \n",
38 |     "    However, Viserys Targaryen’s impatience and disrespect toward the Dothraki culture lead to his downfall;\n",
39 |     "    he is ultimately killed by Khal Drogo in a brutal display of 'a crown for a king' – having molten gold poured over his head. \n",
40 |     "    \"\"\""
41 |    ]
42 |   },
43 |   {
44 |    "cell_type": "markdown",
45 |    "metadata": {},
46 |    "source": [
47 |     "### Entity extraction\n",
48 |     "\n",
49 |     "The first step in the process is to extract entities from the summary using\n",
50 |     "[SpaCy’s LLM](https://spacy.io/usage/large-language-models).\n",
51 |     "\n",
52 |     "To begin, we need to install SpaCy and the specific model we will be using."
53 |    ]
54 |   },
55 |   {
56 |    "cell_type": "code",
57 |    "execution_count": null,
58 |    "metadata": {},
59 |    "outputs": [],
60 |    "source": [
61 |     "%pip install spacy\n",
62 |     "%pip install spacy_llm\n",
63 |     "!python -m spacy download en_core_web_md"
64 |    ]
65 |   },
66 |   {
67 |    "cell_type": "markdown",
68 |    "metadata": {},
69 |    "source": [
70 |     "We are extracting entities from the text, that is, preprocessing the data before\n",
71 |     "sending it to the GPT model, to get more accurate and relevant results. By\n",
72 |     "using SpaCy, we can identify key entities such as characters and locations\n",
73 |     "for a better understanding of the semantics in the text.\n",
74 |     "\n",
75 |     "This is useful because SpaCy is specifically trained to recognize\n",
76 |     "linguistic patterns and relationships in text, which helps to isolate and\n",
77 |     "highlight the most important pieces of information. By preprocessing the text\n",
78 |     "this way, we ensure that the GPT model receives more structured input, which\n",
79 |     "helps reduce noise and irrelevant data, leading to more precise and context-aware\n",
80 |     "outputs. "
81 |    ]
82 |   },
83 |   {
84 |    "cell_type": "code",
85 |    "execution_count": null,
86 |    "metadata": {},
87 |    "outputs": [],
88 |    "source": [
89 |     "import os\n",
90 |     "import spacy\n",
91 |     "from spacy_llm.util import assemble\n",
92 |     "import json\n",
93 |     "from collections import Counter\n",
94 |     "from pathlib import Path\n",
95 |     "\n",
96 |     "# Split document into sentences\n",
97 |     "def split_document_sent(text, nlp):\n",
98 |     "    doc = nlp(text)\n",
99 |     "    return [sent.text.strip() for sent in doc.sents]\n",
100 |     "\n",
101 |     "\n",
102 |     "def process_text(text, nlp, verbose=False):\n",
103 |     "    doc = nlp(text)\n",
104 |     "    if verbose:\n",
105 |     "        print(f\"Text: {doc.text}\")\n",
106 |     "        print(f\"Entities: {[(ent.text, ent.label_) for ent in doc.ents]}\")\n",
107 |     "    return doc\n",
108 |     "\n",
109 |     "\n",
110 |     "# Pipeline to run entity extraction\n",
111 |     "def extract_entities(text, nlp, verbose=False):\n",
112 |     "    processed_data = []\n",
113 |     "    entity_counts = Counter()\n",
114 |     "\n",
115 |     "    sentences = split_document_sent(text, nlp)\n",
116 |     "    for sent in sentences:\n",
117 |     "        doc = process_text(sent, nlp, verbose)\n",
118 |     "        entities = [(ent.text, ent.label_) for ent in doc.ents]\n",
119 |     "\n",
120 |     "        # Store processed data for each sentence\n",
121 |     "        processed_data.append({\"text\": doc.text, \"entities\": entities})\n",
122 |     "\n",
123 |     "        # Update counters\n",
124 |     "        entity_counts.update([ent[1] for ent in entities])\n",
125 |     "\n",
126 |     "    # Export to JSON\n",
127 |     "    with open(\"processed_data.json\", \"w\") as f:\n",
128 |     "        json.dump(processed_data, f)\n",
129 |     "\n"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "metadata": {},
135 |    "source": [
136 |     "### Generate queries\n",
137 |     "\n",
138 |     "After the spaCy LLM pipeline has pre-processed the entities, the data is passed to the GPT model to generate structured data consisting of nodes and relationships. From that, we generate the Cypher queries which will be executed in Memgraph."
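    |     ,"\n",
    |     "To make the mapping concrete, a hypothetical node entry such as `{\"id\": 1, \"name\": \"Khal Drogo\", \"type\": \"Character\"}` would be turned into\n",
    |     "\n",
    |     "`MERGE (n:Character:Entity {name: 'Khal Drogo'}) ON CREATE SET n.id=1 ON MATCH SET n.id=1`\n",
    |     "\n",
    |     "and a relationship entry `{\"source\": 1, \"target\": 2, \"relationship\": \"KILLED\"}` into\n",
    |     "\n",
    |     "`MATCH (a {id: 1}), (b {id: 2}) CREATE (a)-[:KILLED]->(b)`\n",
    |     "\n",
    |     "by the helper defined in the next cell."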
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "def generate_cypher_queries(nodes, relationships):\n",
148 |     "    queries = []\n",
149 |     "\n",
150 |     "    # Create nodes\n",
151 |     "    for node in nodes:\n",
    |     "        # NOTE: assumes entity names contain no single quotes\n",
152 |     "        query = f\"\"\"\n",
153 |     "        MERGE (n:{node['type']}:Entity {{name: '{node['name']}'}}) \n",
154 |     "        ON CREATE SET n.id={node['id']} \n",
155 |     "        ON MATCH SET n.id={node['id']}\n",
156 |     "        \"\"\"\n",
157 |     "        queries.append(query)\n",
158 |     "\n",
159 |     "    # Create relationships\n",
160 |     "    for rel in relationships:\n",
161 |     "        query = f\"MATCH (a {{id: {rel['source']}}}), (b {{id: {rel['target']}}}) \" \\\n",
162 |     "                f\"CREATE (a)-[:{rel['relationship']}]->(b)\"\n",
163 |     "        queries.append(query)\n",
164 |     "\n",
165 |     "    return queries"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "markdown",
170 |    "metadata": {},
171 |    "source": [
172 |     "### Enriching the graph\n",
173 |     "\n",
174 |     "The `enrich_graph_data` function will merge new knowledge into the graph by doing the following:\n",
175 |     "\n",
176 |     "1. Extracting the entities with the spaCy LLM pipeline into JSON\n",
177 |     "2. Creating nodes and relationships based on the extracted entities with the GPT model\n",
178 |     "3. Loading the data into Memgraph"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "metadata": {},
185 |    "outputs": [],
186 |    "source": [
    |     "import asyncio\n",
    |     "import nest_asyncio\n",
    |     "from dotenv import load_dotenv\n",
    |     "from openai import AsyncOpenAI\n",
    |     "\n",
    |     "\n",
    |     "# The notebook calls get_response() below without defining it anywhere;\n",
    |     "# this is a minimal sketch of such a helper (adjust the model as needed)\n",
    |     "async def get_response(client, prompt):\n",
    |     "    response = await client.chat.completions.create(\n",
    |     "        model=\"gpt-4\",\n",
    |     "        messages=[{\"role\": \"user\", \"content\": prompt}],\n",
    |     "    )\n",
    |     "    return response.choices[0].message.content\n",
    |     "\n",
    |     "\n",
188 |     "def enrich_graph_data(driver, summary):\n",
189 |     "    nest_asyncio.apply()\n",
190 |     "    \n",
191 |     "    load_dotenv()\n",
192 |     "    os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
193 |     "\n",
194 |     "    client = AsyncOpenAI()\n",
195 |     "\n",
196 |     "    # Load the spaCy model\n",
197 |     "    nlp = spacy.load(\"en_core_web_md\")\n",
198 |     "\n",
    |     "    # Extract entities from the summary passed in as an argument\n",
210 |     "    extract_entities(summary, nlp)\n",
211 |     "\n",
212 |     "    # Load processed data from JSON\n",
213 |     "    json_path = Path(\"processed_data.json\")\n",
214 |     "    with open(json_path, \"r\") as f:\n",
215 |     "        processed_data = json.load(f)\n",
216 |     "\n",
217 |     "    # Prepare nodes and relationships\n",
218 |     "    nodes = []\n",
219 |     "    relationships = []\n",
220 |     "\n",
221 |     "    # Formulate a prompt for GPT-4\n",
222 |     "    prompt = (\n",
223 |     "        \"Extract entities and relationships from the following JSON data. For each entry in data['entities'], \"\n",
224 |     "        \"create a 'node' dictionary with fields 'id' (unique identifier), 'name' (entity text), and 'type' (entity label). 
\"\n",
225 |     "        \"For entities that have meaningful connections, define 'relationships' as dictionaries with 'source' (source node id), \"\n",
226 |     "        \"'target' (target node id), and 'relationship' (type of connection). Create max 30 nodes, format relationships in the format of capital letters and _ inbetween words and format the entire response in the JSON output containing only variables nodes and relationships without any text inbetween. Use following labels for nodes: Character, Title, Location, House, Death, Event, Allegiance and following relationship types: HAPPENED_IN, SIBLING_OF, PARENT_OF, MARRIED_TO, HEALED_BY, RULES, KILLED, LOYAL_TO, BETRAYED_BY. Make sure the entire JSON file fits in the output\"\n",
227 |     "        \"JSON data:\\n\"\n",
228 |     "        f\"{json.dumps(processed_data)}\"\n",
229 |     "    )\n",
230 |     "\n",
231 |     "    response = asyncio.run(get_response(client, prompt))\n",
232 |     "\n",
233 |     "    structured_data = json.loads(response)  # Assuming GPT-4 outputs structured JSON\n",
234 |     "\n",
235 |     "    # Populate nodes and relationships lists\n",
236 |     "    nodes.extend(structured_data.get(\"nodes\", []))\n",
237 |     "    relationships.extend(structured_data.get(\"relationships\", []))\n",
238 |     "\n",
239 |     "    cypher_queries = generate_cypher_queries(nodes, relationships)\n",
240 |     "    with driver.session() as session:\n",
241 |     "        for query in cypher_queries:\n",
242 |     "            try:\n",
243 |     "                session.run(query)\n",
244 |     "                print(f\"Executed query: {query}\")\n",
245 |     "            except Exception as e:\n",
246 |     "                print(f\"Error executing query: {query}. Error: {e}\")\n",
247 |     "\n",
248 |     "\n",
249 |     "enrich_graph_data(driver, summary)"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "markdown",
254 |    "metadata": {},
255 |    "source": [
256 |     "The knowledge graph is now enriched with additional knowledge extracted from the unstructured text. "
257 |    ]
258 |   }
259 |  ],
260 |  "metadata": {
261 |   "language_info": {
262 |    "name": "python"
263 |   }
264 |  },
265 |  "nbformat": 4,
266 |  "nbformat_minor": 2
267 | }
268 | 
--------------------------------------------------------------------------------
/retrieval/vector-search/chat-with-your-knowledge/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,venv
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,venv
3 | 
4 | ### Python ###
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 | 
10 | # C extensions
11 | *.so
12 | 
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 | 
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
164 | #.idea/ 165 | 166 | ### Python Patch ### 167 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 168 | poetry.toml 169 | 170 | # ruff 171 | .ruff_cache/ 172 | 173 | # LSP config files 174 | pyrightconfig.json 175 | 176 | ### venv ### 177 | # Virtualenv 178 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 179 | [Bb]in 180 | [Ii]nclude 181 | [Ll]ib 182 | [Ll]ib64 183 | [Ll]ocal 184 | [Ss]cripts 185 | pyvenv.cfg 186 | pip-selfcheck.json 187 | 188 | ### VisualStudioCode ### 189 | .vscode/* 190 | !.vscode/settings.json 191 | !.vscode/tasks.json 192 | !.vscode/launch.json 193 | !.vscode/extensions.json 194 | !.vscode/*.code-snippets 195 | 196 | # Local History for Visual Studio Code 197 | .history/ 198 | 199 | # Built Visual Studio Code Extensions 200 | *.vsix 201 | 202 | ### VisualStudioCode Patch ### 203 | # Ignore all local history of files 204 | .history 205 | .ionide 206 | 207 | # End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,venv 208 | 209 | faiss_index/ -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/memgraph/ai-demos/c2e49d1b320cabd1106993c9d1d83f8de9b3ec17/retrieval/vector-search/chat-with-your-knowledge/README.md -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from controller import StorageController, LLMController 3 | 4 | @st.cache_resource 5 | def get_controller(): 6 | return StorageController() 7 | 8 | 9 | @st.cache_resource 10 | def get_llm_controller(): 11 | return LLMController() 12 | 13 | 14 | controller = get_controller() 15 | llm_controller = get_llm_controller() 16 | 17 | # --- Sidebar Navigation --- 18 | st.sidebar.title("📂 Navigation") 19 | page = st.sidebar.radio( 20 | "Go to", 21 | [ 22 | "Ingest Wikipedia", 23 | "Ingest by Yourself", 24 | "Dataset Exploration", 25 | "Chat With Your Knowledge", 26 | "Generate Pub Quiz", 27 | ], 28 | ) 29 | 30 | 31 | # --- Shared helpers --- 32 | def difficulty_flag(level: str) -> str: 33 | level = level.lower() 34 | return {"easy": "🟢", "medium": "🟡", "hard": "🔴"}.get(level, "⚪️") 35 | 36 | 37 | # --- Shared language prefix input --- 38 | st.sidebar.markdown("### Language Settings") 39 | lang_prefix = st.sidebar.text_input("Optional language prefix", value="en") 40 | 41 | # ============================== 42 | # 📥 Ingest Wikipedia 43 | # ============================== 44 | if page == "Ingest Wikipedia": 45 | st.title("📥 Ingest Wikipedia Page into Memgraph") 46 | 47 | with st.form("ingest_form"): 48 | category = st.text_input("Enter Wikipedia page title", value="") 49 | save_as_category = st.text_input( 50 | "Save as category (empty will save with same name)", value="" 51 | ) 52 | ingestion_mode = st.radio( 53 | "Ingestion mode", options=["Ingest from scratch", "Update dataset"], index=0 54 | ) 55 | section_filter = st.text_input( 56 | "Target section (e.g. 
Plot, Reception, Cast)", value="" 57 | ) 58 | submitted = st.form_submit_button("Ingest") 59 | 60 | if submitted: 61 | with st.spinner("🔄 Ingesting and creating vector index..."): 62 | mode = ( 63 | "replace" if ingestion_mode == "Ingest from scratch" else "append" 64 | ) 65 | has_section_filter = ( 66 | section_filter is not None and len(section_filter) > 0 67 | ) 68 | method = "detailed" if has_section_filter else "quick" 69 | section = section_filter 70 | count = controller.ingest_wikipedia( 71 | category, 72 | save_as_category, 73 | lang_prefix, 74 | mode=mode, 75 | method=method, 76 | section_filter=section_filter if has_section_filter else None, 77 | ) 78 | if count is not None: 79 | verb = "Replaced" if mode == "replace" else "Appended" 80 | st.success( 81 | f"✅ {verb} {count} paragraphs from '{category}' into storage." 82 | ) 83 | else: 84 | st.success( 85 | f"✅ Paragraphs from '{category}' already exist in storage!" 86 | ) 87 | 88 | # ============================== 89 | # ✍️ Ingest by Yourself 90 | # ============================== 91 | elif page == "Ingest by Yourself": 92 | st.title("✍️ Ingest a Custom Paragraph") 93 | 94 | available_categories = controller.get_all_categories() 95 | 96 | with st.form("custom_ingest_form"): 97 | st.markdown("#### Paste your content") 98 | user_paragraph = st.text_area("Text to ingest", height=300) 99 | 100 | st.markdown("#### Choose where to save it") 101 | existing_label = st.selectbox( 102 | "Save to existing label:", options=available_categories + [""] 103 | ) 104 | new_label = st.text_input( 105 | "Or enter a new label (will override above if filled):" 106 | ) 107 | 108 | submitted = st.form_submit_button("📥 Ingest Text") 109 | 110 | if submitted: 111 | if not user_paragraph.strip(): 112 | st.warning("⚠️ Please paste some text.") 113 | else: 114 | target_label = ( 115 | new_label.strip() if new_label.strip() else existing_label 116 | ) 117 | if not target_label: 118 | st.warning("⚠️ Please select or enter a category name.") 119 | else: 120 | with st.spinner("Embedding and saving..."): 121 | count = controller.ingest_custom_text( 122 | target_label, 123 | user_paragraph, 124 | lang_prefix=lang_prefix, 125 | mode="append", 126 | ) 127 | st.success(f"✅ Ingested 1 paragraph into '{target_label}'.") 128 | 129 | # ============================== 130 | # 📊 Dataset Exploration 131 | # ============================== 132 | elif page == "Dataset Exploration": 133 | st.title("📊 Explore Your Ingested Dataset") 134 | 135 | available_categories = controller.get_all_categories() 136 | if not available_categories: 137 | st.info("ℹ️ No datasets found. 
Please ingest something first.") 138 | else: 139 | selected_category = st.selectbox( 140 | "Select a category to explore:", options=available_categories 141 | ) 142 | if st.button("🔍 Retrieve Dataset"): 143 | with st.spinner(f"Retrieving paragraphs from '{selected_category}'..."): 144 | paragraphs = controller.get_all_paragraphs_from_category( 145 | selected_category 146 | ) 147 | if not paragraphs: 148 | st.warning("No paragraphs found for the selected category.") 149 | else: 150 | st.success(f"✅ Found {len(paragraphs)} paragraphs.") 151 | for i, item in enumerate(paragraphs): 152 | with st.expander(f"📄 Paragraph {i+1}", expanded=False): 153 | st.markdown(item["content"]) 154 | 155 | 156 | # ============================== 157 | # 💬 Chat With Your Knowledge (Chatbot) 158 | # ============================== 159 | elif page == "Chat With Your Knowledge": 160 | st.title("💬 Chat with Your Knowledge") 161 | 162 | available_categories = controller.get_all_categories() 163 | if not available_categories: 164 | st.info("ℹ️ No categories ingested yet. Please ingest some data first.") 165 | else: 166 | category = st.selectbox( 167 | "Select a page to chat with:", options=available_categories 168 | ) 169 | 170 | # Initialize chat history in session 171 | if "chat_history" not in st.session_state: 172 | st.session_state.chat_history = [] 173 | 174 | # Display previous chat messages 175 | for msg in st.session_state.chat_history: 176 | with st.chat_message(msg["role"]): 177 | st.markdown(msg["content"]) 178 | 179 | # Chat input box 180 | user_input = st.chat_input("Ask a question about the selected page...") 181 | if user_input: 182 | st.chat_message("user").markdown(user_input) 183 | st.session_state.chat_history.append( 184 | {"role": "user", "content": user_input} 185 | ) 186 | 187 | # Semantic search 188 | with st.spinner("🔍 Retrieving relevant knowledge..."): 189 | context = controller.get_similar_documents(category, user_input, 10) 190 | 191 | # Generate answer 192 | with st.spinner("🧠 GPT-4o is thinking..."): 193 | answer = llm_controller.answer_question_based_on_excerpts( 194 | user_input, context, lang_prefix 195 | ) 196 | 197 | # Display bot response 198 | st.chat_message("assistant").markdown(answer) 199 | st.session_state.chat_history.append( 200 | {"role": "assistant", "content": answer} 201 | ) 202 | 203 | # Optional: display excerpts in a toggle box 204 | with st.expander("📚 View source excerpts"): 205 | for i, excerpt in enumerate(context): 206 | st.markdown(f"**Excerpt {i+1}:**") 207 | st.markdown(excerpt) 208 | 209 | 210 | # ============================== 211 | # 🧠 Generate Pub Quiz 212 | # ============================== 213 | elif page == "Generate Pub Quiz": 214 | st.title("🧠 Generate a Pub Quiz") 215 | 216 | available_categories = controller.get_all_categories() 217 | if not available_categories: 218 | st.info("ℹ️ No categories ingested yet. 
Please ingest some data first.") 219 | else: 220 | category = st.selectbox("Select a page:", options=available_categories) 221 | number_of_questions = st.number_input( 222 | "Number of questions", min_value=1, max_value=50, value=5, step=1 223 | ) 224 | better_explanation = st.text_input( 225 | "What kind of questions would you like to focus on?", 226 | value="No specific kind.", 227 | ) 228 | 229 | if st.button("🎲 Generate Pub Quiz"): 230 | with st.spinner("Selecting paragraphs and generating quiz..."): 231 | quiz = llm_controller.generate_quiz( 232 | category, number_of_questions, lang_prefix, better_explanation 233 | ) 234 | if quiz is None: 235 | st.warning("Unable to generate quiz!") 236 | else: 237 | for i, qa in enumerate(quiz, 1): 238 | st.markdown( 239 | f"**{difficulty_flag(qa['difficulty'])} Q{i}:** {qa['question']}" 240 | ) 241 | with st.expander("Show Answer", expanded=True): 242 | st.markdown(f"**A{i}:** {qa['answer']}") 243 | with st.expander("Show Explanation", expanded=True): 244 | st.markdown(f"**E{i}:** {qa['explanation']}") 245 | st.markdown("---") 246 | -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/controller.py: -------------------------------------------------------------------------------- 1 | from embeddings import EmbeddingGenerator 2 | from memgraph_storage import MemgraphStorage 3 | from dotenv import load_dotenv 4 | from openai import OpenAI 5 | from wikipedia_processor import WikipediaProcessor 6 | from wikipedia_detailed_processor import DetailedWikipediaProcessor 7 | import os 8 | import json 9 | import re 10 | from typing import List 11 | 12 | load_dotenv() 13 | 14 | def get_ks_storage(): 15 | return MemgraphStorage() 16 | 17 | 18 | def sanitize_category(category: str) -> str: 19 | # Converts label to valid Cypher identifier (e.g., no spaces or special chars) 20 | return re.sub(r"[^a-zA-Z0-9_]", "_", category) 21 | 22 | 23 | def extract_json(text: str) -> str: 24 | """Extract a JSON string from Markdown-style code blocks or plain output.""" 25 | match = re.search(r"```json\s*(.*?)```", text, re.DOTALL) 26 | if match: 27 | return match.group(1).strip() 28 | return text.strip() 29 | 30 | 31 | class StorageController: 32 | def __init__(self): 33 | self._storage = get_ks_storage() 34 | self._embedding_generator = EmbeddingGenerator() 35 | self._wikipedia_processor = WikipediaProcessor() 36 | self._wikipedia_detailed_processor = DetailedWikipediaProcessor() 37 | 38 | def get_all_categories(self) -> List[str]: 39 | return self._storage.get_all_categories() 40 | 41 | def ingest_wikipedia( 42 | self, category, save_as_category, lang_prefix, mode="replace", method="quick", section_filter=None 43 | ): 44 | if len(category) == 0: 45 | return 0 46 | category = sanitize_category(category) 47 | 48 | if len(save_as_category) == 0: 49 | save_as_category = category 50 | save_as_category = sanitize_category(save_as_category) 51 | 52 | if method == "quick": 53 | paragraphs, embeddings = ( 54 | self._wikipedia_processor.process_wikipedia_documents( 55 | category, lang_prefix 56 | ) 57 | ) 58 | else: 59 | # Otherwise, use the detailed processor 60 | paragraphs, embeddings = ( 61 | self._wikipedia_detailed_processor.process_detailed_sections( 62 | category, lang_prefix, section_filter 63 | ) 64 | ) 65 | 66 | if len(paragraphs) == 0: 67 | return 0 68 | 69 | return self._storage.ingest_paragraphs( 70 | save_as_category, paragraphs, embeddings, lang_prefix, mode 71 | ) 72 | 73 | def get_similar_documents(self, category:
str, question: str, n: int) -> List[str]: 74 | category = sanitize_category(category) 75 | query_vector = self._embedding_generator.get_question_embedding(question) 76 | 77 | results = self._storage.get_similar_documents(category, query_vector, n) 78 | 79 | context = [result["content"] for result in results] 80 | return context 81 | 82 | def get_paragraph_ids(self, category: str) -> List[int]: 83 | category = sanitize_category(category) 84 | return self._storage.get_paragraph_ids(category) 85 | 86 | def get_all_paragraphs_from_category(self, category: str): 87 | category = sanitize_category(category) 88 | return self._storage.get_all_paragraphs(category) 89 | 90 | 91 | def ingest_custom_text(self, category, paragraph, lang_prefix="custom", mode="append"): 92 | category = sanitize_category(category) 93 | paragraphs = [paragraph.strip()] 94 | embeddings = self._embedding_generator.get_embeddings(paragraphs) 95 | return self._storage.ingest_paragraphs(category, paragraphs, embeddings, lang_prefix, mode) 96 | 97 | def delete_paragraph(self, category: str, paragraph_id: str): 98 | category = sanitize_category(category) 99 | return self._storage.delete_paragraph(category, paragraph_id) 100 | 101 | 102 | class LLMController: 103 | def __init__(self): 104 | self._client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 105 | self._storage = get_ks_storage() 106 | 107 | def answer_question_based_on_excerpts( 108 | self, question: str, context: List[str], lang_prefix: str 109 | ) -> str: 110 | context_text = "\n\n".join(context) 111 | 112 | prompt = f""" 113 | Using only the information from the following Wikipedia excerpts, answer the question below in two parts: 114 | 115 | 1. Provide a **brief answer** (1–2 sentences max) that directly and clearly addresses the question. Begin it with "Short answer:". 116 | 2. Then write a **detailed, coherent paragraph** that explains the answer using strictly the content from the excerpts. Begin it with "Coherent answer:". 117 | 118 | **Do not use any external knowledge** or make assumptions. If the answer is not present in the excerpts, clearly state that the information is not available. 119 | 120 | Avoid repeating phrases or listing bullet points. The longer explanation should read as a natural, well-written summary suitable for someone unfamiliar with the topic. 121 | 122 | Respond in the following language: {lang_prefix} 123 | 124 | Question: "{question}" 125 | 126 | Excerpts: 127 | {context_text} 128 | 129 | Answer: 130 | """ 131 | 132 | response = self._client.chat.completions.create( 133 | model="gpt-4o", 134 | messages=[{"role": "user", "content": prompt}], 135 | temperature=0.0, 136 | ) 137 | answer = response.choices[0].message.content 138 | 139 | return answer 140 | 141 | def generate_quiz( 142 | self, 143 | category: str, 144 | number_of_questions: int, 145 | lang_prefix: str, 146 | better_explanation: str, 147 | ): 148 | category = sanitize_category(category) 149 | results = self._storage.sample_n_connected_paragraphs( 150 | category, number_of_questions 151 | ) 152 | if not results: 153 | return None 154 | context = [r["content"] for r in results] 155 | 156 | if len(context) == 0: 157 | return None 158 | 159 | context_text = "\n\n".join(context) 160 | 161 | quiz_prompt = f""" 162 | You are a pub quiz master. Using only the information in the text below, generate {number_of_questions} fun and challenging quiz questions with their answers. 163 | Questions should range from easy to medium difficulty.
You can use your external knowledge to judge whether the question is something that pub quiz participants could reasonably know. 164 | You cannot use external knowledge to form your answers; they must be based solely on the information in the text below. 165 | 166 | In addition to the question and answer, also include a short **explanation** that clearly states where and how the answer was derived from the provided text. This explanation should help someone understand the context or logic behind the answer, based solely on the text. 167 | 168 | Respond in the following language: **{lang_prefix}**. 169 | 170 | Focus only on facts from the content itself (avoid questions about references, publications, or sources). 171 | 172 | Here is an additional instruction on which kinds of questions to focus on: 173 | {better_explanation} 174 | 175 | Return a valid JSON array of {number_of_questions} objects. Each object must contain: 176 | - "question": the quiz question 177 | - "difficulty": one of "easy", "medium", "hard" 178 | - "answer": the correct answer 179 | - "explanation": a concise 2–3 sentence explanation, based solely on the text, that provides the relevant context for the answer 180 | 181 | Example: 182 | [ 183 | {{ 184 | "question": "What year was Rome founded?", 185 | "difficulty": "Easy", 186 | "answer": "753 BC", 187 | "explanation": "The text states that Rome was founded in 753 BC by Romulus. This marks the beginning of Roman history according to tradition." 188 | }}, 189 | ... 190 | ] 191 | 192 | Text: 193 | {context_text} 194 | """ 195 | 196 | response = self._client.chat.completions.create( 197 | model="gpt-4o", 198 | messages=[{"role": "user", "content": quiz_prompt}], 199 | temperature=0.7, 200 | ) 201 | 202 | raw_output = response.choices[0].message.content.strip() 203 | cleaned_output = extract_json(raw_output) 204 | 205 | try: 206 | quiz = json.loads(cleaned_output) 207 | return quiz 208 | except json.JSONDecodeError: 209 | raise 210 | -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/embeddings.py: -------------------------------------------------------------------------------- 1 | from sentence_transformers import SentenceTransformer 2 | from typing import List 3 | 4 | model_name = "all-mpnet-base-v2" 5 | 6 | class EmbeddingGenerator: 7 | def __init__(self): 8 | self._model = SentenceTransformer(model_name) 9 | 10 | def get_embeddings(self, paragraphs: List[str]): 11 | embeddings = self._model.encode(paragraphs, convert_to_numpy=True) 12 | return embeddings 13 | 14 | def get_question_embedding(self, query: str): 15 | query_vector = self._model.encode(query).tolist() 16 | return query_vector 17 | -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/memgraph_storage.py: -------------------------------------------------------------------------------- 1 | from gqlalchemy import Memgraph 2 | from storage import Storage 3 | import random 4 | import uuid 5 | 6 | from typing import List 7 | 8 | 9 | class MemgraphStorage(Storage): 10 | def __init__(self): 11 | super().__init__() 12 | self._memgraph = Memgraph() 13 | self._memgraph.execute("CREATE INDEX ON :All") 14 | 15 | def get_all_categories(self): 16 | results = self._memgraph.execute_and_fetch( 17 | """ 18 | MATCH (n) 19 | WITH labels(n) AS l 20 | UNWIND l AS ll 21 | RETURN DISTINCT ll AS label 22 | ORDER BY label""" 23 | ) 24 | return [record["label"] for record in results] 25 | 26 |
def get_similar_documents(self, label: str, query_vector: str, n: int): 27 | results = self._memgraph.execute_and_fetch( 28 | f""" 29 | CALL vector_search.search("{label.lower()}_vector_index", {n}, $query_vector) 30 | YIELD node, similarity 31 | RETURN node.content AS content, similarity 32 | """, 33 | {"query_vector": query_vector}, 34 | ) 35 | 36 | return results 37 | 38 | def get_paragraph_ids(self, category: str) -> List[int]: 39 | ids = list( 40 | self._memgraph.execute_and_fetch(f"MATCH (p:{category}) RETURN p.id AS id") 41 | ) 42 | return [x["id"] for x in ids] 43 | 44 | def sample_n_connected_paragraphs(self, category: str, number_of_questions: int): 45 | ids = self.get_paragraph_ids(category) 46 | if not len(ids): 47 | return None 48 | 49 | sample_size = min(number_of_questions, len(ids)) 50 | 51 | start_ids = random.sample(ids, k=sample_size) 52 | results = list( 53 | self._memgraph.execute_and_fetch( 54 | f""" 55 | UNWIND $ids AS id 56 | MATCH path=(p:{category} {{id: id}})-[:NEXT *bfs 0..5]->(next) 57 | WITH project(path) as graph 58 | UNWIND graph.nodes as nodes 59 | RETURN nodes.content AS content 60 | ORDER BY nodes.index ASC 61 | """, 62 | {"ids": start_ids}, 63 | ) 64 | ) 65 | if len(results) == 0: 66 | results = list( 67 | self._memgraph.execute_and_fetch( 68 | f""" 69 | UNWIND $ids AS id 70 | MATCH (node:{category} {{id: id}}) 71 | RETURN node.content AS content 72 | ORDER BY node.index ASC 73 | """, 74 | {"ids": start_ids}, 75 | ) 76 | ) 77 | 78 | return results 79 | 80 | def ingest_paragraphs( 81 | self, 82 | category: str, 83 | paragraphs: List, 84 | embeddings: List, 85 | lang_prefix: str, 86 | mode: str, 87 | ): 88 | if mode == "replace": 89 | self._memgraph.execute("STORAGE MODE IN_MEMORY_ANALYTICAL") 90 | self._memgraph.execute("DROP GRAPH") 91 | self._memgraph.execute("CREATE INDEX ON :All") 92 | 93 | paragraph_nodes = [] 94 | for idx, (text, vector) in enumerate(zip(paragraphs, embeddings)): 95 | para_id = str(uuid.uuid4()) 96 | vector_list = vector.tolist() 97 | content = text.strip() 98 | 99 | # Create the paragraph node 100 | self._memgraph.execute( 101 | f""" 102 | CREATE (p:{category}:All {{ 103 | id: $id, 104 | content: $content, 105 | page: $page, 106 | index: $idx, 107 | vector: $vector, 108 | lang_prefix: $lang_prefix 109 | }}) 110 | """, 111 | { 112 | "id": para_id, 113 | "content": content, 114 | "page": category, 115 | "idx": idx, 116 | "vector": vector_list, 117 | "lang_prefix": lang_prefix, 118 | }, 119 | ) 120 | paragraph_nodes.append((para_id, idx)) 121 | 122 | # Create :NEXT relationships between consecutive paragraphs 123 | for (id1, _), (id2, _) in zip(paragraph_nodes[:-1], paragraph_nodes[1:]): 124 | self._memgraph.execute( 125 | f""" 126 | MATCH (p1:{category} {{id: $id1}}), (p2:{category} {{id: $id2}}) 127 | CREATE (p1)-[:NEXT]->(p2) 128 | """, 129 | {"id1": id1, "id2": id2}, 130 | ) 131 | 132 | dimension = len(embeddings[0]) 133 | capacity = len(embeddings) * 2 134 | 135 | index_name = f"{category.lower()}_vector_index" 136 | self._memgraph.execute( 137 | f""" 138 | CREATE VECTOR INDEX {index_name} ON :{category}(vector) 139 | WITH CONFIG {{ 140 | "dimension": {dimension}, 141 | "capacity": {capacity}, 142 | "metric": "cos" 143 | }} 144 | """ 145 | ) 146 | 147 | return len(paragraphs) 148 | 149 | def get_all_paragraphs(self, category: str) -> List[str]: 150 | results = self._memgraph.execute_and_fetch( 151 | f""" 152 | MATCH (p:{category}) 153 | RETURN p.content AS content, p.id as id 154 | ORDER BY p.index ASC 155 | """ 156 | ) 157 | 
return [ 158 | {"content": record["content"], "id": record["id"]} for record in results 159 | ] 160 | 161 | def delete_paragraph(self, category: str, paragraph_id: str): 162 | self._memgraph.execute( 163 | """ 164 | MATCH (p {id: $id}) 165 | DETACH DELETE p 166 | """, 167 | {"id": paragraph_id}, 168 | ) 169 | -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/requirements.txt: -------------------------------------------------------------------------------- 1 | adlfs==2024.12.0 2 | aiohappyeyeballs==2.6.1 3 | aiohttp==3.11.17 4 | aiosignal==1.3.2 5 | altair==5.5.0 6 | annotated-types==0.7.0 7 | anyio==4.9.0 8 | async-timeout==5.0.1 9 | attrs==25.3.0 10 | azure-core==1.33.0 11 | azure-datalake-store==0.0.53 12 | azure-identity==1.21.0 13 | azure-storage-blob==12.25.1 14 | banks==2.1.2 15 | beautifulsoup4==4.13.4 16 | blinker==1.9.0 17 | cachetools==5.5.2 18 | certifi==2025.1.31 19 | cffi==1.17.1 20 | charset-normalizer==3.4.1 21 | click==8.1.8 22 | colorama==0.4.6 23 | cryptography==44.0.2 24 | dacite==1.9.2 25 | dataclasses-json==0.6.7 26 | Deprecated==1.2.18 27 | dirtyjson==1.0.8 28 | distro==1.9.0 29 | exceptiongroup==1.2.2 30 | faiss-cpu==1.10.0 31 | filelock==3.18.0 32 | filetype==1.2.0 33 | frozenlist==1.6.0 34 | fsspec==2025.3.2 35 | gitdb==4.0.12 36 | GitPython==3.1.44 37 | GQLAlchemy==1.7.0 38 | greenlet==3.2.0 39 | griffe==1.7.2 40 | h11==0.14.0 41 | httpcore==1.0.8 42 | httpx==0.28.1 43 | huggingface-hub==0.30.2 44 | idna==3.10 45 | isodate==0.7.2 46 | Jinja2==3.1.6 47 | jiter==0.9.0 48 | joblib==1.4.2 49 | jsonschema==4.23.0 50 | jsonschema-specifications==2024.10.1 51 | llama-cloud==0.1.18 52 | llama-cloud-services==0.6.12 53 | llama-index==0.12.31 54 | llama-index-agent-openai==0.4.6 55 | llama-index-cli==0.4.1 56 | llama-index-core==0.12.31 57 | llama-index-embeddings-openai==0.3.1 58 | llama-index-indices-managed-llama-cloud==0.6.11 59 | llama-index-llms-openai==0.3.37 60 | llama-index-multi-modal-llms-openai==0.4.3 61 | llama-index-program-openai==0.3.1 62 | llama-index-question-gen-openai==0.3.0 63 | llama-index-readers-file==0.4.7 64 | llama-index-readers-llama-parse==0.4.0 65 | llama-index-readers-wikipedia==0.3.0 66 | llama-parse==0.6.12 67 | MarkupSafe==3.0.2 68 | marshmallow==3.26.1 69 | mpmath==1.3.0 70 | msal==1.32.0 71 | msal-extensions==1.3.1 72 | multidict==6.4.3 73 | mypy-extensions==1.0.0 74 | narwhals==1.35.0 75 | neo4j==5.28.1 76 | nest-asyncio==1.6.0 77 | networkx==3.4.2 78 | nltk==3.9.1 79 | numpy==1.26.4 80 | nvidia-cublas-cu12==12.4.5.8 81 | nvidia-cuda-cupti-cu12==12.4.127 82 | nvidia-cuda-nvrtc-cu12==12.4.127 83 | nvidia-cuda-runtime-cu12==12.4.127 84 | nvidia-cudnn-cu12==9.1.0.70 85 | nvidia-cufft-cu12==11.2.1.3 86 | nvidia-curand-cu12==10.3.5.147 87 | nvidia-cusolver-cu12==11.6.1.9 88 | nvidia-cusparse-cu12==12.3.1.170 89 | nvidia-cusparselt-cu12==0.6.2 90 | nvidia-nccl-cu12==2.21.5 91 | nvidia-nvjitlink-cu12==12.4.127 92 | nvidia-nvtx-cu12==12.4.127 93 | openai==1.75.0 94 | packaging==24.2 95 | pandas==2.2.3 96 | pillow==11.2.1 97 | platformdirs==4.3.7 98 | propcache==0.3.1 99 | protobuf==5.29.4 100 | psutil==6.1.1 101 | pyarrow==19.0.1 102 | pycparser==2.22 103 | pydantic==2.11.3 104 | pydantic_core==2.33.1 105 | pydeck==0.9.1 106 | PyJWT==2.10.1 107 | pymgclient==1.3.1 108 | pypdf==5.4.0 109 | python-dateutil==2.9.0.post0 110 | python-dotenv==1.1.0 111 | pytz==2025.2 112 | PyYAML==6.0.2 113 | referencing==0.36.2 114 | regex==2024.11.6 115 | requests==2.32.3 116 | 
rpds-py==0.24.0 117 | safetensors==0.5.3 118 | scikit-learn==1.6.1 119 | scipy==1.15.2 120 | sentence-transformers==4.1.0 121 | six==1.17.0 122 | smmap==5.0.2 123 | sniffio==1.3.1 124 | soupsieve==2.6 125 | SQLAlchemy==2.0.40 126 | streamlit==1.44.1 127 | striprtf==0.0.26 128 | sympy==1.13.1 129 | tenacity==9.1.2 130 | threadpoolctl==3.6.0 131 | tiktoken==0.9.0 132 | tokenizers==0.21.1 133 | toml==0.10.2 134 | torch==2.6.0 135 | tornado==6.4.2 136 | tqdm==4.67.1 137 | transformers==4.51.3 138 | triton==3.2.0 139 | typing-inspect==0.9.0 140 | typing-inspection==0.4.0 141 | typing_extensions==4.13.2 142 | tzdata==2025.2 143 | urllib3==2.4.0 144 | watchdog==6.0.0 145 | wikipedia==1.4.0 146 | Wikipedia-API==0.8.1 147 | wrapt==1.17.2 148 | yarl==1.20.0 149 | -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/storage.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import List 3 | 4 | class Storage(ABC): 5 | def get_all_categories(self): 6 | pass 7 | 8 | def ingest_category(self): 9 | pass 10 | 11 | def get_similar_documents(self, label: str, question: str, n: int): 12 | pass 13 | 14 | def get_paragraph_ids(self, category: str): 15 | pass 16 | 17 | def sample_n_connected_paragraphs(self, category: str, number_of_questions: int): 18 | pass 19 | 20 | def ingest_paragraphs(self, category: str, paragraphs: List[str], embeddings: List, lang_prefix: str, mode: str): 21 | pass -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/wikipedia_detailed_processor.py: -------------------------------------------------------------------------------- 1 | import wikipediaapi 2 | from embeddings import EmbeddingGenerator 3 | 4 | 5 | class DetailedWikipediaProcessor: 6 | def __init__(self): 7 | self._wiki = wikipediaapi.Wikipedia( 8 | language="en", 9 | user_agent="WikiReaderBot/1.0 (mrdjen.josip@gmail.com)", 10 | extract_format=wikipediaapi.ExtractFormat.WIKI 11 | ) 12 | self._embeddings_generator = EmbeddingGenerator() 13 | 14 | def _extract_paragraphs(self, section, min_length=40) -> list[str]: 15 | paragraphs = [] 16 | if len(section.text.strip()) > min_length: 17 | paragraphs.extend( 18 | [ 19 | p.strip() 20 | for p in section.text.split("\n") 21 | if len(p.strip()) > min_length 22 | ] 23 | ) 24 | for sub_section in section.sections: 25 | paragraphs.extend(self._extract_paragraphs(sub_section)) 26 | return paragraphs 27 | 28 | def process_detailed_sections( 29 | self, category: str, language_prefix: str = "en", section_filter: str = None 30 | ): 31 | self._wiki = wikipediaapi.Wikipedia( 32 | language=language_prefix or "en", 33 | user_agent="WikiReaderBot/1.0 (mrdjen.josip@gmail.com)", 34 | extract_format=wikipediaapi.ExtractFormat.WIKI 35 | ) 36 | page = self._wiki.page(category) 37 | 38 | if not page.exists(): 39 | print(f"❌ Page '{category}' does not exist.") 40 | return [], [] 41 | 42 | if section_filter: 43 | # Only grab sections that match the filter 44 | for section in page.sections: 45 | if section.title.lower() == section_filter.lower(): 46 | paragraphs = self._extract_paragraphs(section) 47 | break 48 | else: 49 | print(f"⚠️ Section '{section_filter}' not found.") 50 | return [], [] 51 | else: 52 | # Grab all content recursively 53 | paragraphs = self._extract_paragraphs(page) 54 | 55 | embeddings = self._embeddings_generator.get_embeddings(paragraphs) 56 | return
paragraphs, embeddings 57 | -------------------------------------------------------------------------------- /retrieval/vector-search/chat-with-your-knowledge/wikipedia_processor.py: -------------------------------------------------------------------------------- 1 | from llama_index.readers.wikipedia import WikipediaReader 2 | from embeddings import EmbeddingGenerator 3 | 4 | class WikipediaProcessor: 5 | def __init__(self): 6 | self._embeddings_generator = EmbeddingGenerator() 7 | 8 | def process_wikipedia_documents(self, category: str, language_prefix: str = ""): 9 | reader = WikipediaReader() 10 | documents = reader.load_data(pages=[category], lang_prefix=language_prefix) 11 | paragraphs = [p.strip() for doc in documents for p in doc.text.split('\n') if len(p.strip()) > 40] 12 | embeddings = self._embeddings_generator.get_embeddings(paragraphs) 13 | 14 | return paragraphs, embeddings 15 | 16 | -------------------------------------------------------------------------------- /retrieval/vector-search/simple-example/README.md: -------------------------------------------------------------------------------- 1 | ### How to run 2 | 3 | Follow these steps to run the `vector.py` script: 4 | 5 | ### 1. Install dependencies 6 | To run this demo, first install the Python requirements: 7 | 8 | ``` 9 | pip install -r requirements.txt 10 | ``` 11 | 12 | ### 2. Run the script 13 | 14 | Locate the script and, in a terminal, run: 15 | 16 | ``` 17 | python vector.py 18 | ``` 19 | 20 | -------------------------------------------------------------------------------- /retrieval/vector-search/simple-example/requirements.txt: -------------------------------------------------------------------------------- 1 | neo4j==5.26.0 2 | sentence-transformers==3.2.1 3 | python-dotenv==1.0.1 4 | -------------------------------------------------------------------------------- /retrieval/vector-search/simple-example/vector.py: -------------------------------------------------------------------------------- 1 | from sentence_transformers import SentenceTransformer 2 | from dotenv import load_dotenv 3 | import neo4j 4 | import asyncio 5 | from openai import AsyncOpenAI 6 | import os 7 | import json 8 | from collections import Counter 9 | from pathlib import Path 10 | from time import sleep 11 | 12 | 13 | def compute_triplets_embeddings(driver, model): 14 | with driver.session() as session: 15 | # Retrieve all relationships 16 | result = session.run("MATCH (n:Person)-[r]->(m) RETURN n, r, m") 17 | print("Embedded data: ") 18 | 19 | for record in result: 20 | node1 = record["n"] 21 | relationship = record["r"] 22 | node2 = record["m"] 23 | # Check if the source node already has an embedding 24 | if "embedding" in node1: 25 | print("Embedding already exists") 26 | return 27 | # Combine both nodes and the relationship into a single string 28 | triplet_data = ( 29 | " ".join(node1.labels) 30 | + " " 31 | + " ".join(f"{k}: {v}" for k, v in node1.items()) 32 | + " " 33 | + relationship.type 34 | + " " 35 | + " ".join(f"{k}: {v}" for k, v in relationship.items()) 36 | + " " 37 | + " ".join(node2.labels) 38 | + " " 39 | + " ".join(f"{k}: {v}" for k, v in node2.items()) 40 | ) 41 | print(triplet_data) 42 | # Compute the embedding for the triplet 43 | triplet_embedding = model.encode(triplet_data) 44 | 45 | # Store the triplet embedding on node1 46 | session.run( 47 | f"MATCH (n:Person) WHERE id(n) = {node1.element_id} SET n.embedding = {triplet_embedding.tolist()}" 48 | ) 49 | 50 | 51 | 52 | def compute_node_embeddings(driver, model): 53
| with driver.session() as session: 54 | # Retrieve all nodes 55 | result = session.run("MATCH (n:Person) RETURN n") 56 | print("Embedded data: ") 57 | for record in result: 58 | node = record["n"] 59 | # Check if the node already has an embedding 60 | if "embedding" in node: 61 | print("Embedding already exists") 62 | return 63 | 64 | # Combine node labels and properties into a single string 65 | node_data = ( 66 | " ".join(node.labels) 67 | + " " 68 | + " ".join(f"{k}: {v}" for k, v in node.items()) 69 | ) 70 | print(node_data) 71 | # Compute the embedding for the node 72 | node_embedding = model.encode(node_data) 73 | 74 | # Store the embedding back into the node 75 | session.run( 76 | f"MATCH (n) WHERE id(n) = {node.element_id} SET n.embedding = {node_embedding.tolist()}" 77 | ) 78 | 79 | 80 | def find_most_similar_node(driver, question_embedding): 81 | 82 | with driver.session() as session: 83 | # Perform the vector search on all nodes based on the question embedding 84 | result = session.run( 85 | f"CALL vector_search.search('person_index', 5, {question_embedding.tolist()}) YIELD * RETURN *;" 86 | ) 87 | nodes_data = [] 88 | 89 | # Retrieve all similar nodes and print them 90 | for record in result: 91 | node = record["node"] 92 | properties = {k: v for k, v in node.items() if k != "embedding"} 93 | node_data = { 94 | "distance": record["distance"], 95 | "id": node.element_id, 96 | "labels": list(node.labels), 97 | "properties": properties, 98 | } 99 | nodes_data.append(node_data) 100 | print("All similar nodes:") 101 | for node in nodes_data: 102 | print(node) 103 | 104 | # Return the most similar node 105 | return nodes_data[0] if nodes_data else None 106 | 107 | 108 | def seed_database(driver): 109 | with driver.session() as session: 110 | 111 | # Clear the database 112 | session.run("MATCH (n) DETACH DELETE n") 113 | sleep(1) 114 | 115 | 116 | 117 | # Create a few nodes 118 | session.run("CREATE (:Person {name: 'Alice', age: 30})") 119 | session.run("CREATE (:Person {name: 'Bob', age: 25})") 120 | session.run("CREATE (:Person {name: 'Charlie', age: 35})") 121 | session.run("CREATE (:Person {name: 'David', age: 40})") 122 | session.run("CREATE (:Person {name: 'Eve', age: 20})") 123 | session.run("CREATE (:Person {name: 'Frank', age: 45})") 124 | session.run("CREATE (:Person {name: 'Grace', age: 50})") 125 | session.run("CREATE (:Person {name: 'Hannah', age: 55})") 126 | session.run("CREATE (:Person {name: 'Jack', age: 65})") 127 | 128 | 129 | session.run("CREATE (:Person {name: 'Peter', age: 30})") 130 | session.run("CREATE (:Person {name: 'Peter', age: 60})") 131 | session.run("CREATE (:Person {name: 'Peter', age: 90})") 132 | session.run("CREATE (:Person {name: 'John', age: 30})") 133 | session.run("CREATE (:Person {name: 'John', age: 60})") 134 | session.run("CREATE (:Person {name: 'John', age: 90})") 135 | session.run("CREATE (:Person {name: 'Petar', age: 30})") 136 | session.run("CREATE (:Person {name: 'Petar', age: 60})") 137 | session.run("CREATE (:Person {name: 'Petar', age: 90})") 138 | 139 | session.run("CREATE (:Bank {name: 'Deutsche Bank AG'})") 140 | session.run("CREATE (:Bank {name: 'Commerzbank'})") 141 | session.run("CREATE (:Bank {name: 'Unicredit Bank AG'})") 142 | 143 | session.run("CREATE (:Country {name: 'Germany'})") 144 | session.run("CREATE (:Country {name: 'Canada'})") 145 | 146 | session.run("CREATE (:City {name: 'Munich'})") 147 | 148 | 149 | session.run("MATCH (p:Person {name: 'Peter', age: 30}), (o:Country {name: 'Germany'}) MERGE 
(p)-[:LIVES_IN]->(o);") 150 | session.run("MATCH (p:Person {name: 'John', age: 30}), (o:Country {name: 'Germany'}) MERGE (p)-[:LIVES_IN]->(o);") 151 | session.run("MATCH (p:Person {name: 'Charlie', age: 35}), (o:Country {name: 'Germany'}) MERGE (p)-[:LIVES_IN]->(o);") 152 | 153 | 154 | session.run("MATCH (p:Person {name: 'Peter', age: 60}), (b:Bank {name: 'Deutsche Bank AG'}) MERGE (p)-[:IS_CLIENT]->(b);") 155 | session.run("MATCH (p:Person {name: 'John', age: 60}), (b:Bank {name: 'Commerzbank'}) MERGE (p)-[:IS_CLIENT]->(b);") 156 | 157 | 158 | session.run("MATCH (p:Person {name: 'Bob', age: 25}), (o:Country {name: 'Canada'}) MERGE (p)-[:LIVES_IN]->(o);") 159 | session.run("MATCH (p:Person {name: 'David', age: 40}), (o:Country {name: 'Canada'}) MERGE (p)-[:LIVES_IN]->(o);") 160 | session.run("MATCH (p:Person {name: 'Eve', age: 20}), (o:Country {name: 'Canada'}) MERGE (p)-[:LIVES_IN]->(o);") 161 | 162 | session.run("MATCH (p:Person {name: 'Frank', age: 45}), (o:City {name: 'Munich'}) MERGE (p)-[:WORKS_IN]->(o);") 163 | 164 | 165 | 166 | 167 | session.run("""CREATE VECTOR INDEX person_index ON :Person(embedding) WITH CONFIG {"dimension": 384, "capacity": 1000, "metric": "cos","resize_coefficient": 2}""") 168 | 169 | 170 | 171 | def main(question, triplets): 172 | 173 | print("The question: ", question) 174 | driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("", "")) 175 | 176 | # Seed the database with some data 177 | seed_database(driver) 178 | 179 | # Load the SentenceTransformer model 180 | model = SentenceTransformer("paraphrase-MiniLM-L6-v2") 181 | 182 | if triplets: 183 | compute_triplets_embeddings(driver, model) 184 | else: 185 | compute_node_embeddings(driver, model) 186 | 187 | question_embedding = model.encode(question) 188 | most_similar_node = find_most_similar_node(driver, question_embedding) 189 | 190 | 191 | if __name__ == "__main__": 192 | # question = "Is there any Person in the database?" 193 | # question = "Is there any person Peter?" 194 | # question = "Is there any person Petar?" 195 | # question = "Is there any person that lives in Germany?" 196 | # question = "Is there any Peter that lives in Germany?" 197 | question = "Is there any Peter that lives in Munich?" 198 | 199 | main(question=question, triplets=True) 200 | -------------------------------------------------------------------------------- /retrieval/vector-search/vector_search_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Prerequisites\n", 8 | "\n", 9 | "- **Docker** \n", 10 | " Required to run Memgraph, as Memgraph is a native Linux application and cannot be installed directly on Windows or macOS.\n", 11 | "\n", 12 | "- **pandas** \n", 13 | " A fast, powerful, flexible, and easy-to-use open-source data analysis and manipulation tool, built on top of the Python programming language.\n", 14 | "\n", 15 | "- **kagglehub** \n", 16 | " A library for downloading datasets directly from Kaggle.\n", 17 | "\n", 18 | "- **sentence_transformers** \n", 19 | " A library for computing state-of-the-art sentence and text embeddings.\n", 20 | "\n", 21 | "- **neo4j** \n", 22 | " Used to query Memgraph."
23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Build a Movie Similarity Search Engine with Vector Search in Memgraph\n", 30 | "\n", 31 | "In this example, we will demonstrate how vector search can be used to find movies based on their plots or short descriptions. For this, we will use the Wikipedia Movie Plots dataset, available on Kaggle.\n", 32 | "To get started, launch Memgraph. \n", 33 | "\n", 34 | "You can start it with Docker:\n", 35 | "`docker run -p 7687:7687 -p 7444:7444 memgraph/memgraph:3.0.0`\n", 36 | "\n", 37 | "After that, we need to create a vector index in Memgraph. You can do it via the Python client: " 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import neo4j\n", 47 | "\n", 48 | "driver = neo4j.GraphDatabase.driver(\"bolt://localhost:7687\", auth=(\"\", \"\"))\n", 49 | "with driver.session() as session:\n", 50 | " session.run(\"\"\"CREATE VECTOR INDEX movies_index ON :Movie(embedding) WITH CONFIG {\"dimension\": 384, \"capacity\": 100, \"metric\": \"cos\"};\"\"\")\n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "We created a vector index `movies_index`, defined on the label `Movie` and the property `embedding`.\n", 58 | "\n", 59 | "First, let’s load the dataset:" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 1, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | " Release Year Title Origin/Ethnicity \\\n", 72 | "0 1901 Kansas Saloon Smashers American \n", 73 | "1 1901 Love by the Light of the Moon American \n", 74 | "2 1901 The Martyred Presidents American \n", 75 | "3 1901 Terrible Teddy, the Grizzly King American \n", 76 | "4 1902 Jack and the Beanstalk American \n", 77 | "\n", 78 | " Director Cast Genre \\\n", 79 | "0 Unknown NaN unknown \n", 80 | "1 Unknown NaN unknown \n", 81 | "2 Unknown NaN unknown \n", 82 | "3 Unknown NaN unknown \n", 83 | "4 George S. Fleming, Edwin S. Porter NaN unknown \n", 84 | "\n", 85 | " Wiki Page \\\n", 86 | "0 https://en.wikipedia.org/wiki/Kansas_Saloon_Sm... \n", 87 | "1 https://en.wikipedia.org/wiki/Love_by_the_Ligh... \n", 88 | "2 https://en.wikipedia.org/wiki/The_Martyred_Pre... \n", 89 | "3 https://en.wikipedia.org/wiki/Terrible_Teddy,_... \n", 90 | "4 https://en.wikipedia.org/wiki/Jack_and_the_Bea... \n", 91 | "\n", 92 | " Plot \n", 93 | "0 A bartender is working at a saloon, serving dr... \n", 94 | "1 The moon, painted with a smiling face hangs ov... \n", 95 | "2 The film, just over a minute long, is composed... \n", 96 | "3 Lasting just 61 seconds and consisting of two ... \n", 97 | "4 The earliest known adaptation of the classic f... \n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "import pandas as pd\n", 103 | "import kagglehub\n", 104 | "\n", 105 | "dataset_path = kagglehub.dataset_download(\"jrobischon/wikipedia-movie-plots\")\n", 106 | "df = pd.read_csv(dataset_path + \"/wiki_movie_plots_deduped.csv\")\n", 107 | "print(df.head())" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "This dataset consists of 32,432 movies. To keep the example simple and understandable, we will reduce the dataset size by filtering movies based on the director."
115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 2, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "(9, 8)\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "nolan_movies = df[df['Director'] == 'Christopher Nolan']\n", 132 | "nolan_movies.reset_index(drop=True, inplace=True)\n", 133 | "print(nolan_movies.shape)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "We also need a function to compute embeddings:" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 3, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "(9, 384)\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "from sentence_transformers import SentenceTransformer\n", 158 | "\n", 159 | "def compute_embeddings(texts):\n", 160 | " model = SentenceTransformer('paraphrase-MiniLM-L6-v2')\n", 161 | " return model.encode(texts)\n", 162 | " \n", 163 | "embeddings = compute_embeddings(nolan_movies['Plot'].values)\n", 164 | "print(embeddings.shape)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "Now, let’s import these movies into Memgraph:" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 4, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "import neo4j\n", 181 | "\n", 182 | "driver = neo4j.GraphDatabase.driver(\"bolt://localhost:7687\", auth=(\"\", \"\"))\n", 183 | "with driver.session() as session:\n", 184 | " for index, row in nolan_movies.iterrows():\n", 185 | " # remove quotes from the title to avoid parsing issues\n", 186 | " title = row[\"Title\"].replace('\"', '')\n", 187 | " \n", 188 | " embedding = embeddings[index].tolist()\n", 189 | " embeddings_str = \",\".join([str(x) for x in embedding])\n", 190 | " query = f'CREATE (m:Movie {{title: \"{title}\", year: {row[\"Release Year\"]}, embedding: [{embeddings_str}]}})'\n", 191 | " session.run(query)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "As you can see, we computed the embeddings with the `compute_embeddings` function and stored the resulting vectors in the `embedding` property of each node.\n", 199 | "\n", 200 | "After we have imported the data into Memgraph, we can start with our experiments!\n", 201 | "\n", 202 | "We will define a function that finds the most similar movies for a given plot description:" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 5, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "def find_movie(plot):\n", 212 | " embeddings = compute_embeddings([plot])\n", 213 | " embeddings_str = \",\".join([str(x) for x in embeddings[0]])\n", 214 | " with driver.session() as session:\n", 215 | " query = f\"CALL vector_search.search('movies_index', 3, [{embeddings_str}]) yield node, similarity return node.title, similarity\"\n", 216 | " result = session.run(query)\n", 217 | " for record in result:\n", 218 | " print(record)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "Now, let’s try to find Inception using the following plot:" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 6, 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 236 |
"text": [ 237 | "\n", 238 | "\n", 239 | "\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "plot = \"A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a C.E.O.\"\n", 245 | "find_movie(plot)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "Next, let’s attempt to find Memento:" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 7, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "\n", 265 | "\n", 266 | "\n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "plot = \"An insurance investigator suffers from anterograde amnesia, leaving him unable to form new memories, and uses notes and tattoos to track down his wife's killer.\"\n", 272 | "find_movie(plot)" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "usr", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.10.12" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 2 297 | } 298 | --------------------------------------------------------------------------------