├── tinygraph
│   ├── llm
│   │   ├── __init__.py
│   │   ├── groq.py
│   │   ├── zhipu.py
│   │   └── base.py
│   ├── embedding
│   │   ├── __init__.py
│   │   ├── zhipu.py
│   │   └── base.py
│   ├── utils.py
│   ├── prompt.py
│   └── graph.py
├── requirements.txt
├── images
│   ├── 运行结果.png
│   ├── 图数据库示例.png
│   ├── Tiny-Graphrag流程图.png
│   └── Learning-Algorithms节点的详细信息.png
├── .gitignore
├── README.md
├── example
│   └── data.md
├── Tiny-Graphrag_test.ipynb
└── Tiny-Graphrag_User_Guide_and_Code_Documentation.md
/tinygraph/llm/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tinygraph/embedding/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | neo4j
2 | numpy
3 | tqdm
4 | zhipuai
5 | groq
6 | python-dotenv
7 |
--------------------------------------------------------------------------------
/images/运行结果.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/limafang/tiny-graphrag/HEAD/images/运行结果.png
--------------------------------------------------------------------------------
/images/图数据库示例.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/limafang/tiny-graphrag/HEAD/images/图数据库示例.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled python modules
2 | *.pyc
3 | __pycache__/
4 | data_info.txt
5 | workspace/
6 | .vscode/
--------------------------------------------------------------------------------
/images/Tiny-Graphrag流程图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/limafang/tiny-graphrag/HEAD/images/Tiny-Graphrag流程图.png
--------------------------------------------------------------------------------
/images/Learning-Algorithms节点的详细信息.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/limafang/tiny-graphrag/HEAD/images/Learning-Algorithms节点的详细信息.png
--------------------------------------------------------------------------------
/tinygraph/embedding/zhipu.py:
--------------------------------------------------------------------------------
1 | from zhipuai import ZhipuAI
2 | from typing import List
3 | from .base import BaseEmb
4 |
5 |
6 | class zhipuEmb(BaseEmb):
7 |     def __init__(self, model_name: str, api_key: str, **kwargs):
8 |         super().__init__(model_name=model_name, **kwargs)
9 |         self.client = ZhipuAI(api_key=api_key)
10 |
11 |     def get_emb(self, text: str) -> List[float]:
12 |         emb = self.client.embeddings.create(
13 |             model=self.model_name,
14 |             input=text,
15 |         )
16 |         return emb.data[0].embedding
17 |
--------------------------------------------------------------------------------
/tinygraph/embedding/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import List, Any, Optional
3 |
4 |
5 | class BaseEmb(ABC):
6 |     def __init__(
7 |         self,
8 |         model_name: str,
9 |         model_params: Optional[dict[str, Any]] = None,
10 |         **kwargs: Any,
11 |     ):
12 |         self.model_name = model_name
13 |         self.model_params = model_params or {}
14 |
15 |     @abstractmethod
16 |     def get_emb(self, input: str) -> List[float]:
17 |         """Sends a text input to the embedding model and retrieves the embedding.
18 |
19 |         Args:
20 |             input (str): Text sent to the embedding model
21 |
22 |         Returns:
23 |             List[float]: The embedding vector from the model.
24 |         """
25 |         pass
26 |
--------------------------------------------------------------------------------
/tinygraph/llm/groq.py:
--------------------------------------------------------------------------------
1 | from groq import Groq
2 | from typing import Any, Optional
3 | from .base import BaseLLM
4 |
5 |
6 | class groqLLM(BaseLLM):
7 |     """Implementation of the BaseLLM interface using Groq."""
8 |
9 |     def __init__(
10 |         self,
11 |         model_name: str,
12 |         api_key: str,
13 |         model_params: Optional[dict[str, Any]] = None,
14 |         **kwargs: Any,
15 |     ):
16 |         super().__init__(model_name, model_params, **kwargs)
17 |         self.client = Groq(api_key=api_key)
18 |
19 |     def predict(self, input: str) -> str:
20 |         """Sends a text input to the Groq model and retrieves a response.
21 |
22 |         Args:
23 |             input (str): Text sent to the Groq model
24 |
25 |         Returns:
26 |             str: The response from the Groq model.
27 |         """
28 |         response = self.client.chat.completions.create(
29 |             model=self.model_name,
30 |             messages=[{"role": "user", "content": input}],
31 |         )
32 |         return response.choices[0].message.content
33 |
--------------------------------------------------------------------------------
/tinygraph/llm/zhipu.py:
--------------------------------------------------------------------------------
1 | from zhipuai import ZhipuAI
2 | from typing import Any, Optional
3 | from .base import BaseLLM
4 |
5 |
6 | class zhipuLLM(BaseLLM):
7 |     """Implementation of the BaseLLM interface using zhipuai."""
8 |
9 |     def __init__(
10 |         self,
11 |         model_name: str,
12 |         api_key: str,
13 |         model_params: Optional[dict[str, Any]] = None,
14 |         **kwargs: Any,
15 |     ):
16 |         super().__init__(model_name, model_params, **kwargs)
17 |         self.client = ZhipuAI(api_key=api_key)
18 |
19 |     def predict(self, input: str) -> str:
20 |         """Sends a text input to the zhipuai model and retrieves a response.
21 |
22 |         Args:
23 |             input (str): Text sent to the zhipuai model
24 |
25 |         Returns:
26 |             str: The response from the zhipuai model.
27 |         """
28 |         response = self.client.chat.completions.create(
29 |             model=self.model_name,
30 |             messages=[{"role": "user", "content": input}],
31 |         )
32 |         return response.choices[0].message.content
33 |
--------------------------------------------------------------------------------
/tinygraph/llm/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Any, Optional
3 |
4 |
5 | class BaseLLM(ABC):
6 |     """Interface for large language models.
7 |
8 |     Args:
9 |         model_name (str): The name of the language model.
10 |         model_params (Optional[dict[str, Any]], optional): Additional parameters passed to the model when text is sent to it. Defaults to None.
11 |         **kwargs (Any): Arguments passed to the model when the class is initialised. Defaults to None.
12 |     """
13 |
14 |     def __init__(
15 |         self,
16 |         model_name: str,
17 |         model_params: Optional[dict[str, Any]] = None,
18 |         **kwargs: Any,
19 |     ):
20 |         self.model_name = model_name
21 |         self.model_params = model_params or {}
22 |
23 |     @abstractmethod
24 |     def predict(self, input: str) -> str:
25 |         """Sends a text input to the LLM and retrieves a response.
26 |
27 |         Args:
28 |             input (str): Text sent to the LLM
29 |
30 |         Returns:
31 |             str: The response from the LLM.
32 |         """
--------------------------------------------------------------------------------
/tinygraph/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | import numpy as np
3 | from typing import List, Tuple
4 | from hashlib import md5
5 | import json
6 | import os
7 |
8 |
9 | def get_text_inside_tag(html_string: str, tag: str):
10 |     # html_string: the text to parse; tag: the tag name to search for
11 |     pattern = rf"<{tag}>(.*?)</{tag}>"
12 |     try:
13 |         result = re.findall(pattern, html_string, re.DOTALL)
14 |         return result
15 |     except re.error as e:
16 |         raise ValueError(f"Regex error while parsing tag <{tag}>: {e}")
17 |
18 |
19 | def read_json_file(file_path):
20 |     try:
21 |         with open(file_path, "r", encoding="utf-8") as file:
22 |             return json.load(file)
23 |     except (FileNotFoundError, json.JSONDecodeError):
24 |         return {}
25 |
26 |
27 | def write_json_file(data, file_path):
28 |     with open(file_path, "w", encoding="utf-8") as file:
29 |         json.dump(data, file, indent=4, ensure_ascii=False)
30 |
31 |
32 | def compute_mdhash_id(content, prefix: str = ""):
33 |     # Stable content-addressed id: optional prefix + MD5 hex digest of the text.
34 |     return prefix + md5(content.encode()).hexdigest()
35 |
36 |
37 | def save_triplets_to_txt(triplets, file_path):
38 |     with open(file_path, "a", encoding="utf-8") as file:
39 |         file.write(f"{triplets[0]},{triplets[1]},{triplets[2]}\n")
40 |
41 |
42 | def cosine_similarity(vector1: List[float], vector2: List[float]) -> float:
43 |     """Calculate the cosine similarity between two vectors."""
44 |     dot_product = np.dot(vector1, vector2)
45 |     magnitude = np.linalg.norm(vector1) * np.linalg.norm(vector2)
46 |     if not magnitude:
47 |         return 0.0
48 |     return dot_product / magnitude
49 |
50 |
51 | def create_file_if_not_exists(file_path: str):
52 |     if not os.path.exists(file_path):
53 |         with open(file_path, "w") as f:
54 |             f.write("")
56 |
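A quick usage sketch of these helpers (illustrative only, not a file in the repo):

```python
from tinygraph.utils import (
    get_text_inside_tag,
    compute_mdhash_id,
    cosine_similarity,
)

# Pull tagged spans out of LLM output.
print(get_text_inside_tag("<name>SVM</name><name>KNN</name>", "name"))
# -> ['SVM', 'KNN']

# Content-addressed ids stay stable across runs.
print(compute_mdhash_id("some chunk text", prefix="chunk-"))

# Cosine similarity of two embedding-like vectors.
print(cosine_similarity([1.0, 0.0], [1.0, 1.0]))  # ~0.707
```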
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Tiny-Graphrag
2 |
3 | Tiny-Graphrag is a pared-down GraphRAG implementation that aims to provide the simplest possible GraphRAG system with all the essential features. It implements the full document-ingestion pipeline along with local and global query support.
4 |
5 | ## 安装
6 |
7 | Tiny-Graphrag requires the following versions of Neo4j and the JDK, together with the GDS plugin:
8 |
9 | - Neo4j: 5.24.0
10 | - OpenJDK: 17.0.12
11 | - GDS: 2.10.1
12 |
13 | ## 快速开始
14 |
15 | First, clone the repository:
16 |
17 | ```shell
18 | git clone https://github.com/limafang/tiny-graphrag.git
19 | cd tiny-graphrag
20 | ```
21 |
22 | Install the required dependencies:
23 |
24 | ```shell
25 | pip install -r requirements.txt
26 | ```
27 |
28 | Next, configure the LLM and embedding services you want to use. ZhipuAI is the primary supported backend for both:
29 |
30 | ```python
31 | from tinygraph.graph import TinyGraph
32 | from tinygraph.embedding.zhipu import zhipuEmb
33 | from tinygraph.llm.zhipu import zhipuLLM
34 |
35 | emb = zhipuEmb("model name", "your key")
36 | llm = zhipuLLM("model name", "your key")
37 | graph = TinyGraph(
38 | url="your url",
39 | username="neo4j name",
40 | password="neo4j password",
41 | llm=llm,
42 | emb=emb,
43 | )
44 | ```
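The repository also ships a Groq-backed LLM (`tinygraph/llm/groq.py`). A minimal sketch, assuming a valid Groq API key and model name:

```python
from tinygraph.llm.groq import groqLLM

llm = groqLLM("model name", "your groq key")  # drop-in replacement for zhipuLLM
```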
45 |
46 | Add a document with TinyGraph. Any plain-text file format is supported. This step can take quite a while; once it finishes, a `workspace` folder is created in the current directory containing the `community`, `chunk`, and `doc` information:
47 |
48 | ```python
49 | graph.add_document("example/data.md")
50 | ```
51 |
52 | Once the document has been added, you can run queries with TinyGraph. Both local and global queries are supported:
53 |
54 | ```python
55 | local_res = graph.local_query("what is ML")
56 | print(local_res)
57 | global_res = graph.global_query("what is ML")
58 | print(global_res)
59 | ```
60 |
61 | With these steps you can quickly get started with Tiny-Graphrag and try out its document-management and query features.
62 |
63 | ## Code Walkthrough
64 | This repository provides a walkthrough of Tiny-Graphrag's core code to help newcomers quickly understand the whole project; see:
65 | - Tiny-Graphrag_User_Guide_and_Code_Documentation.md
66 |
67 | ## Acknowledgements
68 |
69 | While writing Tiny-Graphrag, we drew on the following projects:
70 |
71 | [GraphRAG](https://github.com/microsoft/graphrag)
72 |
73 | [nano-graphrag](https://github.com/gusye1234/nano-graphrag)
74 |
75 | Note that Tiny-Graphrag is a simplified GraphRAG implementation and is not suited for production use. If you need a more complete GraphRAG implementation, we recommend the projects above.
76 |
--------------------------------------------------------------------------------
/example/data.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 | ## 1.1 Introduction
4 |
5 | Following a drizzling, we take a walk on the wet street. Feeling the gentle breeze and seeing the sunset glow, we bet the weather must be nice tomorrow. Walking to a fruit stand, we pick up a green watermelon with curly root and muffled sound; while hoping the watermelon is ripe, we also expect some good academic marks this semester after all the hard work on studies. We wish readers to share the same confidence in their studies, but to begin with, let us take an informal discussion on what is machine learning.
6 |
7 | Taking a closer look at the scenario described above, we notice that it involves many experience-based predictions. For example, why would we expect beautiful weather tomorrow after observing the gentle breeze and sunset glow? We expect this beautiful weather because, from our experience, the weather on the following day is often beautiful when we experience such a scene in the present day. Also, why do we pick the watermelon with green color, curly root, and muffled sound? It is because we have eaten and enjoyed many watermelons, and those satisfying the above criteria are usually ripe. Similarly, our learning experience tells us that hard work leads to good academic marks. We are confident in our predictions because we learned from experience and made experience-based decisions.
8 |
9 | Mitchell (1997) provides a more formal definition: ‘‘A computer program is said to learn from experience $E$ for some class of tasks $T$ and performance measure $P$, if its performance at tasks in $T$, as measured by $P$, improves with experience $E$.’’
10 |
11 | E.g., Hand et al. (2001).
12 |
13 | While humans learn from experience, can computers do the same? The answer is ‘‘yes’’, and machine learning is what we need. Machine learning is the technique that improves system performance by learning from experience via computational methods. In computer systems, experience exists in the form of data, and the main task of machine learning is to develop learning algorithms that build models from data. By feeding the learning algorithm with experience data, we obtain a model that can make predictions (e.g., the watermelon is ripe) on new observations (e.g., an uncut watermelon). If we consider computer science as the subject of algorithms, then machine learning is the subject of learning algorithms.
14 |
15 | In this book, we use ‘‘model’’ as a general term for the outcome learned from data. In some other literature, the term ‘‘model’’ may refer to the global outcome (e.g., a decision tree), while the term ‘‘pattern’’ refers to the local outcome (e.g., a single rule).
--------------------------------------------------------------------------------
/Tiny-Graphrag_test.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "/home/calvin-lucas/Documents/DataWhale_Learning_Material/tiny-graphrag\n"
13 | ]
14 | }
15 | ],
16 | "source": [
17 | "# 注意:重新运行前需要:重启整个内核\n",
18 | "import os\n",
19 | "import sys\n",
20 | "sys.path.append('.') # 添加当前目录到 Python 路径\n",
21 | "print(os.getcwd()) # 验证下当前工作路径"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "# 导入模块\n",
31 | "from tinygraph.graph import TinyGraph\n",
32 | "from tinygraph.embedding.zhipu import zhipuEmb\n",
33 | "from tinygraph.llm.zhipu import zhipuLLM\n",
34 | "\n",
35 | "from neo4j import GraphDatabase\n",
36 | "from dotenv import load_dotenv # 用于加载环境变量"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 3,
42 | "metadata": {},
43 | "outputs": [],
44 | "source": [
45 | "# 配置使用的 LLM 和 Embedding 服务,现在只支持 ZhipuAI\n",
46 | "# 加载 .env文件, 从而导入api_key\n",
47 | "load_dotenv() # 加载工作目录下的 .env 文件\n",
48 | "\n",
49 | "emb = zhipuEmb(\n",
50 | " model_name=\"embedding-2\", # 嵌入模型\n",
51 | " api_key=os.getenv('API_KEY')\n",
52 | ")\n",
53 | "llm = zhipuLLM(\n",
54 | " model_name=\"glm-3-turbo\", # LLM 模型\n",
55 | " api_key=os.getenv('API_KEY')\n",
56 | ")\n",
57 | "graph = TinyGraph(\n",
58 | " url=\"neo4j://localhost:7687\",\n",
59 | " username=\"neo4j\",\n",
60 | " password=\"neo4j-passwordTGR\", # 初次登陆的默认密码为neo4j,此后需修改再使用\n",
61 | " llm=llm,\n",
62 | " emb=emb,\n",
63 | ")\n"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 4,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "name": "stdout",
73 | "output_type": "stream",
74 | "text": [
75 | "Document 'example/data.md' has already been loaded, skipping import process.\n"
76 | ]
77 | }
78 | ],
79 | "source": [
80 | "# 使用 TinyGraph 添加文档。目前支持所有文本格式的文件。这一步的时间可能较长;\n",
81 | "# 结束后,在当前目录下会生成一个 `workspace` 文件夹,包含 `community`、`chunk` 和 `doc` 信息\n",
82 | "graph.add_document(\"example/data.md\")"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 5,
88 | "metadata": {},
89 | "outputs": [
90 | {
91 | "name": "stdout",
92 | "output_type": "stream",
93 | "text": [
94 | "数据库连接正常,节点数量: 29\n"
95 | ]
96 | }
97 | ],
98 | "source": [
99 | "# 再次验证数据库连接\n",
100 | "with graph.driver.session() as session:\n",
101 | " result = session.run(\"MATCH (n) RETURN count(n) as count\")\n",
102 | " count = result.single()[\"count\"]\n",
103 | " print(f\"数据库连接正常,节点数量: {count}\")"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 6,
109 | "metadata": {},
110 | "outputs": [
111 | {
112 | "name": "stdout",
113 | "output_type": "stream",
114 | "text": [
115 | "\n",
116 | "本地查询结果:\n",
117 | "The term \"dl\" is not explicitly defined in the provided context. However, based on the context's focus on machine learning, \"dl\" might commonly be interpreted as an abbreviation for \"deep learning,\" which is a subset of machine learning that involves neural networks with many layers (hence \"deep\"). Deep learning has become a prominent field, particularly in the realm of artificial intelligence, where it is used to recognize patterns and make predictions from large datasets.\n",
118 | "\n",
119 | "If \"dl\" refers to something else in the context of the user query, there would be no information to discern its meaning without further clarification or additional context.\n"
120 | ]
121 | }
122 | ],
123 | "source": [
124 | "# 执行局部查询测试\n",
125 | "local_res = graph.local_query(\"what is dl?\")\n",
126 | "print(\"\\n本地查询结果:\")\n",
127 | "print(local_res)\n"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 7,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "name": "stdout",
137 | "output_type": "stream",
138 | "text": [
139 | "\n",
140 | "全局查询结果:\n",
141 | "The term 'dl' is not explicitly mentioned in the provided data tables. Therefore, I don't know what 'dl' refers to in the context of the user's question. If 'dl' stands for 'Deep Learning,' it is a subset of machine learning that uses neural networks with many layers for feature extraction and modeling. However, this context is not provided in the data tables.\n"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "\n",
147 | "# 执行全局查询测试\n",
148 | "global_res = graph.global_query(\"what is dl?\")\n",
149 | "print(\"\\n全局查询结果:\")\n",
150 | "print(global_res)"
151 | ]
152 | }
153 | ],
154 | "metadata": {
155 | "kernelspec": {
156 | "display_name": "TinyGraphRAG_2025-04-08",
157 | "language": "python",
158 | "name": "python3"
159 | },
160 | "language_info": {
161 | "codemirror_mode": {
162 | "name": "ipython",
163 | "version": 3
164 | },
165 | "file_extension": ".py",
166 | "mimetype": "text/x-python",
167 | "name": "python",
168 | "nbconvert_exporter": "python",
169 | "pygments_lexer": "ipython3",
170 | "version": "3.10.16"
171 | }
172 | },
173 | "nbformat": 4,
174 | "nbformat_minor": 2
175 | }
176 |
--------------------------------------------------------------------------------
/tinygraph/prompt.py:
--------------------------------------------------------------------------------
1 | GEN_NODES = """
2 | ## Goal
3 | Please identify and extract triplet information from the provided article, focusing only on entities and relationships related to significant knowledge points.
4 | Each triplet should be in the form of (Subject, Predicate, Object).
5 | Follow these guidelines:
6 |
7 | 1. **Subject:** Concepts in Bayesian Optimization
8 | 2. **Predicate:** The action or relationship that links the subject to the object.
9 | 3. **Object:** Concepts in Bayesian Optimization that are affected by or related to the action of the subject.
10 |
11 | ## Example
12 | For the sentence "Gaussian Processes are used to model the objective function in Bayesian Optimization" the triplet would be:
13 |
14 | <triplet><subject>Gaussian Processes</subject><predicate>are used to model the objective function in</predicate><object>Bayesian Optimization</object></triplet>
15 |
16 | For the sentence "John read a book on the weekend," which is not related to any knowledge points, no triplet should be extracted.
17 |
18 | ## Instructions
19 | 1. Read through the article carefully.
20 | 2. Think step by step. Try to find some useful knowledge points from the article. You need to reorganize the content of the sentence into corresponding knowledge points.
21 | 3. Identify key sentences that contain relevant triplet information related to significant knowledge points.
22 | 4. Extract and format the triplets as per the given example, excluding any information that is not relevant to significant knowledge points.
23 |
24 | ## Output Format
25 | For each identified triplet, provide:
26 | <triplet><subject>[Entity]</subject><predicate>[The action or relationship]</predicate><object>[Entity]</object></triplet>
27 |
28 | ## Article
29 |
30 | {text}
31 |
32 | ## Your response
33 | """
34 |
35 | GET_ENTITY = """
36 | ## Goal
37 |
38 | You are an experienced machine learning teacher.
39 | You need to identify the key concepts related to machine learning that the article requires students to master. For each concept, provide a brief description that explains its relevance and importance in the context of the article.
40 |
41 | ## Example
42 |
43 | article:
44 | "In the latest study, we explored the potential of using machine learning algorithms for disease prediction. We used support vector machines (SVM) and random forest algorithms to analyze medical data. The results showed that these models performed well in predicting disease risk through feature selection and cross-validation. In particular, the random forest model showed better performance in dealing with overfitting problems. In addition, we discussed the application of deep learning in medical image analysis."
45 |
46 | response:
47 |
48 | <concept><name>Support Vector Machine (SVM)</name><description>A supervised learning model used for classification and regression tasks, particularly effective in high-dimensional spaces.</description></concept>
49 |
50 | <concept><name>Random Forest Algorithm</name><description>An ensemble learning method that builds multiple decision trees and merges them together to get a more accurate and stable prediction, often used to reduce overfitting.</description></concept>
51 |
52 | <concept><name>Feature Selection</name><description>The process of selecting a subset of relevant features for use in model construction, crucial for improving model performance and reducing complexity.</description></concept>
53 |
54 | <concept><name>Overfitting</name><description>A common issue where a model learns the details and noise in the training data to the extent that it negatively impacts the model's performance on new data.</description></concept>
55 |
56 | <concept><name>Deep Learning</name><description>A subset of machine learning that uses neural networks with many layers to model complex patterns in large datasets, often applied in image and speech recognition tasks.</description></concept>
66 |
67 |
68 | ## Format
69 |
70 | Wrap each concept in the HTML tag <concept>, and include the name of the concept in the <name> tag and its description in the <description> tag.
71 |
72 | ## Article
73 |
74 | {text}
75 |
76 | ## Your response
77 | """
78 |
79 |
80 | ENTITY_DISAMBIGUATION = """
81 | ## Goal
82 | Given multiple entities with the same name, determine if they can be merged into a single entity. If merging is possible, provide the transformation from entity id to entity id.
83 |
84 | ## Guidelines
85 | 1. **Entities:** A list of entities with the same name.
86 | 2. **Merge:** Determine if the entities can be merged into a single entity.
87 | 3. **Transformation:** If merging is possible, provide the transformation from entity id to entity id.
88 |
89 | ## Example
90 | 1. Entities:
91 | [
92 | {"name": "Entity A", "entity id": "entity-1"},
93 | {"name": "Entity A", "entity id": "entity-2"},
94 | {"name": "Entity A", "entity id": "entity-3"}
95 | ]
96 |
97 | Your response should be:
98 |
99 | {"entity-2": "entity-1", "entity-3": "entity-1"}
100 |
101 |
102 | 2. Entities:
103 | [
104 | {"name": "Entity B", "entity id": "entity-4"},
105 | {"name": "Entity C", "entity id": "entity-5"},
106 | {"name": "Entity B", "entity id": "entity-6"}
107 | ]
108 |
109 | Your response should be:
110 |
111 | None
112 |
113 | ## Output Format
114 | Provide the following information:
115 | - Transformation: A dictionary mapping entity ids to the final entity id after merging.
116 |
117 | ## Given Entities
118 | {entities}
119 |
120 | ## Your response
121 | """
122 |
123 | GET_TRIPLETS = """
124 | ## Goal
125 | Identify and extract all the relationships between the given concepts from the provided text.
126 | Identify as many relationships between the concepts as possible.
127 | The relationship in the triple should accurately reflect the interaction or connection between the two concepts.
128 |
129 | ## Guidelines:
130 | 1. **Subject:** The first entity from the given entities.
131 | 2. **Predicate:** The action or relationship linking the subject to the object.
132 | 3. **Object:** The second entity from the given entities.
133 |
134 | ## Example:
135 | 1. Article :
136 | "Gaussian Processes are used to model the objective function in Bayesian Optimization"
137 | Given entities:
138 | [{{"name": "Gaussian Processes", "entity id": "entity-1"}}, {{"name": "Bayesian Optimization", "entity id": "entity-2"}}]
139 | Output:
140 | <triplet><subject>Gaussian Processes</subject><subject_id>entity-1</subject_id><predicate>are used to model the objective function in</predicate><object_id>entity-2</object_id></triplet>
141 |
142 | 2. Article :
143 | "Hydrogen is a colorless, odorless, non-toxic gas and is the lightest and most abundant element in the universe. Oxygen is a gas that supports combustion and is widely present in the Earth's atmosphere. Water is a compound made up of hydrogen and oxygen, with the chemical formula H2O."
144 | Given entities:
145 | [{{"name": "Hydrogen", "entity id": "entity-3"}}, {{"name": "Oxygen", "entity id": "entity-4"}}, {{"name": "Water", "entity id": "entity-5"}}]
146 | Output:
147 | <triplet><subject>Hydrogen</subject><subject_id>entity-3</subject_id><predicate>is a component of</predicate><object_id>entity-5</object_id></triplet>
148 | 3. Article :
149 | "John read a book on the weekend"
150 | Given entities:
151 | []
152 | Output:
153 | None
154 |
155 | ## Format:
156 | For each identified triplet, provide:
157 | **Both entities must come from "Given Entities"**
158 | <triplet><subject>[Entity]</subject><subject_id>[Entity ID]</subject_id><predicate>[The action or relationship]</predicate><object_id>[Entity ID]</object_id></triplet>
159 |
160 | ## Given Entities:
161 | {entity}
162 |
163 | ### Article:
164 | {text}
165 |
166 | ## Additional Instructions:
167 | - Before giving your response, you should analyze and think about it sentence by sentence.
168 | - Both the subject and object must be selected from the given entities and cannot change their content.
169 | - If no relevant triplet involving both entities is found, no triplet should be extracted.
170 | - If there are similar concepts, please rewrite them into a form that suits our requirements.
171 |
172 | ## Your response:
173 | """
174 |
175 | TEST_PROMPT = """
176 | ## Foundation of students
177 | {state}
178 | ## Goal
179 | You will help students solve questions through multiple rounds of dialogue.
180 | Please follow the steps below to help students solve the question:
181 | 1. Explain the basic knowledge and principles behind the question and make sure the other party understands these basic concepts.
182 | 2. Don't give a complete answer directly, but guide the student to think about the key steps of the question.
183 | 3. After guiding the student to think, let them try to solve the question by themselves. Give appropriate hints and feedback to help them correct their mistakes and further improve their solutions.
184 | 4. Return TERMINATE after the problem is solved.
185 | """
186 |
187 | GEN_COMMUNITY_REPORT = """
188 | ## Role
189 | You are an AI assistant that helps a human analyst to perform general information discovery.
190 | Information discovery is the process of identifying and assessing relevant information associated with certain entities (e.g., organizations and individuals) within a network.
191 |
192 | ## Goal
193 | Write a comprehensive report of a community.
194 | Given a list of entities that belong to the community as well as their relationships and optional associated claims. The report will be used to inform decision-makers about information associated with the community and their potential impact.
195 | The content of this report includes an overview of the community's key entities, their legal compliance, technical capabilities, reputation, and noteworthy claims.
196 |
197 | ## Report Structure
198 |
199 | The report should include the following sections:
200 |
201 | - TITLE: community's name that represents its key entities - title should be short but specific. When possible, include representative named entities in the title.
202 | - SUMMARY: An executive summary of the community's overall structure, how its entities are related to each other, and significant information associated with its entities.
203 | - DETAILED FINDINGS: A list of 5-10 key insights about the community. Each insight should have a short summary followed by multiple paragraphs of explanatory text grounded according to the grounding rules below. Be comprehensive.
204 |
205 | Return output as a well-formed JSON-formatted string with the following format:
206 | {{
207 |     "title": <report_title>,
208 |     "summary": <executive_summary>,
209 |     "findings": [
210 |         {{
211 |             "summary": <insight_1_summary>,
212 |             "explanation": <insight_1_explanation>
213 |         }},
214 |         {{
215 |             "summary": <insight_2_summary>,
216 |             "explanation": <insight_2_explanation>
217 |         }}
218 |         ...
219 |     ]
220 | }}
221 |
222 | ## Grounding Rules
223 | Do not include information where the supporting evidence for it is not provided.
224 |
225 | ## Example Input
226 | -----------
227 | Text:
228 | ```
229 | Entities:
230 | ```csv
231 | entity,description
232 | VERDANT OASIS PLAZA,Verdant Oasis Plaza is the location of the Unity March
233 | HARMONY ASSEMBLY,Harmony Assembly is an organization that is holding a march at Verdant Oasis Plaza
234 | ```
235 | Relationships:
236 | ```csv
237 | source,target,description
238 | VERDANT OASIS PLAZA,UNITY MARCH,Verdant Oasis Plaza is the location of the Unity March
239 | VERDANT OASIS PLAZA,HARMONY ASSEMBLY,Harmony Assembly is holding a march at Verdant Oasis Plaza
240 | VERDANT OASIS PLAZA,UNITY MARCH,The Unity March is taking place at Verdant Oasis Plaza
241 | VERDANT OASIS PLAZA,TRIBUNE SPOTLIGHT,Tribune Spotlight is reporting on the Unity march taking place at Verdant Oasis Plaza
242 | VERDANT OASIS PLAZA,BAILEY ASADI,Bailey Asadi is speaking at Verdant Oasis Plaza about the march
243 | HARMONY ASSEMBLY,UNITY MARCH,Harmony Assembly is organizing the Unity March
244 | ```
245 | ```
246 | Output:
247 | {{
248 | "title": "Verdant Oasis Plaza and Unity March",
249 | "summary": "The community revolves around the Verdant Oasis Plaza, which is the location of the Unity March. The plaza has relationships with the Harmony Assembly, Unity March, and Tribune Spotlight, all of which are associated with the march event.",
250 | "findings": [
251 | {{
252 | "summary": "Verdant Oasis Plaza as the central location",
253 | "explanation": "Verdant Oasis Plaza is the central entity in this community, serving as the location for the Unity March. This plaza is the common link between all other entities, suggesting its significance in the community. The plaza's association with the march could potentially lead to issues such as public disorder or conflict, depending on the nature of the march and the reactions it provokes."
254 | }},
255 | {{
256 | "summary": "Harmony Assembly's role in the community",
257 | "explanation": "Harmony Assembly is another key entity in this community, being the organizer of the march at Verdant Oasis Plaza. The nature of Harmony Assembly and its march could be a potential source of threat, depending on their objectives and the reactions they provoke. The relationship between Harmony Assembly and the plaza is crucial in understanding the dynamics of this community."
258 | }},
259 | {{
260 | "summary": "Unity March as a significant event",
261 | "explanation": "The Unity March is a significant event taking place at Verdant Oasis Plaza. This event is a key factor in the community's dynamics and could be a potential source of threat, depending on the nature of the march and the reactions it provokes. The relationship between the march and the plaza is crucial in understanding the dynamics of this community."
262 | }},
263 | {{
264 | "summary": "Role of Tribune Spotlight",
265 | "explanation": "Tribune Spotlight is reporting on the Unity March taking place in Verdant Oasis Plaza. This suggests that the event has attracted media attention, which could amplify its impact on the community. The role of Tribune Spotlight could be significant in shaping public perception of the event and the entities involved."
266 | }}
267 | ]
268 | }}
269 |
270 | ## Real Data
271 | Use the following text for your answer. Do not make anything up in your answer.
272 |
273 | Text:
274 | ```
275 | {input_text}
276 | ```
277 |
278 | The report should include the following sections:
279 |
280 | - TITLE: community's name that represents its key entities - title should be short but specific. When possible, include representative named entities in the title.
281 | - SUMMARY: An executive summary of the community's overall structure, how its entities are related to each other, and significant information associated with its entities.
282 | - DETAILED FINDINGS: A list of 5-10 key insights about the community. Each insight should have a short summary followed by multiple paragraphs of explanatory text grounded according to the grounding rules below. Be comprehensive.
283 |
284 | Return output as a well-formed JSON-formatted string with the following format:
285 | {{
286 |     "title": <report_title>,
287 |     "summary": <executive_summary>,
288 |     "rating": <impact_severity_rating>,
289 |     "rating_explanation": <rating_explanation>,
290 |     "findings": [
291 |         {{
292 |             "summary": <insight_1_summary>,
293 |             "explanation": <insight_1_explanation>
294 |         }},
295 |         {{
296 |             "summary": <insight_2_summary>,
297 |             "explanation": <insight_2_explanation>
298 |         }}
299 |         ...
300 |     ]
301 | }}
302 |
303 | ## Grounding Rules
304 | Do not include information where the supporting evidence for it is not provided.
305 |
306 | Output:
307 | """
308 |
309 | GLOBAL_MAP_POINTS = """
310 | You are a helpful assistant responding to questions about data in the tables provided.
311 |
312 |
313 | ---Goal---
314 |
315 | Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables.
316 |
317 | You should use the data provided in the data tables below as the primary context for generating the response.
318 | If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. Do not make anything up.
319 |
320 | Each key point in the response should have the following element:
321 | - Description: A comprehensive description of the point.
322 | - Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. An 'I don't know' type of response should have a score of 0.
323 |
324 | The response should be HTML formatted as follows:
325 |
326 | <points>
327 | <point><description>"Description of point 1..."</description><score>score_value</score></point>
328 | <point><description>"Description of point 2..."</description><score>score_value</score></point>
329 | </points>
330 |
331 | The response shall preserve the original meaning and use of modal verbs such as "shall", "may" or "will".
332 | Do not include information where the supporting evidence for it is not provided.
333 |
334 |
335 | ---Data tables---
336 |
337 | {context_data}
338 |
339 | ---User query---
340 |
341 | {query}
342 |
343 | ---Goal---
344 |
345 | Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables.
346 |
347 | You should use the data provided in the data tables below as the primary context for generating the response.
348 | If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. Do not make anything up.
349 |
350 | Each key point in the response should have the following element:
351 | - Description: A comprehensive description of the point.
352 | - Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. An 'I don't know' type of response should have a score of 0.
353 |
354 | The response shall preserve the original meaning and use of modal verbs such as "shall", "may" or "will".
355 | Do not include information where the supporting evidence for it is not provided.
356 |
357 | The response should be HTML formatted as follows:
358 | <points>
359 | <point><description>"Description of point 1..."</description><score>score_value</score></point>
360 | <point><description>"Description of point 2..."</description><score>score_value</score></point>
361 | </points>
362 |
363 | """
364 |
365 | LOCAL_QUERY = """
366 | ## User Query
367 | {query}
368 | ## Context
369 | {context}
370 | ## Task
371 | Based on the given context, please provide a response to the user query.
372 | ## Your Response
373 | """
374 |
375 | GLOBAL_QUERY = """
376 | ## User Query
377 | {query}
378 | ## Context
379 | {context}
380 | ## Task
381 | Based on the given context, please provide a response to the user query.
382 | ## Your Response
383 | """
384 |
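To see how these tagged prompt outputs are meant to be consumed, here is a minimal parsing sketch built on `get_text_inside_tag` from `tinygraph/utils.py`. The `<concept>`/`<name>`/`<description>` tag names follow the reconstruction above and are an assumption, since `graph.py` is not included in this dump:

```python
from tinygraph.utils import get_text_inside_tag

# Hypothetical LLM response to the GET_ENTITY prompt (tag names assumed).
response = (
    "<concept><name>Deep Learning</name>"
    "<description>A subset of machine learning using deep neural networks.</description></concept>"
)

for concept in get_text_inside_tag(response, "concept"):
    name = get_text_inside_tag(concept, "name")[0]
    desc = get_text_inside_tag(concept, "description")[0]
    print(name, "->", desc)
```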
--------------------------------------------------------------------------------
/Tiny-Graphrag_User_Guide_and_Code_Documentation.md:
--------------------------------------------------------------------------------
1 | # Tiny-Graphrag User Guide and Code Walkthrough
2 | > This README has two parts: 1. Introduction; 2. Main text
3 | ## Introduction:
4 | - Tiny-Graphrag is a simplified version of GraphRAG that keeps its core functionality: 1. knowledge-graph construction; 2. graph-retrieval optimization; 3. generation augmentation. The project was created to help readers understand how GraphRAG works and to provide a working demo.
5 | - The overall information flow in this project is shown below:
6 |
7 |