├── storage_vector ├── graph_store.json └── index_store.json ├── chatbot_storage_vector ├── graph_store.json └── index_store.json ├── storage_graph ├── vector_store.json └── docstore.json ├── chatbot_storage_graph ├── vector_store.json └── index_store.json ├── requirements.txt ├── .devcontainer └── devcontainer.json ├── .gitignore ├── LICENSE ├── graph_rag_chatbot.py ├── kg_build_and_query_CN.py ├── kg_build_and_query.py ├── kg_retrieval_arguments_generation.py └── notebooks └── KG_Building.ipynb /storage_vector/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /chatbot_storage_vector/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /storage_graph/vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}} -------------------------------------------------------------------------------- /chatbot_storage_graph/vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}} -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.8.9 2 | nebula3-python==3.4.0 3 | networkx==3.0 4 | nltk==3.8.1 5 | openai==0.28.0 6 | pyvis==0.3.2 7 | PyYAML==6.0 8 | tenacity==8.2.2 9 | langchain==0.0.335 10 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3", 3 | // Or use a Dockerfile or 
Docker Compose file. More info: https://containers.dev/guide/dockerfile 4 | "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", 5 | "customizations": { 6 | "codespaces": { 7 | "openFiles": [ 8 | "README.md", 9 | "graph_rag_chatbot.py" 10 | ] 11 | }, 12 | "vscode": { 13 | "settings": {}, 14 | "extensions": [ 15 | "ms-python.python", 16 | "ms-python.vscode-pylance" 17 | ] 18 | } 19 | }, 20 | "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y Query to NebulaGraph: 368 | 369 | ```cypher 370 | {render_query} 371 | ``` 372 | """ 373 | ) 374 | st.markdown("> The SubGraph Retrieved") 375 | result = query_nebulagraph(render_query) 376 | result_df = result_to_df(result) 377 | 378 | # create pyvis graph 379 | g = create_pyvis_graph(result_df) 380 | 381 | # render with random file name 382 | graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html") 383 | 384 | components.html(graph_html, height=500, scrolling=True) 385 | 386 | # st.write(f"*Answer*: {answer_GraphRAG}") 387 | -------------------------------------------------------------------------------- /kg_build_and_query_CN.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.stdout.reconfigure(encoding="utf-8") 4 | sys.stdin.reconfigure(encoding="utf-8") 5 | 6 | import streamlit as st 7 | import streamlit.components.v1 as components 8 | 9 | import re 10 | 11 | import random 12 | 13 | CODE_BUILD_KG = """ 14 | 15 | # 准备 GraphStore 16 | 17 | os.environ['NEBULA_USER'] = "root" 18 | os.environ['NEBULA_PASSWORD'] = "nebula" # default password 19 | os.environ['NEBULA_ADDRESS'] = "127.0.0.1:9669" # assumed we have NebulaGraph installed locally 20 | 21 | space_name = "guardians" 22 | edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg 23 | tags = ["entity"] # default, could be omit if create from an empty kg 24 | 25 | 
graph_store = NebulaGraphStore(space_name=space_name, edge_types=edge_types, rel_prop_names=rel_prop_names, tags=tags) 26 | storage_context = StorageContext.from_defaults(graph_store=graph_store) 27 | 28 | # 从维基百科下载、预处理数据 29 | 30 | from llama_index import download_loader 31 | 32 | WikipediaReader = download_loader("WikipediaReader") 33 | 34 | loader = WikipediaReader() 35 | 36 | documents = loader.load_data(pages=['Guardians of the Galaxy Vol. 3'], auto_suggest=False) 37 | 38 | # 利用 LLM 从文档中抽取知识三元组,并存储到 GraphStore(NebulaGraph) 39 | 40 | kg_index = KnowledgeGraphIndex.from_documents( 41 | documents, 42 | storage_context=storage_context, 43 | max_triplets_per_chunk=10, 44 | service_context=service_context, 45 | space_name=space_name, 46 | edge_types=edge_types, 47 | rel_prop_names=rel_prop_names, 48 | tags=tags, 49 | include_embeddings=True, 50 | ) 51 | 52 | """ 53 | 54 | CODE_NL2CYPHER_LANGCHAIN = """ 55 | ## Langchain 56 | # Doc: https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa 57 | 58 | from langchain.chat_models import ChatOpenAI 59 | from langchain.chains import NebulaGraphQAChain 60 | from langchain.graphs import NebulaGraph 61 | 62 | graph = NebulaGraph( 63 | space=space_name, 64 | username="root", 65 | password="nebula", 66 | address="127.0.0.1", 67 | port=9669, 68 | session_pool_size=30, 69 | ) 70 | 71 | chain = NebulaGraphQAChain.from_llm( 72 | llm, graph=graph, verbose=True 73 | ) 74 | 75 | chain.run( 76 | "Tell me about Peter Quill?", 77 | ) 78 | """ 79 | 80 | CODE_NL2CYPHER_LLAMAINDEX = """ 81 | 82 | ## Llama Index 83 | # Doc: https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html 84 | 85 | from llama_index.query_engine import KnowledgeGraphQueryEngine 86 | 87 | from llama_index.storage.storage_context import StorageContext 88 | from llama_index.graph_stores import NebulaGraphStore 89 | 90 | nl2kg_query_engine = KnowledgeGraphQueryEngine( 91 | storage_context=storage_context, 92 | 
service_context=service_context, 93 | llm=llm, 94 | verbose=True, 95 | ) 96 | 97 | response = nl2kg_query_engine.query( 98 | "Tell me about Peter Quill?", 99 | ) 100 | """ 101 | 102 | 103 | import os 104 | import json 105 | import openai 106 | from llama_index.llms import AzureOpenAI 107 | from langchain.embeddings import OpenAIEmbeddings 108 | from llama_index import LangchainEmbedding 109 | from llama_index import ( 110 | VectorStoreIndex, 111 | SimpleDirectoryReader, 112 | KnowledgeGraphIndex, 113 | LLMPredictor, 114 | ServiceContext, 115 | ) 116 | 117 | from llama_index.storage.storage_context import StorageContext 118 | from llama_index.graph_stores import NebulaGraphStore 119 | 120 | import logging 121 | import sys 122 | 123 | logging.basicConfig( 124 | stream=sys.stdout, level=logging.INFO 125 | ) # logging.DEBUG for more verbose output 126 | # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) 127 | 128 | openai.api_type = "azure" 129 | openai.api_base = st.secrets["OPENAI_API_BASE"] 130 | # openai.api_version = "2022-12-01" azure gpt-3 131 | openai.api_version = "2023-05-15" # azure gpt-3.5 turbo 132 | openai.api_key = st.secrets["OPENAI_API_KEY"] 133 | 134 | llm = AzureOpenAI( 135 | engine=st.secrets["DEPLOYMENT_NAME"], 136 | temperature=0, 137 | model="gpt-35-turbo", 138 | ) 139 | llm_predictor = LLMPredictor(llm=llm) 140 | 141 | # You need to deploy your own embedding model as well as your own chat completion model 142 | embedding_llm = LangchainEmbedding( 143 | OpenAIEmbeddings( 144 | model="text-embedding-ada-002", 145 | deployment=st.secrets["EMBEDDING_DEPLOYMENT_NAME"], 146 | openai_api_key=openai.api_key, 147 | openai_api_base=openai.api_base, 148 | openai_api_type=openai.api_type, 149 | openai_api_version=openai.api_version, 150 | ), 151 | embed_batch_size=1, 152 | ) 153 | 154 | service_context = ServiceContext.from_defaults( 155 | llm_predictor=llm_predictor, 156 | embed_model=embedding_llm, 157 | ) 158 | 
os.environ["NEBULA_USER"] = st.secrets["graphd_user"] 159 | os.environ["NEBULA_PASSWORD"] = st.secrets["graphd_password"] 160 | os.environ[ 161 | "NEBULA_ADDRESS" 162 | ] = f"{st.secrets['graphd_host']}:{st.secrets['graphd_port']}" 163 | 164 | space_name = "guardians" 165 | edge_types, rel_prop_names = ["relationship"], [ 166 | "relationship" 167 | ] # default, could be omit if create from an empty kg 168 | tags = ["entity"] # default, could be omit if create from an empty kg 169 | 170 | graph_store = NebulaGraphStore( 171 | space_name=space_name, 172 | edge_types=edge_types, 173 | rel_prop_names=rel_prop_names, 174 | tags=tags, 175 | ) 176 | storage_context = StorageContext.from_defaults(graph_store=graph_store) 177 | 178 | from llama_index.query_engine import KnowledgeGraphQueryEngine 179 | 180 | from llama_index.storage.storage_context import StorageContext 181 | from llama_index.graph_stores import NebulaGraphStore 182 | 183 | nl2kg_query_engine = KnowledgeGraphQueryEngine( 184 | storage_context=storage_context, 185 | service_context=service_context, 186 | llm=llm, 187 | verbose=True, 188 | ) 189 | 190 | 191 | def cypher_to_all_paths(query): 192 | # Find the MATCH and RETURN parts 193 | match_parts = re.findall(r"(MATCH .+?(?=MATCH|$))", query, re.I | re.S) 194 | return_part = re.search(r"RETURN .+", query).group() 195 | 196 | modified_matches = [] 197 | path_ids = [] 198 | 199 | # Go through each MATCH part 200 | for i, part in enumerate(match_parts): 201 | path_id = f"path_{i}" 202 | path_ids.append(path_id) 203 | 204 | # Replace the MATCH keyword with "MATCH path_i = " 205 | modified_part = part.replace("MATCH ", f"MATCH {path_id} = ") 206 | modified_matches.append(modified_part) 207 | 208 | # Join the modified MATCH parts 209 | matches_string = " ".join(modified_matches) 210 | 211 | # Construct the new RETURN part 212 | return_string = f"RETURN {', '.join(path_ids)};" 213 | 214 | # Remove the old RETURN part from matches_string 215 | matches_string = 
def result_to_df(result):
    """Convert a NebulaGraph query result into a pandas DataFrame.

    Args:
        result: A result-set object exposing ``keys()`` (the column names)
            and ``column_values(name)`` (an iterable of wrapped values, each
            of which provides ``cast()`` to obtain the plain Python/nebula3
            object).

    Returns:
        A ``pandas.DataFrame`` with one column per result column, in the
        order reported by ``result.keys()``. A result without columns gives
        an empty DataFrame.
    """
    import pandas as pd

    # Iterate the column names directly instead of indexing by position;
    # every cell is unwrapped with cast() so downstream code sees real values.
    return pd.DataFrame(
        {
            col_name: [value.cast() for value in result.column_values(col_name)]
            for col_name in result.keys()
        }
    )
def query_nebulagraph(
    query,
    space_name=space_name,
    address=st.secrets["graphd_host"],
    port=9669,
    user=st.secrets["graphd_user"],
    password=st.secrets["graphd_password"],
):
    """Execute ``query`` against NebulaGraph and return the result set.

    A dedicated session pool is opened for the call and closed again before
    returning, so repeated invocations (one per button click in the UI) do
    not accumulate open sessions.

    Args:
        query: The nGQL / Cypher statement to run.
        space_name: Graph space the sessions are bound to.
        address: Host name or IP of the graphd service.
        port: TCP port of the graphd service.
        user: NebulaGraph user name.
        password: Password for ``user``.

    Returns:
        Whatever ``SessionPool.execute`` returns for ``query``.
    """
    # NOTE(review): the default port is hard-coded while NEBULA_ADDRESS in
    # this file is built from st.secrets['graphd_port'] -- confirm whether
    # the default here should follow the secret as well.
    from nebula3.Config import SessionPoolConfig
    from nebula3.gclient.net.SessionPool import SessionPool

    session_pool = SessionPool(user, password, space_name, [(address, port)])
    try:
        session_pool.init(SessionPoolConfig())
        return session_pool.execute(query)
    finally:
        # Release the sessions/connections opened for this single query.
        session_pool.close()
图谱的可视化部分采样,知识来源[银河护卫队3](https://en.wikipedia.org/wiki/Guardians_of_the_Galaxy_Vol._3)" 364 | ) 365 | 366 | components.iframe( 367 | src="https://www.siwei.io/demo-dumps/kg-llm/nebulagraph_draw_sample.html", 368 | height=500, 369 | scrolling=True, 370 | ) 371 | 372 | with tab_cypher: 373 | st.write("> Cypher 查询图库") 374 | query_string = st.text_input( 375 | label="输入查询语句", value="MATCH ()-[e]->() RETURN e LIMIT 25" 376 | ) 377 | if st.button("> 执行"): 378 | # run query 379 | result = query_nebulagraph(query_string) 380 | 381 | # convert to pandas dataframe 382 | result_df = result_to_df(result) 383 | 384 | # display pd dataframe 385 | st.dataframe(result_df) 386 | 387 | # create pyvis graph 388 | g = create_pyvis_graph(result_df) 389 | 390 | # render with random file name 391 | import random 392 | 393 | graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html") 394 | 395 | components.html(graph_html, height=500, scrolling=True) 396 | 397 | with tab_nl2cypher: 398 | st.write("> 使用自然语言查询图库") 399 | nl_query_string = st.text_input( 400 | label="输入自然语言问题", value="Tell me about Peter Quill?" 
401 | ) 402 | if st.button("生成 Cypher 查询语句,并执行"): 403 | response = nl2kg_query_engine.query(nl_query_string) 404 | graph_query = list(response.metadata.values())[0]["graph_store_query"] 405 | graph_query = graph_query.replace("WHERE", "\n WHERE").replace( 406 | "RETURN", "\nRETURN" 407 | ) 408 | answer = str(response) 409 | st.write(f"*答案*: {answer}") 410 | st.markdown( 411 | f""" 412 | ## 利用 LLM 生成的图查询语句 413 | ```cypher 414 | {graph_query} 415 | ``` 416 | """ 417 | ) 418 | st.write("## 结果可视化") 419 | render_query = cypher_to_all_paths(graph_query) 420 | result = query_nebulagraph(render_query) 421 | result_df = result_to_df(result) 422 | 423 | # create pyvis graph 424 | g = create_pyvis_graph(result_df) 425 | 426 | # render with random file name 427 | graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html") 428 | 429 | components.html(graph_html, height=500, scrolling=True) 430 | 431 | 432 | with tab_code_nl2cypher: 433 | st.write("利用 Langchain 或者 Llama Index,我们可以只用几行代码就实现自然语言查询图谱(NL2NebulaCypher)") 434 | 435 | tab_langchain, tab_llamaindex = st.tabs(["Langchain", "Llama Index"]) 436 | with tab_langchain: 437 | st.code(body=CODE_NL2CYPHER_LANGCHAIN, language="python") 438 | with tab_llamaindex: 439 | st.code(body=CODE_NL2CYPHER_LLAMAINDEX, language="python") 440 | 441 | st.markdown( 442 | """ 443 | 444 | ## 参考文档 445 | 446 | - [Langchain: NebulaGraphQAChain](https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa) 447 | - [Llama Index: KnowledgeGraphQueryEngine](https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html) 448 | """ 449 | ) 450 | -------------------------------------------------------------------------------- /kg_build_and_query.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.stdout.reconfigure(encoding="utf-8") 4 | sys.stdin.reconfigure(encoding="utf-8") 5 | 6 | import streamlit as st 7 | import 
streamlit.components.v1 as components 8 | 9 | import re 10 | 11 | import random 12 | 13 | CODE_BUILD_KG = """ 14 | 15 | # Prepare for GraphStore 16 | 17 | os.environ['NEBULA_USER'] = "root" 18 | os.environ['NEBULA_PASSWORD'] = "nebula" # default password 19 | os.environ['NEBULA_ADDRESS'] = "127.0.0.1:9669" # assumed we have NebulaGraph installed locally 20 | 21 | space_name = "guardians" 22 | edge_types, rel_prop_names = ["relationship"], ["relationship"] # default, could be omit if create from an empty kg 23 | tags = ["entity"] # default, could be omit if create from an empty kg 24 | 25 | graph_store = NebulaGraphStore(space_name=space_name, edge_types=edge_types, rel_prop_names=rel_prop_names, tags=tags) 26 | storage_context = StorageContext.from_defaults(graph_store=graph_store) 27 | 28 | # Download and Preprocess Data 29 | 30 | from llama_index import download_loader 31 | 32 | WikipediaReader = download_loader("WikipediaReader") 33 | 34 | loader = WikipediaReader() 35 | 36 | documents = loader.load_data(pages=['Guardians of the Galaxy Vol. 
3'], auto_suggest=False) 37 | 38 | # Build Knowledge Graph 39 | 40 | kg_index = KnowledgeGraphIndex.from_documents( 41 | documents, 42 | storage_context=storage_context, 43 | max_triplets_per_chunk=10, 44 | service_context=service_context, 45 | space_name=space_name, 46 | edge_types=edge_types, 47 | rel_prop_names=rel_prop_names, 48 | tags=tags, 49 | include_embeddings=True, 50 | ) 51 | 52 | """ 53 | 54 | CODE_NL2CYPHER_LANGCHAIN = """ 55 | ## Langchain 56 | # Doc: https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa 57 | 58 | from langchain.chat_models import ChatOpenAI 59 | from langchain.chains import NebulaGraphQAChain 60 | from langchain.graphs import NebulaGraph 61 | 62 | graph = NebulaGraph( 63 | space=space_name, 64 | username="root", 65 | password="nebula", 66 | address="127.0.0.1", 67 | port=9669, 68 | session_pool_size=30, 69 | ) 70 | 71 | chain = NebulaGraphQAChain.from_llm( 72 | llm, graph=graph, verbose=True 73 | ) 74 | 75 | chain.run( 76 | "Tell me about Peter Quill?", 77 | ) 78 | """ 79 | 80 | CODE_NL2CYPHER_LLAMAINDEX = """ 81 | 82 | ## Llama Index 83 | # Doc: https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html 84 | 85 | from llama_index.query_engine import KnowledgeGraphQueryEngine 86 | 87 | from llama_index.storage.storage_context import StorageContext 88 | from llama_index.graph_stores import NebulaGraphStore 89 | 90 | nl2kg_query_engine = KnowledgeGraphQueryEngine( 91 | storage_context=storage_context, 92 | service_context=service_context, 93 | llm=llm, 94 | verbose=True, 95 | ) 96 | 97 | response = nl2kg_query_engine.query( 98 | "Tell me about Peter Quill?", 99 | ) 100 | """ 101 | 102 | 103 | import os 104 | import json 105 | import openai 106 | from llama_index.llms import AzureOpenAI 107 | from langchain.embeddings import OpenAIEmbeddings 108 | from llama_index import LangchainEmbedding 109 | from llama_index import ( 110 | VectorStoreIndex, 111 | SimpleDirectoryReader, 
112 | KnowledgeGraphIndex, 113 | LLMPredictor, 114 | ServiceContext, 115 | ) 116 | 117 | from llama_index.storage.storage_context import StorageContext 118 | from llama_index.graph_stores import NebulaGraphStore 119 | 120 | import logging 121 | import sys 122 | 123 | logging.basicConfig( 124 | stream=sys.stdout, level=logging.INFO 125 | ) # logging.DEBUG for more verbose output 126 | # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) 127 | 128 | openai.api_type = "azure" 129 | openai.api_base = st.secrets["OPENAI_API_BASE"] 130 | # openai.api_version = "2022-12-01" azure gpt-3 131 | openai.api_version = "2023-05-15" # azure gpt-3.5 turbo 132 | openai.api_key = st.secrets["OPENAI_API_KEY"] 133 | 134 | llm = AzureOpenAI( 135 | engine=st.secrets["DEPLOYMENT_NAME"], 136 | temperature=0, 137 | model="gpt-35-turbo", 138 | ) 139 | llm_predictor = LLMPredictor(llm=llm) 140 | 141 | # You need to deploy your own embedding model as well as your own chat completion model 142 | embedding_llm = LangchainEmbedding( 143 | OpenAIEmbeddings( 144 | model="text-embedding-ada-002", 145 | deployment=st.secrets["EMBEDDING_DEPLOYMENT_NAME"], 146 | openai_api_key=openai.api_key, 147 | openai_api_base=openai.api_base, 148 | openai_api_type=openai.api_type, 149 | openai_api_version=openai.api_version, 150 | ), 151 | embed_batch_size=1, 152 | ) 153 | 154 | service_context = ServiceContext.from_defaults( 155 | llm_predictor=llm_predictor, 156 | embed_model=embedding_llm, 157 | ) 158 | os.environ["NEBULA_USER"] = st.secrets["graphd_user"] 159 | os.environ["NEBULA_PASSWORD"] = st.secrets["graphd_password"] 160 | os.environ[ 161 | "NEBULA_ADDRESS" 162 | ] = f"{st.secrets['graphd_host']}:{st.secrets['graphd_port']}" 163 | 164 | space_name = "guardians" 165 | edge_types, rel_prop_names = ["relationship"], [ 166 | "relationship" 167 | ] # default, could be omit if create from an empty kg 168 | tags = ["entity"] # default, could be omit if create from an empty kg 169 | 170 | 
def cypher_to_all_paths(query):
    """Rewrite a Cypher/nGQL query so that it returns whole paths.

    Each ``MATCH`` clause is bound to a path variable (``path_0``,
    ``path_1``, ...) and the original ``RETURN`` clause -- together with
    everything that follows it, such as ``ORDER BY`` or ``LIMIT`` -- is
    replaced by ``RETURN path_0, path_1, ...;``. Keywords are recognised
    case-insensitively and only as whole words, so identifiers that merely
    contain "match" or "return" are left alone.

    Args:
        query: The query text, containing at least one ``MATCH`` clause
            followed by a ``RETURN`` clause.

    Returns:
        The rewritten query: the path-bound ``MATCH`` clauses joined by a
        single space, a newline, then the new ``RETURN`` statement.

    Raises:
        ValueError: If ``query`` has no ``RETURN`` clause or no ``MATCH``
            clause before it.
    """
    return_match = re.search(r"\bRETURN\b", query, re.I)
    if return_match is None:
        raise ValueError("query has no RETURN clause")

    # Only the text before RETURN can hold the MATCH clauses; cutting here
    # also drops trailing ORDER BY / LIMIT lines that would otherwise end up
    # in front of the new RETURN.
    match_section = query[: return_match.start()]
    match_parts = re.findall(
        r"\bMATCH\b.+?(?=\bMATCH\b|\Z)", match_section, re.I | re.S
    )
    if not match_parts:
        raise ValueError("query has no MATCH clause before RETURN")

    path_ids = [f"path_{i}" for i in range(len(match_parts))]

    # Bind the leading MATCH keyword of every clause to its path variable.
    modified_matches = [
        re.sub(
            r"\AMATCH\b\s*",
            f"MATCH {path_id} = ",
            part.strip(),
            count=1,
            flags=re.I,
        )
        for path_id, part in zip(path_ids, match_parts)
    ]

    matches_string = " ".join(modified_matches)
    return_string = f"RETURN {', '.join(path_ids)};"
    return f"{matches_string}\n{return_string}"
def render_pd_item(g, item):
    """Add one query-result cell to the pyvis network ``g``.

    Nodes become graph nodes, relationships become labelled edges (their end
    points are added first when missing), paths are expanded into their
    nodes and relationships, and lists are walked element by element. Any
    other kind of value is ignored.

    Args:
        g: The network being built; must provide ``add_node``, ``add_edge``
            and ``node_ids``.
        item: A single value taken from the result DataFrame.
    """
    from nebula3.data.DataObject import Node, PathWrapper, Relationship

    if isinstance(item, Node):
        vertex_id = item.get_id().cast()
        merged_props = {}
        # Collect the properties of every tag attached to the vertex.
        for tag_name in item.tags():
            merged_props.update(item.properties(tag_name))
        g.add_node(vertex_id, label=vertex_id, title=str(merged_props))
        return

    if isinstance(item, Relationship):
        source = item.start_vertex_id().cast()
        target = item.end_vertex_id().cast()
        edge_label = item.edge_name()
        edge_props = item.properties()
        # Both end points must be present in the graph before the edge is added.
        for endpoint in (source, target):
            if endpoint not in g.node_ids:
                g.add_node(endpoint)
        g.add_edge(source, target, label=edge_label, title=str(edge_props))
        return

    if isinstance(item, PathWrapper):
        for vertex in item.nodes():
            render_pd_item(g, vertex)
        for relationship in item.relationships():
            render_pd_item(g, relationship)
        return

    if isinstance(item, list):
        for element in item:
            render_pd_item(g, element)
SessionPool 302 | 303 | config = SessionPoolConfig() 304 | session_pool = SessionPool(user, password, space_name, [(address, port)]) 305 | session_pool.init(config) 306 | return session_pool.execute(query) 307 | 308 | 309 | st.title("Demo: Knowledge Graph Build and Query with LLM") 310 | 311 | ( 312 | tab_code_kg, 313 | tab_notebook, 314 | tab_graph_view, 315 | tab_cypher, 316 | tab_nl2cypher, 317 | tab_code_nl2cypher, 318 | ) = st.tabs( 319 | [ 320 | "Code: Build KG", 321 | "Full Notebook", 322 | "Graph View", 323 | "Query", 324 | "Natural Language to Cypher", 325 | "Code: NL2Cypher", 326 | ] 327 | ) 328 | 329 | with tab_code_kg: 330 | st.write( 331 | "With a few lines of code, we can build a knowledge graph with LLM, LlamaIndex and NebulaGraph." 332 | ) 333 | st.write( 334 | "See full notebook for more details and try Graph Visualizations, Query, and Natural Language to Cypher by clicking on the tabs on the right." 335 | ) 336 | st.code(body=CODE_BUILD_KG, language="python") 337 | 338 | with tab_notebook: 339 | st.write("> Full Notebook") 340 | st.markdown( 341 | """ 342 | 343 | This is the full notebook to demonstrate how to: 344 | 345 | - Extract from data sources and build a knowledge graph with LLM and Llama Index, NebulaGraph in 3 lines of code 346 | - Query the Knowledge Graph with nGQL and visualize the graph 347 | - Query the knowledge graph with natural language in 1 line of code(both Langchain and Llama Index) 348 | """ 349 | ) 350 | # link to download notebook 351 | st.markdown( 352 | """ 353 | [Download](https://www.siwei.io/demo-dumps/kg-llm/KG_Building.ipynb) the notebook. 354 | """ 355 | ) 356 | 357 | components.iframe( 358 | src="https://www.siwei.io/demo-dumps/kg-llm/KG_Building.html", 359 | height=2000, 360 | width=800, 361 | scrolling=True, 362 | ) 363 | 364 | with tab_graph_view: 365 | st.write( 366 | "> Sub-Graph View of the Knowledge Graph about [Guardians of the Galaxy Vol. 
3](https://en.wikipedia.org/wiki/Guardians_of_the_Galaxy_Vol._3)" 367 | ) 368 | components.iframe( 369 | src="https://www.siwei.io/demo-dumps/kg-llm/nebulagraph_draw_sample.html", 370 | height=500, 371 | scrolling=True, 372 | ) 373 | 374 | with tab_cypher: 375 | st.write("> Query Knowledge Graph in nGQL") 376 | query_string = st.text_input( 377 | label="Enter nGQL query string", value="MATCH ()-[e]->() RETURN e LIMIT 25" 378 | ) 379 | if st.button("> execute"): 380 | # run query 381 | result = query_nebulagraph(query_string) 382 | 383 | # convert to pandas dataframe 384 | result_df = result_to_df(result) 385 | 386 | # display pd dataframe 387 | st.dataframe(result_df) 388 | 389 | # create pyvis graph 390 | g = create_pyvis_graph(result_df) 391 | 392 | # render with random file name 393 | import random 394 | 395 | graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html") 396 | 397 | components.html(graph_html, height=500, scrolling=True) 398 | 399 | with tab_nl2cypher: 400 | st.write("> Natural Language to Cypher") 401 | nl_query_string = st.text_input( 402 | label="Enter natural language query string", value="Tell me about Peter Quill?" 
403 | ) 404 | if st.button("Ask KG"): 405 | response = nl2kg_query_engine.query(nl_query_string) 406 | graph_query = list(response.metadata.values())[0]["graph_store_query"] 407 | graph_query = graph_query.replace("WHERE", "\n WHERE").replace( 408 | "RETURN", "\nRETURN" 409 | ) 410 | answer = str(response) 411 | st.write(f"*Answer*: {answer}") 412 | st.markdown( 413 | f""" 414 | ## Generated NebulaGraph Cypher Query 415 | ```cypher 416 | {graph_query} 417 | ``` 418 | """ 419 | ) 420 | st.write("## Rendered Graph") 421 | render_query = cypher_to_all_paths(graph_query) 422 | result = query_nebulagraph(render_query) 423 | result_df = result_to_df(result) 424 | 425 | # create pyvis graph 426 | g = create_pyvis_graph(result_df) 427 | 428 | # render with random file name 429 | graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html") 430 | 431 | components.html(graph_html, height=500, scrolling=True) 432 | 433 | 434 | with tab_code_nl2cypher: 435 | st.write( 436 | "> Natural Language to NebulaGraph Cypher Code with Langchain and Llama Index" 437 | ) 438 | tab_langchain, tab_llamaindex = st.tabs(["Langchain", "Llama Index"]) 439 | with tab_langchain: 440 | st.code(body=CODE_NL2CYPHER_LANGCHAIN, language="python") 441 | with tab_llamaindex: 442 | st.code(body=CODE_NL2CYPHER_LLAMAINDEX, language="python") 443 | 444 | st.markdown( 445 | """ 446 | 447 | ## References 448 | 449 | - [Langchain: NebulaGraphQAChain](https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa) 450 | - [Llama Index: KnowledgeGraphQueryEngine](https://gpt-index.readthedocs.io/en/latest/examples/query_engine/knowledge_graph_query_engine.html) 451 | """ 452 | ) 453 | -------------------------------------------------------------------------------- /kg_retrieval_arguments_generation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.stdout.reconfigure(encoding="utf-8") 4 | sys.stdin.reconfigure(encoding="utf-8") 5 | 
6 | import streamlit as st 7 | import streamlit.components.v1 as components 8 | 9 | import re 10 | 11 | import random 12 | 13 | CODE_KG_RAG = """ 14 | 15 | # Build Knowledge Graph with KnowledgeGraphIndex 16 | 17 | kg_index = KnowledgeGraphIndex.from_documents( 18 | documents, 19 | storage_context=storage_context, 20 | max_triplets_per_chunk=10, 21 | service_context=service_context, 22 | space_name=space_name, 23 | edge_types=edge_types, 24 | rel_prop_names=rel_prop_names, 25 | tags=tags, 26 | include_embeddings=True, 27 | ) 28 | 29 | # Create a Graph RAG Query Engine 30 | 31 | kg_rag_query_engine = kg_index.as_query_engine( 32 | include_text=False, 33 | retriever_mode="keyword", 34 | response_mode="tree_summarize", 35 | ) 36 | 37 | """ 38 | 39 | 40 | import os 41 | import json 42 | import openai 43 | from llama_index.llms import AzureOpenAI 44 | from langchain.embeddings import OpenAIEmbeddings 45 | from llama_index import LangchainEmbedding 46 | from llama_index import ( 47 | VectorStoreIndex, 48 | SimpleDirectoryReader, 49 | KnowledgeGraphIndex, 50 | LLMPredictor, 51 | ServiceContext, 52 | ) 53 | 54 | from llama_index.storage.storage_context import StorageContext 55 | from llama_index.graph_stores import NebulaGraphStore 56 | 57 | import logging 58 | import sys 59 | 60 | logging.basicConfig( 61 | stream=sys.stdout, level=logging.INFO 62 | ) # logging.DEBUG for more verbose output 63 | # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) 64 | 65 | openai.api_type = "azure" 66 | openai.api_base = st.secrets["OPENAI_API_BASE"] 67 | # openai.api_version = "2022-12-01" azure gpt-3 68 | openai.api_version = "2023-05-15" # azure gpt-3.5 turbo 69 | openai.api_key = st.secrets["OPENAI_API_KEY"] 70 | 71 | llm = AzureOpenAI( 72 | engine=st.secrets["DEPLOYMENT_NAME"], 73 | temperature=0, 74 | model="gpt-35-turbo", 75 | ) 76 | llm_predictor = LLMPredictor(llm=llm) 77 | 78 | # You need to deploy your own embedding model as well as your own chat 
completion model 79 | embedding_llm = LangchainEmbedding( 80 | OpenAIEmbeddings( 81 | model="text-embedding-ada-002", 82 | deployment=st.secrets["EMBEDDING_DEPLOYMENT_NAME"], 83 | openai_api_key=openai.api_key, 84 | openai_api_base=openai.api_base, 85 | openai_api_type=openai.api_type, 86 | openai_api_version="2022-12-01", 87 | ), 88 | embed_batch_size=1, 89 | ) 90 | 91 | service_context = ServiceContext.from_defaults( 92 | llm_predictor=llm_predictor, 93 | embed_model=embedding_llm, 94 | ) 95 | os.environ["NEBULA_USER"] = st.secrets["graphd_user"] 96 | os.environ["NEBULA_PASSWORD"] = st.secrets["graphd_password"] 97 | os.environ[ 98 | "NEBULA_ADDRESS" 99 | ] = f"{st.secrets['graphd_host']}:{st.secrets['graphd_port']}" 100 | 101 | space_name = "guardians" 102 | edge_types, rel_prop_names = ["relationship"], [ 103 | "relationship" 104 | ] # default, could be omit if create from an empty kg 105 | tags = ["entity"] # default, could be omit if create from an empty kg 106 | 107 | graph_store = NebulaGraphStore( 108 | space_name=space_name, 109 | edge_types=edge_types, 110 | rel_prop_names=rel_prop_names, 111 | tags=tags, 112 | ) 113 | 114 | from llama_index import load_index_from_storage 115 | 116 | storage_context = StorageContext.from_defaults( 117 | persist_dir="./storage_graph", graph_store=graph_store 118 | ) 119 | kg_index = load_index_from_storage( 120 | storage_context=storage_context, 121 | service_context=service_context, 122 | max_triplets_per_chunk=10, 123 | space_name=space_name, 124 | edge_types=edge_types, 125 | rel_prop_names=rel_prop_names, 126 | tags=tags, 127 | include_embeddings=True, 128 | ) 129 | 130 | storage_context_vector = StorageContext.from_defaults(persist_dir="./storage_vector") 131 | vector_index = load_index_from_storage( 132 | service_context=service_context, storage_context=storage_context_vector 133 | ) 134 | 135 | from llama_index.query_engine import KnowledgeGraphQueryEngine 136 | 137 | from llama_index.storage.storage_context import 
StorageContext 138 | from llama_index.graph_stores import NebulaGraphStore 139 | 140 | nl2kg_query_engine = KnowledgeGraphQueryEngine( 141 | storage_context=storage_context, 142 | service_context=service_context, 143 | llm=llm, 144 | verbose=True, 145 | ) 146 | 147 | kg_rag_query_engine = kg_index.as_query_engine( 148 | include_text=False, 149 | retriever_mode="keyword", 150 | response_mode="tree_summarize", 151 | ) 152 | 153 | vector_rag_query_engine = vector_index.as_query_engine() 154 | 155 | # graph + vector rag 156 | # import QueryBundle 157 | from llama_index import QueryBundle 158 | 159 | # import NodeWithScore 160 | from llama_index.schema import NodeWithScore 161 | 162 | # Retrievers 163 | from llama_index.retrievers import BaseRetriever, VectorIndexRetriever, KGTableRetriever 164 | 165 | from typing import List 166 | 167 | 168 | class CustomRetriever(BaseRetriever): 169 | """Custom retriever that performs both Vector search and Knowledge Graph search""" 170 | 171 | def __init__( 172 | self, 173 | vector_retriever: VectorIndexRetriever, 174 | kg_retriever: KGTableRetriever, 175 | mode: str = "OR", 176 | ) -> None: 177 | """Init params.""" 178 | 179 | self._vector_retriever = vector_retriever 180 | self._kg_retriever = kg_retriever 181 | if mode not in ("AND", "OR"): 182 | raise ValueError("Invalid mode.") 183 | self._mode = mode 184 | 185 | def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: 186 | """Retrieve nodes given query.""" 187 | 188 | vector_nodes = self._vector_retriever.retrieve(query_bundle) 189 | kg_nodes = self._kg_retriever.retrieve(query_bundle) 190 | 191 | vector_ids = {n.node.node_id for n in vector_nodes} 192 | kg_ids = {n.node.node_id for n in kg_nodes} 193 | 194 | combined_dict = {n.node.node_id: n for n in vector_nodes} 195 | combined_dict.update({n.node.node_id: n for n in kg_nodes}) 196 | 197 | if self._mode == "AND": 198 | retrieve_ids = vector_ids.intersection(kg_ids) 199 | else: 200 | retrieve_ids = 
vector_ids.union(kg_ids) 201 | 202 | retrieve_nodes = [combined_dict[rid] for rid in retrieve_ids] 203 | return retrieve_nodes 204 | 205 | 206 | from llama_index import get_response_synthesizer 207 | from llama_index.query_engine import RetrieverQueryEngine 208 | 209 | # create custom retriever 210 | vector_retriever = VectorIndexRetriever(index=vector_index) 211 | kg_retriever = KGTableRetriever( 212 | index=kg_index, retriever_mode="keyword", include_text=False 213 | ) 214 | custom_retriever = CustomRetriever(vector_retriever, kg_retriever) 215 | 216 | # create response synthesizer 217 | response_synthesizer = get_response_synthesizer( 218 | service_context=service_context, 219 | response_mode="tree_summarize", 220 | ) 221 | 222 | graph_vector_rag_query_engine = RetrieverQueryEngine( 223 | retriever=custom_retriever, 224 | response_synthesizer=response_synthesizer, 225 | ) 226 | 227 | 228 | def cypher_to_all_paths(query): 229 | # Find the MATCH and RETURN parts 230 | match_parts = re.findall(r"(MATCH .+?(?=MATCH|$))", query, re.I | re.S) 231 | return_part = re.search(r"RETURN .+", query).group() 232 | 233 | modified_matches = [] 234 | path_ids = [] 235 | 236 | # Go through each MATCH part 237 | for i, part in enumerate(match_parts): 238 | path_id = f"path_{i}" 239 | path_ids.append(path_id) 240 | 241 | # Replace the MATCH keyword with "MATCH path_i = " 242 | modified_part = part.replace("MATCH ", f"MATCH {path_id} = ") 243 | modified_matches.append(modified_part) 244 | 245 | # Join the modified MATCH parts 246 | matches_string = " ".join(modified_matches) 247 | 248 | # Construct the new RETURN part 249 | return_string = f"RETURN {', '.join(path_ids)};" 250 | 251 | # Remove the old RETURN part from matches_string 252 | matches_string = matches_string.replace(return_part, "") 253 | 254 | # Combine everything 255 | modified_query = f"{matches_string}\n{return_string}" 256 | 257 | return modified_query 258 | 259 | 260 | # write string to file 261 | def 
result_to_df(result): 262 | from typing import Dict 263 | 264 | import pandas as pd 265 | 266 | columns = result.keys() 267 | d: Dict[str, list] = {} 268 | for col_num in range(result.col_size()): 269 | col_name = columns[col_num] 270 | col_list = result.column_values(col_name) 271 | d[col_name] = [x.cast() for x in col_list] 272 | return pd.DataFrame(d) 273 | 274 | 275 | def render_pd_item(g, item): 276 | from nebula3.data.DataObject import Node, PathWrapper, Relationship 277 | 278 | if isinstance(item, Node): 279 | node_id = item.get_id().cast() 280 | tags = item.tags() # list of strings 281 | props = dict() 282 | for tag in tags: 283 | props.update(item.properties(tag)) 284 | g.add_node(node_id, label=node_id, title=str(props)) 285 | elif isinstance(item, Relationship): 286 | src_id = item.start_vertex_id().cast() 287 | dst_id = item.end_vertex_id().cast() 288 | edge_name = item.edge_name() 289 | props = item.properties() 290 | # ensure start and end vertex exist in graph 291 | if not src_id in g.node_ids: 292 | g.add_node(src_id) 293 | if not dst_id in g.node_ids: 294 | g.add_node(dst_id) 295 | g.add_edge(src_id, dst_id, label=edge_name, title=str(props)) 296 | elif isinstance(item, PathWrapper): 297 | for node in item.nodes(): 298 | render_pd_item(g, node) 299 | for edge in item.relationships(): 300 | render_pd_item(g, edge) 301 | elif isinstance(item, list): 302 | for it in item: 303 | render_pd_item(g, it) 304 | 305 | 306 | def create_pyvis_graph(result_df): 307 | from pyvis.network import Network 308 | 309 | g = Network( 310 | notebook=True, 311 | directed=True, 312 | cdn_resources="in_line", 313 | height="500px", 314 | width="100%", 315 | ) 316 | for _, row in result_df.iterrows(): 317 | for item in row: 318 | render_pd_item(g, item) 319 | g.repulsion( 320 | node_distance=100, 321 | central_gravity=0.2, 322 | spring_length=200, 323 | spring_strength=0.05, 324 | damping=0.09, 325 | ) 326 | return g 327 | 328 | 329 | def query_nebulagraph( 330 | query, 331 | 
space_name=space_name, 332 | address=st.secrets["graphd_host"], 333 | port=9669, 334 | user=st.secrets["graphd_user"], 335 | password=st.secrets["graphd_password"], 336 | ): 337 | from nebula3.Config import SessionPoolConfig 338 | from nebula3.gclient.net.SessionPool import SessionPool 339 | 340 | config = SessionPoolConfig() 341 | session_pool = SessionPool(user, password, space_name, [(address, port)]) 342 | session_pool.init(config) 343 | return session_pool.execute(query) 344 | 345 | 346 | st.title("Graph RAG vs RAG vs NL2Cypher") 347 | 348 | ( 349 | tab_code_rag, 350 | tab_notebook, 351 | tab_NL2Cypher_vs_GraphRAG, 352 | tab_Vector_vs_Graph_Vector, 353 | ) = st.tabs( 354 | [ 355 | "Code: Graph RAG", 356 | "Full Notebook", 357 | "Demo: NL2Cypher vs Graph RAG", 358 | "Demo: Vector vs Graph + Vector", 359 | ] 360 | ) 361 | 362 | 363 | with tab_code_rag: 364 | st.write( 365 | "To Create LLM Apps, we could leverage Knowledge Graph in different approaches: **NL2Cypher**, **Graph RAG** and **Graph + Vector RAG**, this Notebook demonstrates the know-how and the comparison between the different approaches." 366 | ) 367 | st.write( 368 | "See full notebook for more details and try different approaches online demo on corresponding tabs." 369 | ) 370 | st.code(body=CODE_KG_RAG, language="python") 371 | 372 | with tab_notebook: 373 | st.write("> Full Notebook") 374 | st.markdown( 375 | """ 376 | 377 | This is the full notebook to demonstrate how to: 378 | 379 | - Extract from data sources and build a knowledge graph with LLM and Llama Index, NebulaGraph in 3 lines of code 380 | - QA with NL2Cypher, 3 lines of code 381 | - QA with Graph RAG, 3 lines of code 382 | - QA with Graph + Vector RAG 383 | - Compare the performance of different approaches 384 | """ 385 | ) 386 | # link to download notebook 387 | st.markdown( 388 | """ 389 | [Download](https://www.siwei.io/demo-dumps/graph-rag/GraphRAG.ipynb) the notebook. 
390 | """ 391 | ) 392 | 393 | components.iframe( 394 | src="https://www.siwei.io/demo-dumps/graph-rag/GraphRAG.html", 395 | height=2000, 396 | width=1000, 397 | scrolling=True, 398 | ) 399 | 400 | 401 | with tab_NL2Cypher_vs_GraphRAG: 402 | st.write("> NL2Cypher vs Graph RAG") 403 | 404 | query_string = st.text_input( 405 | label="Enter natural language query string", value="Tell me about Peter Quill?" 406 | ) 407 | col_NL2Cypher, col_GraphRAG = st.columns(2) 408 | if st.button("Generate Answer with NL2Cypher and Graph RAG"): 409 | response_NL2Cypher = nl2kg_query_engine.query(query_string) 410 | response_GraphRAG = kg_rag_query_engine.query(query_string) 411 | with col_NL2Cypher: 412 | response = response_NL2Cypher 413 | graph_query = list(response.metadata.values())[0]["graph_store_query"] 414 | graph_query = graph_query.replace("WHERE", "\n WHERE").replace( 415 | "RETURN", "\nRETURN" 416 | ) 417 | answer_NL2Cypher = str(response) 418 | st.markdown( 419 | f""" 420 | > Query used 421 | 422 | ```cypher 423 | {graph_query} 424 | ``` 425 | """ 426 | ) 427 | st.write("#### Rendered Graph") 428 | render_query = cypher_to_all_paths(graph_query) 429 | result = query_nebulagraph(render_query) 430 | result_df = result_to_df(result) 431 | 432 | # create pyvis graph 433 | g = create_pyvis_graph(result_df) 434 | 435 | # render with random file name 436 | graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html") 437 | 438 | components.html(graph_html, height=500, scrolling=True) 439 | 440 | st.write(f"*Answer*: {answer_NL2Cypher}") 441 | 442 | with col_GraphRAG: 443 | response = response_GraphRAG 444 | answer_GraphRAG = str(response) 445 | 446 | related_entities = list( 447 | list(response.metadata.values())[0]["kg_rel_map"].keys() 448 | ) 449 | render_query = f"MATCH p=(n)-[*1..2]-() \n WHERE id(n) IN {related_entities} \nRETURN p" 450 | 451 | st.markdown( 452 | f""" 453 | > RAG Subgraph Query(depth=2) 454 | 455 | ```cypher 456 | {render_query} 457 | ``` 458 | """ 
459 | ) 460 | st.write("#### Rendered Graph") 461 | result = query_nebulagraph(render_query) 462 | result_df = result_to_df(result) 463 | 464 | # create pyvis graph 465 | g = create_pyvis_graph(result_df) 466 | 467 | # render with random file name 468 | graph_html = g.generate_html(f"graph_{random.randint(0, 1000)}.html") 469 | 470 | components.html(graph_html, height=500, scrolling=True) 471 | 472 | st.write(f"*Answer*: {answer_GraphRAG}") 473 | st.write("## Compare the two QA result") 474 | result = llm.complete( 475 | f""" 476 | Compare the two QA result on "{query_string}", list the differences between them, to help evalute them. Output in markdown table. 477 | 478 | Result from NL2Cypher: {str(response_NL2Cypher)} 479 | --- 480 | Result from Graph RAG: {str(response_GraphRAG)} 481 | """ 482 | ) 483 | st.markdown(result.text) 484 | 485 | with tab_Vector_vs_Graph_Vector: 486 | st.write("> Vector RAG vs Graph + Vector RAG") 487 | query_string = st.text_input( 488 | label="Type the question to answer", value="Tell me about Rocket?" 489 | ) 490 | col_VectorRAG, col_GraphVectorRAG = st.columns(2) 491 | if st.button("Generate Answer with Vector and Graph + Vector"): 492 | response_VectorRAG = vector_rag_query_engine.query(query_string) 493 | response_GraphVectorRAG = graph_vector_rag_query_engine.query(query_string) 494 | with col_VectorRAG: 495 | response = response_VectorRAG 496 | answer_VectorRAG = str(response) 497 | st.write(f"*Answer*: {answer_VectorRAG}") 498 | 499 | with col_GraphVectorRAG: 500 | response = response_GraphVectorRAG 501 | answer_GraphVectorRAG = str(response) 502 | st.write(f"*Answer*: {answer_GraphVectorRAG}") 503 | 504 | st.write("## Compare the two QA result") 505 | st.markdown( 506 | llm.complete( 507 | f""" 508 | Compare the two QA result on "{query_string}", list the differences between them, to help evalute them. Output in markdown table. 
509 | 510 | Result from Vector RAG: {str(response_VectorRAG)} 511 | --- 512 | Result from Graph+Vector RAG: {str(response_GraphVectorRAG)} 513 | """ 514 | ).text 515 | ) 516 | -------------------------------------------------------------------------------- /notebooks/KG_Building.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2816dd42", 6 | "metadata": {}, 7 | "source": [ 8 | "## Knowledge Graph Building with LLM\n", 9 | "\n", 10 | "```\n", 11 | " ┌─────────────────────────┐\n", 12 | " │ │\n", 13 | " │ Knowledge Graph │\n", 14 | " │ on NebulaGraph │\n", 15 | " │ │\n", 16 | " │ .───. │\n", 17 | " │ ┌──▶( ) │\n", 18 | " │ │ `───' │\n", 19 | "┌────────────────────┐ │ .───. │ │\n", 20 | "│ Data Sources │ │ ( )───┘ │\n", 21 | "│ │ Extract │ `───' │\n", 22 | "│ Database, Wikepedia│━━With LLM━━━▶ │ .───. │\n", 23 | "│ CSV, JSON Files │ │ └───────▶( ) │\n", 24 | "│ Web APIs... │ │ `───' │\n", 25 | "└────────────────────┘ │ ▲ │\n", 26 | " │ │ │\n", 27 | " │ │ .───. │\n", 28 | " │ └─( )│\n", 29 | " │ `───' │\n", 30 | " │ │\n", 31 | " │ │\n", 32 | " └─────────────────────────┘\n", 33 | "```" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "4e900489", 39 | "metadata": {}, 40 | "source": [ 41 | "# 1. 
Preparation\n", 42 | "\n", 43 | "## 1.1 Prepare for LLM" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "id": "895f797a", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# Only For OpenAI\n", 54 | "\n", 55 | "import os\n", 56 | "\n", 57 | "os.environ[\"OPENAI_API_KEY\"] = \"INSERT OPENAI KEY\"\n", 58 | "\n", 59 | "import logging\n", 60 | "import sys\n", 61 | "\n", 62 | "logging.basicConfig(\n", 63 | " stream=sys.stdout, level=logging.INFO\n", 64 | ") # logging.DEBUG for more verbose output\n", 65 | "\n", 66 | "from llama_index import (\n", 67 | " KnowledgeGraphIndex,\n", 68 | " LLMPredictor,\n", 69 | " ServiceContext,\n", 70 | " SimpleDirectoryReader,\n", 71 | ")\n", 72 | "from llama_index.storage.storage_context import StorageContext\n", 73 | "from llama_index.graph_stores import NebulaGraphStore\n", 74 | "\n", 75 | "\n", 76 | "from langchain import OpenAI\n", 77 | "from IPython.display import Markdown, display\n", 78 | "\n", 79 | "\n", 80 | "# define LLM\n", 81 | "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"))\n", 82 | "service_context = ServiceContext.from_defaults(\n", 83 | " llm_predictor=llm_predictor, chunk_size_limit=512\n", 84 | ")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "id": "f9b21fcc", 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# Only For Azure OpenAI\n", 95 | "\n", 96 | "import os\n", 97 | "import json\n", 98 | "import openai\n", 99 | "from langchain.llms import AzureOpenAI\n", 100 | "from langchain.embeddings import OpenAIEmbeddings\n", 101 | "from llama_index import LangchainEmbedding\n", 102 | "from llama_index import (\n", 103 | " VectorStoreIndex,\n", 104 | " SimpleDirectoryReader,\n", 105 | " KnowledgeGraphIndex,\n", 106 | " LLMPredictor,\n", 107 | " ServiceContext,\n", 108 | ")\n", 109 | "\n", 110 | "from llama_index.storage.storage_context import StorageContext\n", 111 | "from 
llama_index.graph_stores import NebulaGraphStore\n", 112 | "\n", 113 | "import logging\n", 114 | "import sys\n", 115 | "\n", 116 | "from IPython.display import Markdown, display\n", 117 | "\n", 118 | "logging.basicConfig(\n", 119 | " stream=sys.stdout, level=logging.INFO\n", 120 | ") # logging.DEBUG for more verbose output\n", 121 | "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n", 122 | "\n", 123 | "openai.api_type = \"azure\"\n", 124 | "openai.api_base = \"INSERT AZURE API BASE\"\n", 125 | "openai.api_version = \"2022-12-01\"\n", 126 | "os.environ[\"OPENAI_API_KEY\"] = \"INSERT OPENAI KEY\"\n", 127 | "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n", 128 | "\n", 129 | "# define LLM\n", 130 | "llm = AzureOpenAI(\n", 131 | " deployment_name=\"INSERT DEPLOYMENT NAME\",\n", 132 | " temperature=0,\n", 133 | " openai_api_version=openai.api_version,\n", 134 | " model_kwargs={\n", 135 | " \"api_key\": openai.api_key,\n", 136 | " \"api_base\": openai.api_base,\n", 137 | " \"api_type\": openai.api_type,\n", 138 | " \"api_version\": openai.api_version,\n", 139 | " },\n", 140 | ")\n", 141 | "llm_predictor = LLMPredictor(llm=llm)\n", 142 | "\n", 143 | "# You need to deploy your own embedding model as well as your own chat completion model\n", 144 | "embedding_llm = LangchainEmbedding(\n", 145 | " OpenAIEmbeddings(\n", 146 | " model=\"text-embedding-ada-002\",\n", 147 | " deployment=\"INSERT DEPLOYMENT NAME\",\n", 148 | " openai_api_key=openai.api_key,\n", 149 | " openai_api_base=openai.api_base,\n", 150 | " openai_api_type=openai.api_type,\n", 151 | " openai_api_version=openai.api_version,\n", 152 | " ),\n", 153 | " embed_batch_size=1,\n", 154 | ")\n", 155 | "\n", 156 | "service_context = ServiceContext.from_defaults(\n", 157 | " llm_predictor=llm_predictor,\n", 158 | " embed_model=embedding_llm,\n", 159 | ")" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "210dc3d4", 165 | "metadata": {}, 166 | "source": [ 167 | "## 1.2. 
Prepare for NebulaGraph as Graph Store" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "id": "ddb6beff", 173 | "metadata": {}, 174 | "source": [ 175 | "❗Access NebulaGraph Console to **create space** and **graph schema**\n", 176 | "\n", 177 | "```sql\n", 178 | "CREATE SPACE guardians(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);\n", 179 | ":sleep 10;\n", 180 | "USE guardians;\n", 181 | "CREATE TAG entity(name string);\n", 182 | "CREATE EDGE relationship(relationship string);\n", 183 | ":sleep 10;\n", 184 | "CREATE TAG INDEX entity_index ON entity(name(256));\n", 185 | "```" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "id": "7e9037c5", 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "os.environ[\"NEBULA_USER\"] = \"root\"\n", 196 | "os.environ[\"NEBULA_PASSWORD\"] = \"nebula\" # default password\n", 197 | "os.environ[\n", 198 | " \"NEBULA_ADDRESS\"\n", 199 | "] = \"127.0.0.1:9669\" # assumed we have NebulaGraph installed locally\n", 200 | "\n", 201 | "space_name = \"guardians\"\n", 202 | "edge_types, rel_prop_names = [\"relationship\"], [\n", 203 | " \"relationship\"\n", 204 | "] # default, could be omit if create from an empty kg\n", 205 | "tags = [\"entity\"] # default, could be omit if create from an empty kg\n", 206 | "\n", 207 | "graph_store = NebulaGraphStore(\n", 208 | " space_name=space_name,\n", 209 | " edge_types=edge_types,\n", 210 | " rel_prop_names=rel_prop_names,\n", 211 | " tags=tags,\n", 212 | ")\n", 213 | "storage_context = StorageContext.from_defaults(graph_store=graph_store)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "5f38240b", 219 | "metadata": {}, 220 | "source": [ 221 | "## 2. 
Build the Knowledge Graph\n", 222 | "\n" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "id": "7af875b5", 228 | "metadata": {}, 229 | "source": [ 230 | "### 2.1 Preprocess Data\n", 231 | "\n", 232 | "We will download and preprocess data from:\n", 233 | " https://en.wikipedia.org/wiki/Guardians_of_the_Galaxy_Vol._3" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "a13b7b67", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "from llama_index import download_loader\n", 244 | "\n", 245 | "WikipediaReader = download_loader(\"WikipediaReader\")\n", 246 | "\n", 247 | "loader = WikipediaReader()\n", 248 | "\n", 249 | "documents = loader.load_data(\n", 250 | " pages=[\"Guardians of the Galaxy Vol. 3\"], auto_suggest=False\n", 251 | ")" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "id": "1bc16445", 257 | "metadata": {}, 258 | "source": [ 259 | "### 2.2 Extract Triplets and Save to NebulaGraph" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "id": "e45cf6f9", 265 | "metadata": {}, 266 | "source": [ 267 | "We will persist it to disk and NebulaGraph, thus when using it, we don't need to extract again."
268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "id": "ac09be97", 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "kg_index = KnowledgeGraphIndex.from_documents(\n", 278 | " documents,\n", 279 | " storage_context=storage_context,\n", 280 | " max_triplets_per_chunk=10,\n", 281 | " service_context=service_context,\n", 282 | " space_name=space_name,\n", 283 | " edge_types=edge_types,\n", 284 | " rel_prop_names=rel_prop_names,\n", 285 | " tags=tags,\n", 286 | " include_embeddings=True,\n", 287 | ")" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "id": "7d245e9b", 293 | "metadata": {}, 294 | "source": [ 295 | "Let's persist the context from memory to disk" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "id": "36374bbe", 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "kg_index.storage_context.persist(persist_dir=\"./storage_graph\")" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "id": "fbf8fcb2", 311 | "metadata": {}, 312 | "source": [ 313 | "The files are generated:" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "ba90f2cc", 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "total 9120\r\n", 327 | "-rw-r--r--@ 1 weyl staff 66922 Jul 12 20:26 docstore.json\r\n", 328 | "-rw-r--r--@ 1 weyl staff 4594860 Jul 12 20:26 index_store.json\r\n", 329 | "-rw-r--r--@ 1 weyl staff 51 Jul 12 20:26 vector_store.json\r\n" 330 | ] 331 | } 332 | ], 333 | "source": [ 334 | "!ls -l storage_graph" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "id": "28b6e3c9", 340 | "metadata": {}, 341 | "source": [ 342 | "### 2.3 Inspect the Graph we built\n", 343 | "\n", 344 | "We will leverage NebulaGraph Jupyter Extension, do remember to install it before next step:\n", 345 | "\n", 346 | "```bash\n", 347 | "$ pip 
install ipython-ngql\n", 348 | "```" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "id": "cc19ed17", 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "%load_ext ngql\n", 359 | "%ngql --address 127.0.0.1 --port 9669 --user root --password nebula\n", 360 | "%ngql USE guardians" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "id": "f00485c2", 366 | "metadata": {}, 367 | "source": [ 368 | "We could query 30 random edges:" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "id": "f7538a65", 375 | "metadata": {}, 376 | "outputs": [ 377 | { 378 | "name": "stdout", 379 | "output_type": "stream", 380 | "text": [ 381 | "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n", 382 | "Get connection to ('127.0.0.1', 9669)\n" 383 | ] 384 | }, 385 | { 386 | "data": { 387 | "text/html": [ 388 | "
\n", 389 | "\n", 402 | "\n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | "
e
0(\"Adam Warlock\")-[:relationship@98688268702526...
1(\"Alan F. Horn\")-[:relationship@-3866030880391...
2(\"Alan F. Horn\")-[:relationship@-3866030880391...
3(\"Bakalova\")-[:relationship@-78310709996010382...
4(\"Bakalova\")-[:relationship@-18287293525973127...
5(\"Bautista\")-[:relationship@262829015229588616...
6(\"Bautista\")-[:relationship@264209192087427643...
7(\"Chris Pratt\")-[:relationship@-53886203992796...
8(\"Christopher Fairbank\")-[:relationship@704429...
9(\"Cooper\")-[:relationship@2642091920874276436{...
10(\"Daniela Melchior\")-[:relationship@5794733688...
11(\"Dave Bautista\")-[:relationship@-538862039927...
12(\"Debicki\")-[:relationship@2682825685616935037...
13(\"Diesel\")-[:relationship@2642091920874276436{...
14(\"Disney\")-[:relationship@-7269035608107002438...
15(\"Disney\")-[:relationship@4594936970614874383{...
16(\"Drax\")-[:relationship@1274897091364343563{re...
17(\"Elizabeth Debicki\")-[:relationship@704429536...
18(\"Gamora\")-[:relationship@2108090488737331578{...
19(\"Gamora\")-[:relationship@4452575226635738814{...
20(\"Gamora\")-[:relationship@7254563908946132317{...
21(\"George MacKay\")-[:relationship@2027380399406...
22(\"Gillan\")-[:relationship@-1827525784919523442...
23(\"Gillan\")-[:relationship@1278621438198917644{...
24(\"Gillan\")-[:relationship@2642091920874276436{...
25(\"Gillan\")-[:relationship@7823655194542812825{...
26(\"Gregg Henry\")-[:relationship@704429536949728...
27(\"Guardians cast\")-[:relationship@-64051353433...
28(\"Guardians of the Galaxy\")-[:relationship@790...
29(\"Guardians of the Galaxy Vol. 3\")-[:relations...
\n", 532 | "
" 533 | ], 534 | "text/plain": [ 535 | " e\n", 536 | "0 (\"Adam Warlock\")-[:relationship@98688268702526...\n", 537 | "1 (\"Alan F. Horn\")-[:relationship@-3866030880391...\n", 538 | "2 (\"Alan F. Horn\")-[:relationship@-3866030880391...\n", 539 | "3 (\"Bakalova\")-[:relationship@-78310709996010382...\n", 540 | "4 (\"Bakalova\")-[:relationship@-18287293525973127...\n", 541 | "5 (\"Bautista\")-[:relationship@262829015229588616...\n", 542 | "6 (\"Bautista\")-[:relationship@264209192087427643...\n", 543 | "7 (\"Chris Pratt\")-[:relationship@-53886203992796...\n", 544 | "8 (\"Christopher Fairbank\")-[:relationship@704429...\n", 545 | "9 (\"Cooper\")-[:relationship@2642091920874276436{...\n", 546 | "10 (\"Daniela Melchior\")-[:relationship@5794733688...\n", 547 | "11 (\"Dave Bautista\")-[:relationship@-538862039927...\n", 548 | "12 (\"Debicki\")-[:relationship@2682825685616935037...\n", 549 | "13 (\"Diesel\")-[:relationship@2642091920874276436{...\n", 550 | "14 (\"Disney\")-[:relationship@-7269035608107002438...\n", 551 | "15 (\"Disney\")-[:relationship@4594936970614874383{...\n", 552 | "16 (\"Drax\")-[:relationship@1274897091364343563{re...\n", 553 | "17 (\"Elizabeth Debicki\")-[:relationship@704429536...\n", 554 | "18 (\"Gamora\")-[:relationship@2108090488737331578{...\n", 555 | "19 (\"Gamora\")-[:relationship@4452575226635738814{...\n", 556 | "20 (\"Gamora\")-[:relationship@7254563908946132317{...\n", 557 | "21 (\"George MacKay\")-[:relationship@2027380399406...\n", 558 | "22 (\"Gillan\")-[:relationship@-1827525784919523442...\n", 559 | "23 (\"Gillan\")-[:relationship@1278621438198917644{...\n", 560 | "24 (\"Gillan\")-[:relationship@2642091920874276436{...\n", 561 | "25 (\"Gillan\")-[:relationship@7823655194542812825{...\n", 562 | "26 (\"Gregg Henry\")-[:relationship@704429536949728...\n", 563 | "27 (\"Guardians cast\")-[:relationship@-64051353433...\n", 564 | "28 (\"Guardians of the Galaxy\")-[:relationship@790...\n", 565 | "29 (\"Guardians of the Galaxy Vol. 
3\")-[:relations..." 566 | ] 567 | }, 568 | "execution_count": null, 569 | "metadata": {}, 570 | "output_type": "execute_result" 571 | } 572 | ], 573 | "source": [ 574 | "%ngql MATCH ()-[e]->() RETURN e LIMIT 30" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "id": "28977dd6", 580 | "metadata": {}, 581 | "source": [ 582 | "And **draw** it:" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "id": "97553264", 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "name": "stdout", 593 | "output_type": "stream", 594 | "text": [ 595 | "nebulagraph_draw.html\n" 596 | ] 597 | }, 598 | { 599 | "data": { 600 | "text/html": [ 601 | "\n", 602 | " \n", 610 | " " 611 | ], 612 | "text/plain": [ 613 | "" 614 | ] 615 | }, 616 | "execution_count": null, 617 | "metadata": {}, 618 | "output_type": "execute_result" 619 | } 620 | ], 621 | "source": [ 622 | "%ng_draw" 623 | ] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "id": "bae60f9c", 628 | "metadata": {}, 629 | "source": [ 630 | "## NL2Cypher\n", 631 | "\n", 632 | "Now we have a Knowledge Graph built on top of Wikipedia. 
With NebulaGraph LLM tooling, we could query the KG in Natural language(NL2Cypher).\n", 633 | "\n", 634 | "First, let's use Llama Index:" 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": null, 640 | "id": "a8f6f8a1", 641 | "metadata": {}, 642 | "outputs": [], 643 | "source": [ 644 | "from llama_index.query_engine import KnowledgeGraphQueryEngine\n", 645 | "\n", 646 | "from llama_index.storage.storage_context import StorageContext\n", 647 | "from llama_index.graph_stores import NebulaGraphStore\n", 648 | "\n", 649 | "nl2kg_query_engine = KnowledgeGraphQueryEngine(\n", 650 | " storage_context=storage_context,\n", 651 | " service_context=service_context,\n", 652 | " llm=llm,\n", 653 | " verbose=True,\n", 654 | ")" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "id": "627f4b4d", 660 | "metadata": {}, 661 | "source": [ 662 | "We could see `KnowledgeGraphQueryEngine` could be used to **Generate Graph Query** and do query for us and finally LLM could help with the answer synthesis in one go!"
663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "id": "97e14a81", 669 | "metadata": {}, 670 | "outputs": [ 671 | { 672 | "name": "stdout", 673 | "output_type": "stream", 674 | "text": [ 675 | "\u001b[33;1m\u001b[1;3mGraph Store Query: MATCH (p:`entity`)-[:relationship]->(e:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN e.`entity`.`name`;\n", 676 | "\u001b[0mINFO:llama_index.query_engine.knowledge_graph_query_engine:Graph Store Query: MATCH (p:`entity`)-[:relationship]->(e:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN e.`entity`.`name`;\n", 677 | "Graph Store Query: MATCH (p:`entity`)-[:relationship]->(e:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN e.`entity`.`name`;\n", 678 | "\u001b[33;1m\u001b[1;3mGraph Store Response: {'e.entity.name': ['Guardians of the Galaxy']}\n", 679 | "\u001b[0mINFO:llama_index.query_engine.knowledge_graph_query_engine:Graph Store Response: {'e.entity.name': ['Guardians of the Galaxy']}\n", 680 | "Graph Store Response: {'e.entity.name': ['Guardians of the Galaxy']}\n", 681 | "\u001b[32;1m\u001b[1;3mFinal Response: \n", 682 | "Peter Quill is a character from the Marvel Comics series Guardians of the Galaxy.\n", 683 | "\u001b[0m" 684 | ] 685 | }, 686 | { 687 | "data": { 688 | "text/markdown": [ 689 | "\n", 690 | "Peter Quill is a character from the Marvel Comics series Guardians of the Galaxy." 691 | ], 692 | "text/plain": [ 693 | "" 694 | ] 695 | }, 696 | "metadata": {}, 697 | "output_type": "display_data" 698 | } 699 | ], 700 | "source": [ 701 | "response = nl2kg_query_engine.query(\n", 702 | " \"Tell me about Peter Quill?\",\n", 703 | ")\n", 704 | "display(Markdown(f\"{response}\"))" 705 | ] 706 | }, 707 | { 708 | "cell_type": "markdown", 709 | "id": "ce948418", 710 | "metadata": {}, 711 | "source": [ 712 | "💡 Apart from the e2e KGQA, we could ask for only NL2Cypher like this with `generate_query`." 
713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": null, 718 | "id": "bb51418c", 719 | "metadata": {}, 720 | "outputs": [ 721 | { 722 | "data": { 723 | "text/markdown": [ 724 | "\n", 725 | "```cypher\n", 726 | "MATCH (p:`entity`)-[:relationship]->(e:`entity`) \n", 727 | " WHERE p.`entity`.`name` == 'Peter Quill' \n", 728 | "RETURN e.`entity`.`name`;\n", 729 | "```\n" 730 | ], 731 | "text/plain": [ 732 | "" 733 | ] 734 | }, 735 | "metadata": {}, 736 | "output_type": "display_data" 737 | } 738 | ], 739 | "source": [ 740 | "graph_query = nl2kg_query_engine.generate_query(\n", 741 | " \"Tell me about Peter Quill?\",\n", 742 | ")\n", 743 | "graph_query = graph_query.replace(\"WHERE\", \"\\n WHERE\").replace(\"RETURN\", \"\\nRETURN\")\n", 744 | "\n", 745 | "display(\n", 746 | " Markdown(\n", 747 | " f\"\"\"\n", 748 | "```cypher\n", 749 | "{graph_query}\n", 750 | "```\n", 751 | "\"\"\"\n", 752 | " )\n", 753 | ")" 754 | ] 755 | }, 756 | { 757 | "cell_type": "markdown", 758 | "id": "ef9565ca", 759 | "metadata": {}, 760 | "source": [ 761 | "Then, of course we could run the query by ourselves with it!" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": null, 767 | "id": "265fbfb6", 768 | "metadata": {}, 769 | "outputs": [ 770 | { 771 | "name": "stdout", 772 | "output_type": "stream", 773 | "text": [ 774 | "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n", 775 | "Get connection to ('127.0.0.1', 9669)\n" 776 | ] 777 | }, 778 | { 779 | "data": { 780 | "text/html": [ 781 | "
\n", 782 | "\n", 795 | "\n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | "
e.entity.name
0Guardians of the Galaxy
\n", 809 | "
" 810 | ], 811 | "text/plain": [ 812 | " e.entity.name\n", 813 | "0 Guardians of the Galaxy" 814 | ] 815 | }, 816 | "execution_count": null, 817 | "metadata": {}, 818 | "output_type": "execute_result" 819 | } 820 | ], 821 | "source": [ 822 | "%%ngql\n", 823 | "MATCH (p:`entity`)-[:relationship]->(e:`entity`)\n", 824 | " WHERE p.`entity`.`name` == 'Peter Quill'\n", 825 | "RETURN e.`entity`.`name`;" 826 | ] 827 | }, 828 | { 829 | "cell_type": "markdown", 830 | "id": "8edc77cb", 831 | "metadata": {}, 832 | "source": [ 833 | "Or we changed the return part to whole path, for drawing it!" 834 | ] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "execution_count": null, 839 | "id": "afe1fe35", 840 | "metadata": { 841 | "scrolled": true 842 | }, 843 | "outputs": [ 844 | { 845 | "name": "stdout", 846 | "output_type": "stream", 847 | "text": [ 848 | "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n", 849 | "Get connection to ('127.0.0.1', 9669)\n" 850 | ] 851 | }, 852 | { 853 | "data": { 854 | "text/html": [ 855 | "
\n", 856 | "\n", 869 | "\n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | "
path_0
0(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
\n", 883 | "
" 884 | ], 885 | "text/plain": [ 886 | " path_0\n", 887 | "0 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[..." 888 | ] 889 | }, 890 | "execution_count": null, 891 | "metadata": {}, 892 | "output_type": "execute_result" 893 | } 894 | ], 895 | "source": [ 896 | "%%ngql\n", 897 | "MATCH path_0=(p:`entity`)-[:relationship]->(e:`entity`)\n", 898 | " WHERE p.`entity`.`name` == 'Peter Quill'\n", 899 | "RETURN path_0;" 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": null, 905 | "id": "5de504de", 906 | "metadata": { 907 | "scrolled": false 908 | }, 909 | "outputs": [ 910 | { 911 | "name": "stdout", 912 | "output_type": "stream", 913 | "text": [ 914 | "nebulagraph_draw.html\n" 915 | ] 916 | }, 917 | { 918 | "data": { 919 | "text/html": [ 920 | "\n", 921 | " \n", 929 | " " 930 | ], 931 | "text/plain": [ 932 | "" 933 | ] 934 | }, 935 | "execution_count": null, 936 | "metadata": {}, 937 | "output_type": "execute_result" 938 | } 939 | ], 940 | "source": [ 941 | "%ng_draw" 942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": null, 947 | "id": "d6de8141", 948 | "metadata": {}, 949 | "outputs": [], 950 | "source": [ 951 | "!mv nebulagraph_draw.html nebulagraph_draw_nl2cypher.html" 952 | ] 953 | }, 954 | { 955 | "cell_type": "markdown", 956 | "id": "cd7fe472", 957 | "metadata": {}, 958 | "source": [ 959 | "### NL2Cypher With Langchain\n", 960 | "\n", 961 | "Alternatively, we could do via Langchain **NebulaGraphQAChain**, see [docs](https://python.langchain.com/docs/modules/chains/additional/graph_nebula_qa)" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": null, 967 | "id": "d51df174", 968 | "metadata": {}, 969 | "outputs": [], 970 | "source": [ 971 | "from langchain.chat_models import ChatOpenAI\n", 972 | "from langchain.chains import NebulaGraphQAChain\n", 973 | "from langchain.graphs import NebulaGraph\n", 974 | "\n", 975 | "graph = NebulaGraph(\n", 976 | " space=space_name,\n", 977 | " 
username=\"root\",\n", 978 | " password=\"nebula\",\n", 979 | " address=\"127.0.0.1\",\n", 980 | " port=9669,\n", 981 | " session_pool_size=30,\n", 982 | ")\n", 983 | "\n", 984 | "chain = NebulaGraphQAChain.from_llm(llm, graph=graph, verbose=True)" 985 | ] 986 | }, 987 | { 988 | "cell_type": "code", 989 | "execution_count": null, 990 | "id": "96afe26b", 991 | "metadata": {}, 992 | "outputs": [ 993 | { 994 | "name": "stdout", 995 | "output_type": "stream", 996 | "text": [ 997 | "\n", 998 | "\n", 999 | "\u001b[1m> Entering new chain...\u001b[0m\n", 1000 | "Generated nGQL:\n", 1001 | "\u001b[32;1m\u001b[1;3m\n", 1002 | "\n", 1003 | "MATCH (p:`entity`)-[e:relationship]->(m:`entity`) WHERE p.`entity`.`name` == 'Peter Quill' RETURN p.`entity`.`name`, e.relationship, m.`entity`.`name`;\u001b[0m\n", 1004 | "Full Context:\n", 1005 | "\u001b[32;1m\u001b[1;3m{'p.entity.name': ['Peter Quill'], 'e.relationship': ['is leader of'], 'm.entity.name': ['Guardians of the Galaxy']}\u001b[0m\n", 1006 | "\n", 1007 | "\u001b[1m> Finished chain.\u001b[0m\n" 1008 | ] 1009 | }, 1010 | { 1011 | "data": { 1012 | "text/plain": [ 1013 | "' Peter Quill is the leader of the Guardians of the Galaxy.'" 1014 | ] 1015 | }, 1016 | "execution_count": null, 1017 | "metadata": {}, 1018 | "output_type": "execute_result" 1019 | } 1020 | ], 1021 | "source": [ 1022 | "chain.run(\n", 1023 | " \"Tell me about Peter Quill?\",\n", 1024 | ")" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "markdown", 1029 | "id": "c306e4be", 1030 | "metadata": {}, 1031 | "source": [ 1032 | "## Graph RAG\n", 1033 | "\n", 1034 | "Apart from the NL2Cypher fashion of exploiting KG in QA, especially for complex tasks, we could also do it in the **Retrieval-Augmented Generation** way." 
1035 | ] 1036 | }, 1037 | { 1038 | "cell_type": "code", 1039 | "execution_count": null, 1040 | "id": "122a4442", 1041 | "metadata": {}, 1042 | "outputs": [ 1043 | { 1044 | "name": "stdout", 1045 | "output_type": "stream", 1046 | "text": [ 1047 | "INFO:llama_index.indices.loading:Loading all indices.\n", 1048 | "Loading all indices.\n" 1049 | ] 1050 | } 1051 | ], 1052 | "source": [ 1053 | "from llama_index import load_index_from_storage\n", 1054 | "\n", 1055 | "storage_context_graph = StorageContext.from_defaults(\n", 1056 | " persist_dir=\"./storage_graph\", graph_store=graph_store\n", 1057 | ")\n", 1058 | "kg_index_new = load_index_from_storage(\n", 1059 | " storage_context=storage_context_graph,\n", 1060 | " service_context=service_context,\n", 1061 | " max_triplets_per_chunk=10,\n", 1062 | " space_name=space_name,\n", 1063 | " edge_types=edge_types,\n", 1064 | " rel_prop_names=rel_prop_names,\n", 1065 | " tags=tags,\n", 1066 | " include_embeddings=True,\n", 1067 | ")" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "code", 1072 | "execution_count": null, 1073 | "id": "0e1bb6fe", 1074 | "metadata": {}, 1075 | "outputs": [], 1076 | "source": [ 1077 | "kg_rag_query_engine = kg_index_new.as_query_engine(\n", 1078 | " include_text=False,\n", 1079 | " retriever_mode=\"keyword\",\n", 1080 | " response_mode=\"tree_summarize\",\n", 1081 | ")" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "code", 1086 | "execution_count": null, 1087 | "id": "100395cb", 1088 | "metadata": {}, 1089 | "outputs": [ 1090 | { 1091 | "name": "stdout", 1092 | "output_type": "stream", 1093 | "text": [ 1094 | "INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me about Peter Quill?\n", 1095 | "> Starting query: Tell me about Peter Quill?\n", 1096 | "INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['biography', 'Peter Quill', 'Peter', 'Quill', 'information']\n", 1097 | "> Query keywords: ['biography', 'Peter Quill', 'Peter', 'Quill', 'information']\n", 
1098 | "INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n", 1099 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'released in', '2014']\n", 1100 | "Peter Quill ['portrays', 'Peter Quill']\n", 1101 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'reprised role from', 'Guardians of the Galaxy']\n", 1102 | "Peter Quill ['is leader of', 'Guardians of the Galaxy']\n", 1103 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'directed', 'Guardians of the Galaxy']\n", 1104 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'wrote', 'Guardians of the Galaxy']\n", 1105 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'sequel to', 'Guardians of the Galaxy']\n", 1106 | "Quill ['speaks', ' fuck ']\n", 1107 | "> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n", 1108 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'released in', '2014']\n", 1109 | "Peter Quill ['portrays', 'Peter Quill']\n", 1110 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'reprised role from', 'Guardians of the Galaxy']\n", 1111 | "Peter Quill ['is leader of', 'Guardians of the Galaxy']\n", 1112 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'directed', 'Guardians of the Galaxy']\n", 1113 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'wrote', 'Guardians of the Galaxy']\n", 1114 | "Peter Quill ['is leader of', 'Guardians of the Galaxy', 'sequel to', 'Guardians of the Galaxy']\n", 1115 | "Quill ['speaks', ' fuck ']\n" 1116 | ] 1117 | }, 1118 | { 1119 | "data": { 1120 | "text/markdown": [ 1121 | "\n", 1122 | "Peter Quill is the leader of the Guardians of the Galaxy, a superhero team released in 2014. 
He portrays the character of Peter Quill and reprised his role from the Guardians of the Galaxy. He was also the director and writer of the Guardians of the Galaxy and its sequel. Quill is known to speak with profanity." 1123 | ], 1124 | "text/plain": [ 1125 | "" 1126 | ] 1127 | }, 1128 | "metadata": {}, 1129 | "output_type": "display_data" 1130 | } 1131 | ], 1132 | "source": [ 1133 | "response = kg_rag_query_engine.query(\"Tell me about Peter Quill?\")\n", 1134 | "display(Markdown(f\"{response}\"))" 1135 | ] 1136 | }, 1137 | { 1138 | "cell_type": "code", 1139 | "execution_count": null, 1140 | "id": "c453a760", 1141 | "metadata": {}, 1142 | "outputs": [ 1143 | { 1144 | "name": "stdout", 1145 | "output_type": "stream", 1146 | "text": [ 1147 | "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n", 1148 | "Get connection to ('127.0.0.1', 9669)\n" 1149 | ] 1150 | }, 1151 | { 1152 | "data": { 1153 | "text/html": [ 1154 | "
\n", 1155 | "\n", 1168 | "\n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | "
path0
0(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
1(\"Peter Quill\" :entity{name: \"Peter Quill\"})<-...
2(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
3(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
4(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
5(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
6(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
7(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
8(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
9(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
10(\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...
\n", 1222 | "
" 1223 | ], 1224 | "text/plain": [ 1225 | " path0\n", 1226 | "0 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1227 | "1 (\"Peter Quill\" :entity{name: \"Peter Quill\"})<-...\n", 1228 | "2 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1229 | "3 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1230 | "4 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1231 | "5 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1232 | "6 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1233 | "7 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1234 | "8 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1235 | "9 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[...\n", 1236 | "10 (\"Peter Quill\" :entity{name: \"Peter Quill\"})-[..." 1237 | ] 1238 | }, 1239 | "execution_count": null, 1240 | "metadata": {}, 1241 | "output_type": "execute_result" 1242 | } 1243 | ], 1244 | "source": [ 1245 | "%%ngql\n", 1246 | "MATCH path0=(p:`entity`)-[*1..2]-() WHERE p.`entity`.`name` == 'Peter Quill'\n", 1247 | "RETURN path0;" 1248 | ] 1249 | }, 1250 | { 1251 | "cell_type": "code", 1252 | "execution_count": null, 1253 | "id": "8014418d", 1254 | "metadata": { 1255 | "scrolled": false 1256 | }, 1257 | "outputs": [ 1258 | { 1259 | "name": "stdout", 1260 | "output_type": "stream", 1261 | "text": [ 1262 | "nebulagraph_draw.html\n" 1263 | ] 1264 | }, 1265 | { 1266 | "data": { 1267 | "text/html": [ 1268 | "\n", 1269 | " \n", 1277 | " " 1278 | ], 1279 | "text/plain": [ 1280 | "" 1281 | ] 1282 | }, 1283 | "execution_count": null, 1284 | "metadata": {}, 1285 | "output_type": "execute_result" 1286 | } 1287 | ], 1288 | "source": [ 1289 | "%ng_draw" 1290 | ] 1291 | }, 1292 | { 1293 | "cell_type": "code", 1294 | "execution_count": null, 1295 | "id": "04ac7fd8", 1296 | "metadata": {}, 1297 | "outputs": [], 1298 | "source": [ 1299 | "!mv nebulagraph_draw.html nebulagraph_draw_rag.html" 1300 | ] 1301 | } 1302 | ], 1303 | "metadata": { 1304 | 
"kernelspec": { 1305 | "display_name": "Python 3 (ipykernel)", 1306 | "language": "python", 1307 | "name": "python3" 1308 | }, 1309 | "language_info": { 1310 | "codemirror_mode": { 1311 | "name": "ipython", 1312 | "version": 3 1313 | }, 1314 | "file_extension": ".py", 1315 | "mimetype": "text/x-python", 1316 | "name": "python", 1317 | "nbconvert_exporter": "python", 1318 | "pygments_lexer": "ipython3", 1319 | "version": "3.11.4" 1320 | } 1321 | }, 1322 | "nbformat": 4, 1323 | "nbformat_minor": 5 1324 | } 1325 | -------------------------------------------------------------------------------- /storage_graph/docstore.json: -------------------------------------------------------------------------------- 1 | {"docstore/metadata": {"914e16a9-7a9f-4e06-ae6c-35e1e3296832": {"doc_hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "181df629-dabe-499f-b9e4-5517c1106e22": {"doc_hash": "1c0cfcf1846867db87e14694014a3b84dbacfd2298ec2e05fe9a82e79c7c881f", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69": {"doc_hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "6b4cd20b-0460-4cf6-838a-987a9ead426b": {"doc_hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "23688f93-fc51-495e-ae70-c4b79a00d153": {"doc_hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "212ada4e-157a-4241-b453-30f8e5f8f3e1": {"doc_hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "71898eef-f64f-4d47-a677-343a227d9524": {"doc_hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "dc262176-f882-4b19-9a83-c9e81d77d3b8": {"doc_hash": 
"acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "758c1ea9-bb37-4813-b891-e2f90686393f": {"doc_hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "7e403166-2350-4ff6-a516-3cbb25356f32": {"doc_hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6": {"doc_hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "66f24e08-79cb-4559-8d91-44ab4270add3": {"doc_hash": "245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "49d408fd-5bb2-4c41-b807-60daf4b1531a": {"doc_hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "fc9299ee-ee60-4abc-8e2f-45295f64a7aa": {"doc_hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "bdf2433e-f430-4550-a566-bf5624374b70": {"doc_hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "5fcd0003-fce2-49bc-a004-75e7ba06a7ad": {"doc_hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "08659ce9-39e2-48ca-816e-33f5af331d37": {"doc_hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}, "778fc2f5-0312-4503-85da-8cb294597451": {"doc_hash": "7fd46180ce995de60ba53f3363ce55baa003d9f1dfaff370c3ffc2edaffaeca6", "ref_doc_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832"}}, "docstore/data": {"181df629-dabe-499f-b9e4-5517c1106e22": {"__data__": {"id_": 
"181df629-dabe-499f-b9e4-5517c1106e22", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "3": {"node_id": "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "node_type": null, "metadata": {}, "hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1"}}, "hash": "1c0cfcf1846867db87e14694014a3b84dbacfd2298ec2e05fe9a82e79c7c881f", "text": "The following scientific events occurred or are scheduled to occur in 2023.\n\n\n== Events ==\n\n\n=== January ===\n\n\n=== February ===\n\n\n=== March ===\n\n\n=== April ===\n3 April\nFive employees at the National Hurricane Center publish a tropical cyclone report (TCR) on Hurricane Ian, which officially upgrades the hurricane from a Category 4 to a Category 5 on the Saffir\u2013Simpson scale. The TCR also stated that Hurricane Ian caused, with 90% confidence, $112.9 billion worth of damage to the United States, which made Ian the third-costliest United States hurricane on record as well as the costliest hurricane to strike Florida on record.\nAn unexplained rise of emissions of five chlorofluorocarbons (CFCs), successfully banned by the Montreal Protocol of 1989, is reported. 
Their climate impact in 2020 is roughly equivalent to that of the CO2e from Denmark in 2018.\nA study affirms and explains why a moderate decrease in body temperature extends lifespan.\n5 April\nThe NOAA reports that greenhouse gases continued to increase rapidly in 2022 and that CO2 levels in the atmosphere are now the highest in 4.3 million years.\nAn umbrella review summarizes scientific results on the extensive health effects of added-sugar foods and makes recommendations such as limiting sugar-sweetened beverages which are \"the largest source of added sugars\" and developing of policy such as advertising restrictions.\n6 April \u2013 A study shows neurons take up glucose (from food) and metabolize it by glycolysis. There was only limited research on how neurons get their energy in the context of links between glucose metabolism and cognition (brain health and performance).\n10 April \u2013 A study expands upon the role of elites' unsustainable consumption in urban water crises. In Cape Town, for example, the wealthiest 14% of the population use half of the city's water, while the poorest 62% use just a quarter.\n11 April \u2013 A study reports that genomic surveillance (GS) shows that a clonal lineage of the wheat blast fungus has spread", "start_char_idx": 0, "end_char_idx": 2006, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69": {"__data__": {"id_": "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "181df629-dabe-499f-b9e4-5517c1106e22", "node_type": null, "metadata": {}, "hash": 
"1c0cfcf1846867db87e14694014a3b84dbacfd2298ec2e05fe9a82e79c7c881f"}, "3": {"node_id": "6b4cd20b-0460-4cf6-838a-987a9ead426b", "node_type": null, "metadata": {}, "hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083"}}, "hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1", "text": "(GS) shows that a clonal lineage of the wheat blast fungus has spread worldwide and that there is a need for GS to track and mitigate the potential pandemic threat to the global food supply as it may become fungicide-insensitive.\n13 April\nThe direct imaging of HIP 99770 b, a new exoplanet found 133 light years away, is reported by astronomers.\nA global trend towards more rapid-onset \"flash droughts\" hindering forecasting is reported.\n14 April\nJupiter Icy Moons Explorer (JUICE) is launched by the European Space Agency (ESA) to search for life in the Jovian system, with an expected arrival date of 2031.\nA review reports that a gender-affirming therapy in adolescents \u2013 gender transition for \"rapidly growing numbers\" of gender-dysphoric youth \u2013 is not supported by the evidence, and asks the field to honor principles of evidence-based medicine.\n17 April\nA new technique for improving the resolution of post-mortem MRI brain scans \"by 64 million times\" is reported by researchers, who capture the sharpest ever images of an entire mouse brain.\nA study expands upon the international Earth heat inventory from 2020, which provides a measure of the Earth energy imbalance (EEI) and allows for quantifying how much and where heat has accumulated in the Earth system with comprehensive data. It suggests that the EEI is the \"most fundamental global climate indicator\" to gauge climate change mitigation efforts.\n18 April\nAstronomers conclude that \"... 
planets in the habitable zones of stars with low metallicity are the best targets to search for complex life on land.\"\nA university reports a study (29 Mar) affirming the high level of economic losses from biological invasions, showing they have risen to the level of economic damage costs from floods or earthquakes, which are also rising.\n19 April\nA bolide is observed over Ukraine and Belarus for about five seconds. It is first observed at an altitude of 98 km above Velyka Dymerka, then passes directly", "start_char_idx": 1950, "end_char_idx": 3911, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "6b4cd20b-0460-4cf6-838a-987a9ead426b": {"__data__": {"id_": "6b4cd20b-0460-4cf6-838a-987a9ead426b", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "node_type": null, "metadata": {}, "hash": "2de6f537170cfe777c202cb5b48c4e8ded65a0f307dfeaad4c63b5545d5068b1"}, "3": {"node_id": "23688f93-fc51-495e-ae70-c4b79a00d153", "node_type": null, "metadata": {}, "hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76"}}, "hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083", "text": "an altitude of 98 km above Velyka Dymerka, then passes directly above Kyiv at an altitude of 80 km and continues to the southwest with a speed of 29 km/s. 
A bright flare occurs at an altitude of 38 km, when the bolide's absolute magnitude reaches approximately \u201318.\nThe likely cause of grey hair is shown to be pigment-making cells losing their ability to mature into melanocytes.\nResearchers show parrots can and enjoy to use a videocalling system.\nA study with mice shows that microplastics pass the blood\u2013brain barrier (BBB), entering and accumulating in the brain, and identifies a key determinant for whether or not they pass the BBB.\n20 April\nA new 29-year record of ice sheet mass in Greenland and Antarctica is published as part of the IMBIE collaboration. It finds that the combined ice loss in these regions has more than tripled since the early 1990s, with 2019 seeing the greatest losses of any year on record. These findings have implications for future sea level rise.\nPaleoneurologists publish the first neuroevolutionary timeline about correlations of changes in the shape of the cerebral cortex and functions, showing \"variability in surface geometry relates to species' ecology and behaviour\" and cognition. It characterizes many of the neuromorphological events in the origin of distinct human intelligence over the past 77 million years.\nA UNICEF report indicates \"public perception of the importance of vaccines for children declined during the COVID-19 pandemic in 52 out of 55 countries studied\" with causal factors including \"growing access to misleading information\". On 26 April, news outlets report that Twitter is warned by EU digital policy-makers after a report indicated its recent policies \"boost\" Russian disinformation-based propaganda. On 17 April, Twitter introduces labels for rationales when tweets are made less visible which previously were semi-censored without any explanation. 
On 5 April, the first review of interventions against false conspiracy beliefs, with interventions \"that fostered an analytical mindset or taught critical", "start_char_idx": 3916, "end_char_idx": 5990, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "23688f93-fc51-495e-ae70-c4b79a00d153": {"__data__": {"id_": "23688f93-fc51-495e-ae70-c4b79a00d153", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "6b4cd20b-0460-4cf6-838a-987a9ead426b", "node_type": null, "metadata": {}, "hash": "dbed2b759f925de4023bcee94e0ba1cf8298e578a440c312a5788c45ed972083"}, "3": {"node_id": "212ada4e-157a-4241-b453-30f8e5f8f3e1", "node_type": null, "metadata": {}, "hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2"}}, "hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76", "text": "with interventions \"that fostered an analytical mindset or taught critical thinking skills\" being most effective and preventive action being important.\n21 April \u2013 Researchers report the development of neuromorphic AI hardware using nanowires (see also 2020-04-20) physically mimicking the brain's activity in identifying and remembering an image from memory. 
On 26 April, a university reports on a demonstration (11 Mar) of multisensory motion cue integration by a neuromorphic nerve for robots.24 April\nAstronomers release close-up global images, for the first time, of the Martian moon Deimos that were taken by the Mars Hope orbiter.\nThe first review of issues identified in meta-science of metascience is published, providing an overview of ten \"questionable\" practices (QMPs) in the field such as \"overplaying the role of replication in science\" and preregistration potential.\nA policy study identifies reduction of car travel activity as the most important transportation policy option in reducing GHG emissions to levels comparable to carbon budget levels, with a \"decrease car distance driven and car ownership by over 80% as compared to current levels\" by 2027 being effective in \"edging close to the designated carbon budget\" in their case-study of London and electrification being highly insufficient. On 20 April, an international study indicates that the contemporary domestic policy-proposal of a general speed limit on highways in Germany, the only large country in the world without such, for a quick GHG emissions reduction would also be economically beneficial. It points to a climate change mitigation law (KSG) that mandated emission reductions in this sector that was changed in 2023 so as to remove these obligations.\n25 April\nScientists, based on new evidence, conclude that Rosalind Franklin was a contributor and \"equal player\" in the discovery process of DNA, rather than otherwise, as may have been presented subsequently after the time of the discovery.\nThe first gene silencing approach to Alzheimer's disease is reported, with a drug called BIIB080 used on the microtubule-associated protein tau (MAPT) gene. 
Patients in a Phase 1 trial were", "start_char_idx": 5979, "end_char_idx": 8150, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "212ada4e-157a-4241-b453-30f8e5f8f3e1": {"__data__": {"id_": "212ada4e-157a-4241-b453-30f8e5f8f3e1", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "23688f93-fc51-495e-ae70-c4b79a00d153", "node_type": null, "metadata": {}, "hash": "f3037ba4ac0936be6f7692f84daacfb85cc65e6d80b93ddcaf2cb642aa3aea76"}, "3": {"node_id": "71898eef-f64f-4d47-a677-343a227d9524", "node_type": null, "metadata": {}, "hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c"}}, "hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2", "text": "protein tau (MAPT) gene. Patients in a Phase 1 trial were found to have a greater than 50% reduction in levels of harmful tau protein after taking the drug.\n26 April\nAstronomers present an image, for the first time viewed together, of the shadow of the black hole in the center of the Messier 87 galaxy, and its related high-energy jet.\nThe first-ever global assessment of glacier mass loss from satellite radar altimetry is published. 
It shows that glaciers lost 2,720 gigatonnes of ice, about 2% of their volume, between 2010 and 2020.\n28 April \u2013 Progress in AI software:\nChatGPT is shown to outperform human doctors in responding to online medical questions when measured on quality and empathy by \"a team of licensed health care professionals\", albeit the chatbot may have previously been trained with these reddit question and answers threads.\nFurther LLM developments during what has been called an \"AI boom\" include: local or open source versions of LLaMA which was leaked in March, news outlets report on GPT4-based Auto-GPT that given natural language commands uses the Internet and other tools in attempts to understand and achieve its tasks with unclear or so-far little practicality, a systematic evaluation of answers from four \"generative search engines\" suggests their outputs \"appear informative, but frequently contain unsupported statements and inaccurate citations\", a multi-modal open source tool for understanding and generating speech, a data scientist argues that \"researchers need to collaborate to develop open-source LLMs that are transparent\" and independent, Stability AI launches an open source LLM.\nOn 12 April, researchers demonstrate an 'AI scientist' that can create of models of natural phenomena from knowledge axioms and experimental data, showing the software can rediscover physical laws using logical reasoning and few data points.\nPromising results of therapeutic candidates are reported: a review suggests daily vitamin D3 may reduce cancer mortality by around 12% (31 Mar), review of experimental phototherapies against dementia cognitive", "start_char_idx": 8169, "end_char_idx": 10249, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "71898eef-f64f-4d47-a677-343a227d9524": {"__data__": {"id_": "71898eef-f64f-4d47-a677-343a227d9524", "embedding": null, "metadata": {}, 
"excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "212ada4e-157a-4241-b453-30f8e5f8f3e1", "node_type": null, "metadata": {}, "hash": "97a7ab4238b6213dabf3e04318e0478313f1f66b83155259b57c343f679c83e2"}, "3": {"node_id": "dc262176-f882-4b19-9a83-c9e81d77d3b8", "node_type": null, "metadata": {}, "hash": "acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054"}}, "hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c", "text": "12% (31 Mar), review of experimental phototherapies against dementia cognitive decline (5 Apr), mice-tested L. reuteri-and-tryptophan-diet for checkpoint inhibitor potentiation (6 Apr), doxycycline post-exposure prophylaxis against STIs (6 Apr), an engineered probiotic against alcohol-induced damage (11 Apr), phase 2 trialed AXA1125 against long COVID fatigue (14 Apr), review finds cranberry products useful against UTIs in women (17 Apr), and macaques-tested low-intensity focus ultrasound delivery of AAV into brain regions against brain diseases (19 Apr). 
Progress in screening: an \u03b1-synuclein SAA (assay) against Parkinson's disease (12 Apr), and exogenously administered bioengineered sensors that amplify urinary cancer biomarkers for detection (24 Apr).\nPromising innovations relating to global challenges are reported: a laser-using drone-based methane plume localization method, approval of the first yeast-based cow-free dairy (Remilk), a Tor browser-equivalent Web browser for privacy-protected browsing when using a VPN (Mullvad browser), a concentrated solar-to-hydrogen device approaching viability, a method for fat tissue cultured meat, flexible organic solar cells on balloons in the 35 km stratosphere.\n\n\n=== May ===\n1 May\nA new brain-reading method for \"semantic decoding\" is demonstrated. The non-invasive system, based on 16 hours of fMRI data per participant and a transformer, is able to translate a person's neural activity into a continuous stream of text.\nNews outlets report the first study (6 Feb) modelling contemporary detectability of human civilization from afar which suggests overall radio-leakage from mobile towers would still be too weak to be detectable with humanity's next-generation radio telescopes from three of Earth's current closest nearby star-systems. 
Radar systems are not yet included in their model, while radar emissions during \u2013 and possibly since \u2013 the", "start_char_idx": 10228, "end_char_idx": 12137, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "dc262176-f882-4b19-9a83-c9e81d77d3b8": {"__data__": {"id_": "dc262176-f882-4b19-9a83-c9e81d77d3b8", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "71898eef-f64f-4d47-a677-343a227d9524", "node_type": null, "metadata": {}, "hash": "e48fadeb031fcf69647b29b0577ec12c02f472001d1dfc3853c40b0a29da708c"}, "3": {"node_id": "758c1ea9-bb37-4813-b891-e2f90686393f", "node_type": null, "metadata": {}, "hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1"}}, "hash": "acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054", "text": "not yet included in their model, while radar emissions during \u2013 and possibly since \u2013 the Cold War are thought to be the first most detectable cue by which hypothetical extraterrestrials could detect humanity.\nThe second study, after one from early 2022 with similar results, about EEG data of dying humans finds a surge of gamma waves and increased functional connectivities in two of four patients. 
It provides data and analysis about the brain process of dying (terminal loss of sentience and life) and near-death experiences.\n2 May\nA new AI algorithm developed by Baidu is shown to boost the antibody response of COVID-19 mRNA vaccines by 128 times.\nA single-molecule valve is demonstrated, a breakthrough in nanoscale control of fluids.\nScientists report economic factors of neurology or mental health and cognition during child development: association of low income with brain structure and hippocampal volume, stronger associations in U.S. states with higher cost of living, and lower associations for stronger social safety nets for low-income-affected people.3 May\nDrug company Eli Lilly reports that donanemab can slow the pace of Alzheimer's disease by 35%, following a Phase 3 study in human patients.\nAstronomers using the Gemini South telescope report the first direct evidence of an exoplanet being swallowed by an ancient Sun-like star, a fate that likely awaits the Earth in five billion years.\n4 May \u2013 Westinghouse Electric's nuclear division announces the AP300, a miniature version of its signature AP1000 nuclear reactor.\n5 May \u2013 The World Health Organization announces that COVID-19 is no longer considered a global health emergency.\n8 May\nThe first infrared image of an asteroid belt outside our Solar System is captured by the James Webb Space Telescope. 
Three distinct rings of debris are shown to exist around Fomalhaut, a young star 25 light years away.\nAI successfully identifies people at the highest risk for pancreatic cancer up to three years before diagnosis, using solely the patients' medical records.\n10 May\nA rough draft of the", "start_char_idx": 12134, "end_char_idx": 14198, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "758c1ea9-bb37-4813-b891-e2f90686393f": {"__data__": {"id_": "758c1ea9-bb37-4813-b891-e2f90686393f", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "dc262176-f882-4b19-9a83-c9e81d77d3b8", "node_type": null, "metadata": {}, "hash": "acb70087e4cc902de303a48e0672c1ea09ae704635bbc8a71f602857cf1c5054"}, "3": {"node_id": "7e403166-2350-4ff6-a516-3cbb25356f32", "node_type": null, "metadata": {}, "hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8"}}, "hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1", "text": "using solely the patients' medical records.\n10 May\nA rough draft of the human \"pan-genome\" is presented, consisting of 47 genomes from a cohort of genetically diverse individuals. 
This aims to improve medical research by building on the earlier Human Genome Project.\nScientists demonstrate with experimental evolution how macroscopic multicellularity could have emerged on Earth.\n11 May \u2013 The discovery of 62 new moons of Saturn is reported, taking its total confirmed number to 145 and overtaking Jupiter.\n15 May\nThe National Institutes of Health begins a Phase 1 trial of an mRNA-based universal influenza vaccine, enrolling 50 volunteers.\nA study shows most extensively the neuro-molecular mechanics of how a fungal parasite affects behavior of insects.\nA study found that, of 70,000 monitored species, some 48% are experiencing population declines from human activity, whereas only 3% have increasing populations.\nBy publishing virome-related results, researchers close a major gap in the acceleratingly accumulating research into centenarians' microbiome characteristics for life extension.\n16 May \u2013 A software tool called Allegro is reported to accurately simulate 44 million atoms, running on the Perlmutter supercomputer.\n17 May\nScientists report, based on genetic studies, a more complicated pathway of human evolution than previously understood. According to the studies, humans evolved from different places and times in Africa, instead of from a single location and period of time.\nThe newly discovered exoplanet LP 791-18 d is theorised to be covered with volcanoes, due to the extreme gravitational pull of a super-Earth in the same system.\nA study proposes school curricula start including useful basic life support, noting that e.g. complemented video lessons could be effective.\n18 May\nAstronomers map the paths of potentially hazardous asteroids for the next 1,000 years. 
At least 28 asteroids of 1 km diameter or larger are found to have non-zero probabilities of a 'deep encounter' with", "start_char_idx": 14211, "end_char_idx": 16217, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "7e403166-2350-4ff6-a516-3cbb25356f32": {"__data__": {"id_": "7e403166-2350-4ff6-a516-3cbb25356f32", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "758c1ea9-bb37-4813-b891-e2f90686393f", "node_type": null, "metadata": {}, "hash": "eb15f34b0356f23e89ad5d135a76be4ee32646ceeb2c391167cd074d543244c1"}, "3": {"node_id": "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "node_type": null, "metadata": {}, "hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252"}}, "hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8", "text": "are found to have non-zero probabilities of a 'deep encounter' with Earth.\nA study reports that more than 50% of freshwater lakes and reservoirs lost volume since 1992 due to human and climatic drivers.\n19 May\nA policies study review, based on a systematic examination of existing methane policies across sectors, concludes that both only \"about 13% of methane emissions are covered by methane mitigation policies\" and that the effectiveness of these policies \"is far from clear\".\nResearchers propose a methodological approach and quantifications for reparations from fossil fuel producers. 
Cross-disciplinary researchers propose academics make universities implement, leadingly, the plant-based dietary transition that an increasingly large and confirmed corpus of studies, to which these contributed to, concludes is vital (7 May).\nMetascience-related events\nNature reports China has \"overtaken the United States as the number one ranked country or territory for contributions to research articles published in the Nature Index group of high-quality natural-science journals\", remaining at second place overall. The Nature Index, since 2016, evaluates contribution by the number of articles published in a subgroup of their journals \u2013 other potential or less popular approaches and metrics for quantifications of success or impact can or could produce different rankings or annual tables and conclusions.\n34% of neuroscience papers and 23% of medical papers published in 2020 were probably fabricated or plagiarized, according to a preprint study, stemming from paper mills (9 May).\nA time-use research study (10 May) estimates the costs of manuscript (re)formatting to fit journal guidelines, ~$230 million or ~75 million hours of lost academics' time in 2021. 
As researchers, usually with little time, usually conduct these tasks themselves and manually and the, largely cosmetic, unstandardized changes are required before, not after, the paper is accepted for publication, the study proposes journals start allowing \"free-format submissions\".\nA study (25 May) highlights a list of problematic persuasive", "start_char_idx": 16222, "end_char_idx": 18331, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6": {"__data__": {"id_": "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "7e403166-2350-4ff6-a516-3cbb25356f32", "node_type": null, "metadata": {}, "hash": "405a764ce822658d4e2ceecc26b4b77c75fba0a9827abb24c1ed0a25b7fffcc8"}, "3": {"node_id": "66f24e08-79cb-4559-8d91-44ab4270add3", "node_type": null, "metadata": {}, "hash": "245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b"}}, "hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252", "text": "submissions\".\nA study (25 May) highlights a list of problematic persuasive methods in academic articles, such as exaggerating the importance of the work or insufficient contextualization by \"Not citing previous work that decreases the perceived novelty of the current work\".21 May \u2013 IBM announces that it will begin development of a 100,000-qubit quantum computer, the world's largest and most powerful, to be completed by 2033.\n22 May \u2013 A study quantifies \"the human cost of global warming\", showing current policies \"leading to around 2.7\u2009\u00b0C global warming could by 2080\u20132100 leave one-third (22\u201339%)\" of 
people outside their climate niche (see also 4 May 2020) \u2013 humans' long-time range of mean annual temperatures to which their physiology may have largely adapted to. It indicates meeting the 1.5\u2009\u00b0C goal would decrease the population exposed to unprecedented heat\u2009~5-fold and ties itself to earlier research by initially noting that quantifying the social cost of carbon in monetary terms, as related or economics studies tend to do, may be inadequate.\n23 May\nUsing the Hubble Space Telescope and Gaia spacecraft, an analysis of proper motions of the closest known globular cluster, Messier 4, reveals an excess mass of roughly 800 solar masses in the center. This appears to not be extended, and could thus be the best kinematic evidence for an intermediate-mass black hole (even if an unusually compact cluster of compact objects, white dwarfs, neutron stars or stellar-mass black holes cannot be completely discounted).\nThe first survey study of academics' perception of the topic of \"unidentified aerial phenomena\" \u2013 also called UAP and one type of \"anomalous phenomena\" \u2013 suggests few academics are knowledgeable about the existing academic literature on UAP, such as studies by Knuth, Nolan and Loeb or of The Galileo Project. The majority was not or only slightly aware of the 2021 US Intelligence UFO Report. 
Results indicate", "start_char_idx": 18324, "end_char_idx": 20263, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "66f24e08-79cb-4559-8d91-44ab4270add3": {"__data__": {"id_": "66f24e08-79cb-4559-8d91-44ab4270add3", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "node_type": null, "metadata": {}, "hash": "2fa01cbd033355cb0cf08eca484e62114ebf2018ac45c855f3cb0bed6c4d5252"}, "3": {"node_id": "49d408fd-5bb2-4c41-b807-60daf4b1531a", "node_type": null, "metadata": {}, "hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4"}}, "hash": "245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b", "text": "not or only slightly aware of the 2021 US Intelligence UFO Report. Results indicate \"more academic research about UAP\" is rated by the respondent population (~4% of ~40,000 contacted) most commonly as \"Of Average Importance\", below and before \"Very Important\".\nA study identifies plastic chopping boards as a substantial source of ingested microplastics. Researchers show plastic recycling facilities are a major source of microplastic water pollution (1 May).\nComputational neuroscientists show that people with higher intelligence scores in HCP cognitive tests took more time to solve difficult problems and that their higher synchrony between brain areas allowed for better integration of evidence (or progress) from preceding working memory sub-problem processing. Reducing synchrony in \"avatar\" simulations, that were adjusted and tuned towards personalization, \"led decision-making circuits to quickly jump to conclusions\". 
Their codified results may be useful for an understanding of cognition to replicate or imitate in bio-inspired computing.\nResearchers report trends in reasons for HPV vaccine hesitancy during 2010\u201320.\n24 May\nScientists show how gene 'FAAH'-related disruption via genetic or epigenome editing can enable pain insensitivity (see also 10 March 2021). Their analyses, mainly about long non-coding RNA 'FAAH-OUT', following from decade-long study of a woman who can't feel pain or anxiety, could also enable novel therapeutic developments against other neurological problems.\nOne of the first empirical studies on what real users are shown during their typical use of popular Web search engines interprets its results to show that choices for unreliable news sources for their queries are driven primarily by users' own choices and less by the engine's algorithms. The Web scientists link their findings to the concept of filter bubbles which emphasizes the role of design- and personalization algorithms. On 2 May, a report accompanied by an open letter concludes that Alphabet Inc, against its voluntary promises, still runs climate misinformation ads. 
Statements by Elon Musk in 2022 suggest YouTube", "start_char_idx": 20259, "end_char_idx": 22386, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "49d408fd-5bb2-4c41-b807-60daf4b1531a": {"__data__": {"id_": "49d408fd-5bb2-4c41-b807-60daf4b1531a", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "66f24e08-79cb-4559-8d91-44ab4270add3", "node_type": null, "metadata": {}, "hash": "245d161980c9962b654c1051f50b83b9a21ca0fbdaab03df36408ac1e6dc4a5b"}, "3": {"node_id": "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "node_type": null, "metadata": {}, "hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f"}}, "hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4", "text": "still runs climate misinformation ads. Statements by Elon Musk in 2022 suggest YouTube may also show ethically disputed advertising other than science-related misinformation such as extensively showing \"scam ads\".\n25 May\n5,000 marine species new to science are discovered in the Clipperton Fracture Zone, a proposed deep sea mining hotspot in the Pacific Ocean.\nAI is used to develop an experimental antibiotic called abaucin, which is shown to be effective against A. 
baumannii.\nEvidence for the existence of a second Kuiper Belt is presented by NASA scientists, which the New Horizons spacecraft could potentially visit during the late 2020s or early 2030s.\nA study reports observational evidence for problematic fast slowdown of the Antarctic bottom water current.\nNeuroengineers demonstrate induction of a torpor-like state in mice via ultrasound stimulation.\n29 May\nA new record high efficiency of 19.3% for organic solar cells is reported.\nMBR Explorer is announced by the United Arab Emirates Space Agency, an uncrewed mission to explore seven asteroids, which includes an attempted surface landing on 269 Justitia in 2034.\nScientists provide details of H5N1 bird flu's fast viral evolution of clade 2.3.4.4b including reassortment after \"explosive geographic expansion in 2021 among wild birds\", with relevance to measures such as existing candidate vaccines.\n31 May\nThe first X-ray of a single atom is reported.\nAn international study, using modelling and literature assessment, codifies, integrates into and quantifies \"safe and just Earth system boundaries\" (ESBs) with the context of Earth system stability and minimization of human harm. 
They expand upon earlier boundary frameworks by incorporating concepts such as intra- and intergenerational justice, propose that their framework may better enable a quantitative foundation for safeguarding the global commons, and report many of the ESBs are already exceeded.\nHealthcare systems related results are published: large increases in medication prices via sustained decrease in their use can cause poorer disease control (8 May),", "start_char_idx": 22383, "end_char_idx": 24475, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "fc9299ee-ee60-4abc-8e2f-45295f64a7aa": {"__data__": {"id_": "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "49d408fd-5bb2-4c41-b807-60daf4b1531a", "node_type": null, "metadata": {}, "hash": "85975279279946ded84af9db8e61c024842ced9b9bfa5464ed571c6506e193b4"}, "3": {"node_id": "bdf2433e-f430-4550-a566-bf5624374b70", "node_type": null, "metadata": {}, "hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133"}}, "hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f", "text": "decrease in their use can cause poorer disease control (8 May), widespread implementation of the particular Alzheimer's disease therapeutic solution lecanemab may increase annual U.S. Medicare spending by $2.0 to $5.1 billion (11 May), mailed HPV self-collection kits with scheduling assistance can lead to greater uptake of cervical cancer screening (11 May), cost-related medication nonadherence occurs in approximately 1 in 5 older adults in the U.S. in 2022 (18 May), and a QALY-based health economics study evaluates the cost-effectiveness of U.S. 
population-wide screening for CKD (23 May).\nResearch on potential current public risk sources is published: the common DBP and BPAF appear to have \"substantial impact on the integrity of the sperm nucleus and DNA structure\" in mice via oxidative stress (5 May), a preliminary study contextualizes \"time spent on social media\" as one of the \"least influential factors in adolescent mental health\" (8 May), ubiquitous environmental contaminant TCE appears to be a risk factor for Parkinson's disease (PD) (15 May), various pesticides are identified as potential risk factors for PD (16 May), researchers demonstrate a two zero-day vulnerabilities-based quick low-cost method \u2013 \"BrutePrint\" \u2013 for bypassing common smartphones' fingerprint authentication (18 May), and common sucralose impurity sucralose-6-acetate appears to be DNA damaging with sucralose-sweetened drinks potentially far exceeding the threshold of toxicological concern (29 May).\nPromising innovations relating to global challenges are demonstrated: an open source automated experimentation science platform (BacterAI) for predicting microbial metabolism with little data (4 May), a pesticide alternative against wheat seed loss (22 May), a low-cost smartphone-attachment (BPClip) for blood pressure measurement (29 May), an open source transfer learning-based system", "start_char_idx": 24497, "end_char_idx": 26382, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "bdf2433e-f430-4550-a566-bf5624374b70": {"__data__": {"id_": "bdf2433e-f430-4550-a566-bf5624374b70", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "node_type": 
null, "metadata": {}, "hash": "53ce5fb0c66d0b346ac988da8f3e954fac4af17c985b820c97514e6abf37b37f"}, "3": {"node_id": "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "node_type": null, "metadata": {}, "hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c"}}, "hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133", "text": "for blood pressure measurement (29 May), an open source transfer learning-based system (Geneformer) for predicting how networks of interconnected human genes control or affect the function of cells (31 May).\nPromising results of therapeutic candidates are reported: phase\u2009I trialed ultrasound BBB-opening device against brain cancer (1 May), phase I trialed personalized mRNA vaccine against pancreatic cancer recurrence (10 May), a novel antibiotic (Streptothricin F) against ABR bacteria (9 May), an e-skin for prosthetic sensing (18 May), two-dose JYNNEOS vaccine against mpox appears ~86% (19 May) or ~66% (18 May) effective, and a xenografted mice-tested pan-KRAS-inhibitor against cancer (31 May).\n\n\n=== June ===\n1 June \u2013 Caltech reports the first successful beaming of solar energy from space down to a receiver on the ground, via the MAPLE instrument on its SSPD-1 spacecraft, launched into orbit in January.\n2 June \u2013 Physicist Lucas Lombriser reports an alternative way of interpreting the available scientific data which suggests that the notion of an expanding universe may be more a \"mirage\" than otherwise.\n5 June\nScientists report evidence that Homo naledi, an extinct species of archaic human discovered in 2013 in South Africa, and living as long as 500,000 years ago, buried their dead, created art in their caves and used fire.\nA 'chef' robot developed at the University of Cambridge is trained to watch and learn from cooking videos, and recreate dishes itself.\n6 June \u2013 A study in Nature finds that the first complete disappearance of Arctic sea ice could occur during the 2030s, a decade 
earlier than previously forecast.\n8 June\nUS scientists confirm that the next El Ni\u00f1o has begun, likely resulting in higher global temperatures in late 2023 and into 2024.\nTaurine given to a range of animal species is found to boost health and extend lifespan by up to 12%.\n14 June\nResearchers at the University of Cambridge and", "start_char_idx": 26361, "end_char_idx": 28297, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "5fcd0003-fce2-49bc-a004-75e7ba06a7ad": {"__data__": {"id_": "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "bdf2433e-f430-4550-a566-bf5624374b70", "node_type": null, "metadata": {}, "hash": "9b8169681ccd4608c0a72477730c40122550754dacb5932a9af709a15a523133"}, "3": {"node_id": "08659ce9-39e2-48ca-816e-33f5af331d37", "node_type": null, "metadata": {}, "hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5"}}, "hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c", "text": "lifespan by up to 12%.\n14 June\nResearchers at the University of Cambridge and Caltech report the creation of the first synthetic human embryo from stem cells, without the need for sperm or egg cells.\nScientists report evidence that the planet Earth may have formed in just three million years, much faster than the 100 million years thought earlier.\nAstronomers report that the presence of phosphates on Enceladus, moon of the planet Saturn, has been detected, completing the discovery of all the basic chemical ingredients for life on the moon.\nIBM computer scientists report that a quantum computer produced better results for a physics problem than a 
conventional supercomputer.\nA machine learning model developed at the University of Edinburgh is trained to recognise the key features of chemicals with senolytic activity. It finds three chemicals \u2013 ginkgetin, periplocin and oleandrin \u2013 able to remove senescent cells without damaging healthy cells.\n17 June \u2013 Researchers report that a single gas-stove burner can raise the indoor concentrations of benzene, related to cancer risk, to more than that found in secondhand tobacco smoke.\n21 June \u2013 The first successful transplant of a functional cryopreserved mammalian kidney is reported by the University of Minnesota.\n22 June \u2013 A study in The Lancet predicts that by 2050, the worldwide number of adults with diabetes will more than double, from 529 million to over 1.3 billion. No country is expected to see a decline.\n26 June\nRetatrutide, an experimental drug for obesity, is shown to achieve a more than 24% mean weight reduction in human adults during a Phase 2 trial.\nAstronomers detect, for the first time, methenium, CH3+ (and/or carbon cation, C+), basic ingredients of life as we know it, in interstellar space.\n28 June \u2013 Astronomers report the possible detection of a gravitational wave background (GWB) in the Universe.\n29 June \u2013 Astronomers report using a new technique to detect, for the first time, the release of neutrinos from the galactic plane of the Milky Way galaxy.\n\n\n=== July ===\n1", "start_char_idx": 28307, "end_char_idx": 30366, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "08659ce9-39e2-48ca-816e-33f5af331d37": {"__data__": {"id_": "08659ce9-39e2-48ca-816e-33f5af331d37", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": 
"42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "node_type": null, "metadata": {}, "hash": "077ef8fce032bfceca02aa0332bce50087a91ee6f22b166dc4359c95a14f638c"}, "3": {"node_id": "778fc2f5-0312-4503-85da-8cb294597451", "node_type": null, "metadata": {}, "hash": "7fd46180ce995de60ba53f3363ce55baa003d9f1dfaff370c3ffc2edaffaeca6"}}, "hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5", "text": " the galactic plane of the Milky Way galaxy.\n\n\n=== July ===\n1 July \u2013 The ESA space telescope Euclid is launched, beginning a six-year mission to study dark energy and dark matter.\n5 July \u2013 Harvard astronomer Avi Loeb reports the possibility of finding interstellar material.\n10 July\nDynamic shell formation is demonstrated experimentally for the first time at the University of Rochester's Laboratory for Laser Energetics. Researchers claim their technique is a feasible target for mass production of fusion energy.\nThe highest albedo ever measured for an exoplanet is confirmed using data from the CHEOPS space telescope. The ultra-hot Neptune LTT 9779 b is shown to reflect 80% of incoming light from its star (compared to 75% for Venus), due to the high metal content of its clouds.\n11 July \u2013 Berkeley Earth reports that June 2023 was the warmest June since records began in 1850, and broke the previous record by 0.18 \u00b0C. 
Its temperature dataset suggests that 2023 is now 81% likely to become a new record year for global warming.\n12 July \u2013 Astronomers report considerable success of the James Webb Space Telescope (JWST) after its first year of operations.\n\n\n== Predicted and scheduled events ==\n\nSearch for extraterrestrial intelligence (SETI) and ufology\nFirst major observational campaign of the SETI project COSMIC.\nExpected public release date of the first study by NASA on UAP in mid-2023.\nExpected public first release of results from the international UFO investigation project The Galileo Project led by astronomer Avi Loeb.\nRocket Lab's Venus probe is scheduled to be launched and to arrive on Venus in October, partly to search for signs of life on Venus.\nExpected start of the Vera Rubin Observatory, the Qitai Radio Telescope, the European Spallation Source and the Jiangmen Underground Neutrino Observatory.\nNature has listed 11 clinical trials to watch in 2023. Results of the Participatory Evaluation (of) Aging", "start_char_idx": 30382, "end_char_idx": 32314, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}, "778fc2f5-0312-4503-85da-8cb294597451": {"__data__": {"id_": "778fc2f5-0312-4503-85da-8cb294597451", "embedding": null, "metadata": {}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "914e16a9-7a9f-4e06-ae6c-35e1e3296832", "node_type": null, "metadata": {}, "hash": "42c008af3ead287107e9a1893ca8857436c2040e91be0af9910e6adb5f26ccf6"}, "2": {"node_id": "08659ce9-39e2-48ca-816e-33f5af331d37", "node_type": null, "metadata": {}, "hash": "0f85282e87b66185eddf746374f76e7de35e60c4b2f29050137e6a81cc7877e5"}}, "hash": "7fd46180ce995de60ba53f3363ce55baa003d9f1dfaff370c3ffc2edaffaeca6", "text": "to watch in 2023. 
Results of the Participatory Evaluation (of) Aging (With) Rapamycin (for) Longevity Study (PEARL) clinical trial investigating a life extension intervention are expected to be released.\nScience-related budgets\n US: several fields, research topics and agencies are provided with increased budgets, including the new Advanced Research Projects Agency for Health (ARPA-H). Various changes to the budgets of US institutions like NASA, FDA, EPA and NIH have been described.\n UK: \n\n\n=== Astronomical events ===\n\n\n== Awards ==\n\n\n== Deaths ==\n\n\n== See also ==\n\nCategory:Science events\nCategory:Science timelines\nList of emerging technologies\nList of years in science\n\n\n== References ==\n\n\n== External links ==\n Media related to 2023 in science at Wikimedia Commons", "start_char_idx": 32297, "end_char_idx": 33070, "text_template": "{metadata_str}\n\n{content}", "metadata_template": "{key}: {value}", "metadata_seperator": "\n"}, "__type__": "1"}}, "docstore/ref_doc_info": {"914e16a9-7a9f-4e06-ae6c-35e1e3296832": {"node_ids": ["181df629-dabe-499f-b9e4-5517c1106e22", "a3d7cd8e-91b5-4bb0-aacb-91363eed7f69", "6b4cd20b-0460-4cf6-838a-987a9ead426b", "23688f93-fc51-495e-ae70-c4b79a00d153", "212ada4e-157a-4241-b453-30f8e5f8f3e1", "71898eef-f64f-4d47-a677-343a227d9524", "dc262176-f882-4b19-9a83-c9e81d77d3b8", "758c1ea9-bb37-4813-b891-e2f90686393f", "7e403166-2350-4ff6-a516-3cbb25356f32", "ea31ceb6-cbf8-44e9-9027-7f73d1c3c9e6", "66f24e08-79cb-4559-8d91-44ab4270add3", "49d408fd-5bb2-4c41-b807-60daf4b1531a", "fc9299ee-ee60-4abc-8e2f-45295f64a7aa", "bdf2433e-f430-4550-a566-bf5624374b70", "5fcd0003-fce2-49bc-a004-75e7ba06a7ad", "08659ce9-39e2-48ca-816e-33f5af331d37", "778fc2f5-0312-4503-85da-8cb294597451"], "metadata": {}}}} --------------------------------------------------------------------------------